Transformers
Safetensors
nlivathinos committed on
Commit
772af5e
·
verified ·
1 Parent(s): 4659a7d

Layout model based on RT-DETRv2

Browse files

RT-DETRv2 trained on document datasets

model_artifacts/layout/config.json CHANGED
@@ -3,13 +3,31 @@
3
  "activation_function": "silu",
4
  "anchor_image_size": null,
5
  "architectures": [
6
- "RTDetrForObjectDetection"
7
  ],
8
  "attention_dropout": 0.0,
9
  "auxiliary_loss": true,
10
  "backbone": null,
11
  "backbone_config": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "model_type": "rt_detr_resnet",
 
13
  "out_features": [
14
  "stage2",
15
  "stage3",
@@ -19,6 +37,13 @@
19
  2,
20
  3,
21
  4
 
 
 
 
 
 
 
22
  ]
23
  },
24
  "backbone_kwargs": null,
@@ -34,8 +59,10 @@
34
  256
35
  ],
36
  "decoder_layers": 6,
 
 
37
  "decoder_n_points": 4,
38
- "disable_custom_kernels": true,
39
  "dropout": 0.0,
40
  "encode_proj_layers": [
41
  2
@@ -62,47 +89,45 @@
62
  "freeze_backbone_batch_norms": true,
63
  "hidden_expansion": 1.0,
64
  "id2label": {
65
- "0": "background",
66
- "1": "Caption",
67
- "10": "Text",
68
- "11": "Title",
69
- "12": "Document Index",
70
- "13": "Code",
71
- "14": "Checkbox-Selected",
72
- "15": "Checkbox-Unselected",
73
- "16": "Form",
74
- "17": "Key-Value Region",
75
- "2": "Footnote",
76
- "3": "Formula",
77
- "4": "List-item",
78
- "5": "Page-footer",
79
- "6": "Page-header",
80
- "7": "Picture",
81
- "8": "Section-header",
82
- "9": "Table"
83
  },
84
  "initializer_bias_prior_prob": null,
85
  "initializer_range": 0.01,
86
  "is_encoder_decoder": true,
87
  "label2id": {
88
- "Caption": "1",
89
- "Checkbox-Selected": "14",
90
- "Checkbox-Unselected": "15",
91
- "Code": "13",
92
- "Document Index": "12",
93
- "Footnote": "2",
94
- "Form": "16",
95
- "Formula": "3",
96
- "Key-Value Region": "17",
97
- "List-item": "4",
98
- "Page-footer": "5",
99
- "Page-header": "6",
100
- "Picture": "7",
101
- "Section-header": "8",
102
- "Table": "9",
103
- "Text": "10",
104
- "Title": "11",
105
- "background": "0"
106
  },
107
  "label_noise_ratio": 0.5,
108
  "layer_norm_eps": 1e-05,
@@ -112,14 +137,14 @@
112
  "matcher_class_cost": 2.0,
113
  "matcher_gamma": 2.0,
114
  "matcher_giou_cost": 2.0,
115
- "model_type": "rt_detr",
116
  "normalize_before": false,
117
  "num_denoising": 100,
118
  "num_feature_levels": 3,
119
  "num_queries": 300,
120
  "positional_encoding_temperature": 10000,
121
  "torch_dtype": "float32",
122
- "transformers_version": "4.46.2",
123
  "use_focal_loss": true,
124
  "use_pretrained_backbone": false,
125
  "use_timm_backbone": false,
 
3
  "activation_function": "silu",
4
  "anchor_image_size": null,
5
  "architectures": [
6
+ "RTDetrV2ForObjectDetection"
7
  ],
8
  "attention_dropout": 0.0,
9
  "auxiliary_loss": true,
10
  "backbone": null,
11
  "backbone_config": {
12
+ "depths": [
13
+ 3,
14
+ 4,
15
+ 6,
16
+ 3
17
+ ],
18
+ "downsample_in_bottleneck": false,
19
+ "downsample_in_first_stage": false,
20
+ "embedding_size": 64,
21
+ "hidden_act": "relu",
22
+ "hidden_sizes": [
23
+ 256,
24
+ 512,
25
+ 1024,
26
+ 2048
27
+ ],
28
+ "layer_type": "bottleneck",
29
  "model_type": "rt_detr_resnet",
30
+ "num_channels": 3,
31
  "out_features": [
32
  "stage2",
33
  "stage3",
 
37
  2,
38
  3,
39
  4
40
+ ],
41
+ "stage_names": [
42
+ "stem",
43
+ "stage1",
44
+ "stage2",
45
+ "stage3",
46
+ "stage4"
47
  ]
48
  },
49
  "backbone_kwargs": null,
 
59
  256
60
  ],
61
  "decoder_layers": 6,
62
+ "decoder_method": "default",
63
+ "decoder_n_levels": 3,
64
  "decoder_n_points": 4,
65
+ "decoder_offset_scale": 0.5,
66
  "dropout": 0.0,
67
  "encode_proj_layers": [
68
  2
 
89
  "freeze_backbone_batch_norms": true,
90
  "hidden_expansion": 1.0,
91
  "id2label": {
92
+ "0": "Caption",
93
+ "1": "Footnote",
94
+ "2": "Formula",
95
+ "3": "List-item",
96
+ "4": "Page-footer",
97
+ "5": "Page-header",
98
+ "6": "Picture",
99
+ "7": "Section-header",
100
+ "8": "Table",
101
+ "9": "Text",
102
+ "10": "Title",
103
+ "11": "Document Index",
104
+ "12": "Code",
105
+ "13": "Checkbox-Selected",
106
+ "14": "Checkbox-Unselected",
107
+ "15": "Form",
108
+ "16": "Key-Value Region"
 
109
  },
110
  "initializer_bias_prior_prob": null,
111
  "initializer_range": 0.01,
112
  "is_encoder_decoder": true,
113
  "label2id": {
114
+ "Caption": 0,
115
+ "Checkbox-Selected": 13,
116
+ "Checkbox-Unselected": 14,
117
+ "Code": 12,
118
+ "Document Index": 11,
119
+ "Footnote": 1,
120
+ "Form": 15,
121
+ "Formula": 2,
122
+ "Key-Value Region": 16,
123
+ "List-item": 3,
124
+ "Page-footer": 4,
125
+ "Page-header": 5,
126
+ "Picture": 6,
127
+ "Section-header": 7,
128
+ "Table": 8,
129
+ "Text": 9,
130
+ "Title": 10
 
131
  },
132
  "label_noise_ratio": 0.5,
133
  "layer_norm_eps": 1e-05,
 
137
  "matcher_class_cost": 2.0,
138
  "matcher_gamma": 2.0,
139
  "matcher_giou_cost": 2.0,
140
+ "model_type": "rt_detr_v2",
141
  "normalize_before": false,
142
  "num_denoising": 100,
143
  "num_feature_levels": 3,
144
  "num_queries": 300,
145
  "positional_encoding_temperature": 10000,
146
  "torch_dtype": "float32",
147
+ "transformers_version": "4.51.3",
148
  "use_focal_loss": true,
149
  "use_pretrained_backbone": false,
150
  "use_timm_backbone": false,
model_artifacts/layout/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31e60b4709571b613bc8736a9c982fb550d8d7a1809160a68a8282af60c8910b
3
- size 171666216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b37f85ce9e044df9069471a4b0b764e1be93a44cb7adadfe37e496e928e676f5
3
+ size 171658996