esun-choi committed on
Commit 9b879f1 · 1 Parent(s): dd92162

Initial Commit

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. __pycache__/craft.cpython-310.pyc +0 -0
  2. __pycache__/craft_utils.cpython-310.pyc +0 -0
  3. __pycache__/file_utils.cpython-310.pyc +0 -0
  4. __pycache__/imgproc.cpython-310.pyc +0 -0
  5. __pycache__/mosaik.cpython-310.pyc +0 -0
  6. __pycache__/ner.cpython-310.pyc +0 -0
  7. __pycache__/recognize.cpython-310.pyc +0 -0
  8. __pycache__/refinenet.cpython-310.pyc +0 -0
  9. __pycache__/seg.cpython-310.pyc +0 -0
  10. basenet/__init__.py +0 -0
  11. basenet/__pycache__/__init__.cpython-310.pyc +0 -0
  12. basenet/__pycache__/vgg16_bn.cpython-310.pyc +0 -0
  13. basenet/vgg16_bn.py +72 -0
  14. craft.py +76 -0
  15. craft_utils.py +217 -0
  16. cropped2.png +0 -0
  17. dino/__pycache__/predict.cpython-310.pyc +0 -0
  18. dino/dino/__init__.py +1 -0
  19. dino/dino/__pycache__/__init__.cpython-310.pyc +0 -0
  20. dino/dino/__pycache__/model.cpython-310.pyc +0 -0
  21. dino/dino/__pycache__/modules.cpython-310.pyc +0 -0
  22. dino/dino/__pycache__/parts.cpython-310.pyc +0 -0
  23. dino/dino/model.py +47 -0
  24. dino/dino/parts.py +67 -0
  25. dino/predict.py +76 -0
  26. file_utils.py +77 -0
  27. imgproc.py +70 -0
  28. input/1.png +0 -0
  29. input/2.png +0 -0
  30. main.py +321 -0
  31. mosaik.py +29 -0
  32. ner.py +102 -0
  33. output/mosaiked.png +0 -0
  34. output/text_recongnize2.txt +40 -0
  35. readme.txt +3 -0
  36. recognize.py +14 -0
  37. refinenet.py +65 -0
  38. seg.py +58 -0
  39. sr/__pycache__/sr.cpython-310.pyc +0 -0
  40. sr/esrgan +1 -0
  41. sr/sr.py +15 -0
  42. temporal_mask/mask.png +0 -0
  43. text_area/new_1.png +0 -0
  44. text_area/new_10.png +0 -0
  45. text_area/new_11.png +0 -0
  46. text_area/new_12.png +0 -0
  47. text_area/new_13.png +0 -0
  48. text_area/new_14.png +0 -0
  49. text_area/new_15.png +0 -0
  50. text_area/new_16.png +0 -0
__pycache__/craft.cpython-310.pyc ADDED
Binary file (2.4 kB)

__pycache__/craft_utils.cpython-310.pyc ADDED
Binary file (5.7 kB)

__pycache__/file_utils.cpython-310.pyc ADDED
Binary file (2.51 kB)

__pycache__/imgproc.cpython-310.pyc ADDED
Binary file (2.1 kB)

__pycache__/mosaik.cpython-310.pyc ADDED
Binary file (718 Bytes)

__pycache__/ner.cpython-310.pyc ADDED
Binary file (914 Bytes)

__pycache__/recognize.cpython-310.pyc ADDED
Binary file (736 Bytes)

__pycache__/refinenet.cpython-310.pyc ADDED
Binary file (1.95 kB)

__pycache__/seg.cpython-310.pyc ADDED
Binary file (1.53 kB)
 
basenet/__init__.py ADDED
File without changes
basenet/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (164 Bytes)

basenet/__pycache__/vgg16_bn.cpython-310.pyc ADDED
Binary file (2.27 kB)
 
basenet/vgg16_bn.py ADDED
@@ -0,0 +1,72 @@
from collections import namedtuple

import torch
import torch.nn as nn
import torch.nn.init as init
from torchvision import models

def init_weights(modules):
    for m in modules:
        if isinstance(m, nn.Conv2d):
            init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.weight.data.normal_(0, 0.01)
            m.bias.data.zero_()

class vgg16_bn(torch.nn.Module):
    def __init__(self, pretrained=True, freeze=True):
        super(vgg16_bn, self).__init__()

        vgg_pretrained_features = models.vgg16_bn(pretrained=pretrained).features
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        for x in range(12):          # conv2_2
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(12, 19):      # conv3_3
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(19, 29):      # conv4_3
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(29, 39):      # conv5_3
            self.slice4.add_module(str(x), vgg_pretrained_features[x])

        # fc6, fc7 without atrous conv
        self.slice5 = torch.nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
            nn.Conv2d(1024, 1024, kernel_size=1)
        )

        if not pretrained:
            init_weights(self.slice1.modules())
            init_weights(self.slice2.modules())
            init_weights(self.slice3.modules())
            init_weights(self.slice4.modules())

        init_weights(self.slice5.modules())  # no pretrained model for fc6 and fc7

        if freeze:
            for param in self.slice1.parameters():  # only first conv
                param.requires_grad = False

    def forward(self, X):
        h = self.slice1(X)
        h_relu2_2 = h
        h = self.slice2(h)
        h_relu3_2 = h
        h = self.slice3(h)
        h_relu4_3 = h
        h = self.slice4(h)
        h_relu5_3 = h
        h = self.slice5(h)
        h_fc7 = h
        vgg_outputs = namedtuple("VggOutputs", ['fc7', 'relu5_3', 'relu4_3', 'relu3_2', 'relu2_2'])
        out = vgg_outputs(h_fc7, h_relu5_3, h_relu4_3, h_relu3_2, h_relu2_2)
        return out
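A quick shape check for the backbone above (a sketch, not part of the commit; the slice boundaries and the five named outputs come straight from the class definition):

import torch
from basenet.vgg16_bn import vgg16_bn

# randomly initialized so no weight download is needed
net = vgg16_bn(pretrained=False, freeze=False).eval()
with torch.no_grad():
    feats = net(torch.randn(1, 3, 224, 224))
print(feats.fc7.shape, feats.relu5_3.shape, feats.relu2_2.shape)
# fc7 and relu5_3 share the 1/16 scale (so craft.py can concatenate them);
# relu2_2 at 1/2 scale fixes CRAFT's output resolution
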
craft.py ADDED
@@ -0,0 +1,76 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from basenet.vgg16_bn import vgg16_bn, init_weights

class double_conv(nn.Module):
    def __init__(self, in_ch, mid_ch, out_ch):
        super(double_conv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch + mid_ch, mid_ch, kernel_size=1),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.conv(x)
        return x


class CRAFT(nn.Module):
    def __init__(self, pretrained=False, freeze=False):
        super(CRAFT, self).__init__()

        """ Base network """
        self.basenet = vgg16_bn(pretrained, freeze)

        """ U network """
        self.upconv1 = double_conv(1024, 512, 256)
        self.upconv2 = double_conv(512, 256, 128)
        self.upconv3 = double_conv(256, 128, 64)
        self.upconv4 = double_conv(128, 64, 32)

        num_class = 2
        self.conv_cls = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(32, 16, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=1), nn.ReLU(inplace=True),
            nn.Conv2d(16, num_class, kernel_size=1),
        )

        init_weights(self.upconv1.modules())
        init_weights(self.upconv2.modules())
        init_weights(self.upconv3.modules())
        init_weights(self.upconv4.modules())
        init_weights(self.conv_cls.modules())

    def forward(self, x):
        """ Base network """
        sources = self.basenet(x)

        """ U network """
        y = torch.cat([sources[0], sources[1]], dim=1)
        y = self.upconv1(y)

        y = F.interpolate(y, size=sources[2].size()[2:], mode='bilinear', align_corners=False)
        y = torch.cat([y, sources[2]], dim=1)
        y = self.upconv2(y)

        y = F.interpolate(y, size=sources[3].size()[2:], mode='bilinear', align_corners=False)
        y = torch.cat([y, sources[3]], dim=1)
        y = self.upconv3(y)

        y = F.interpolate(y, size=sources[4].size()[2:], mode='bilinear', align_corners=False)
        y = torch.cat([y, sources[4]], dim=1)
        feature = self.upconv4(y)

        y = self.conv_cls(feature)

        return y.permute(0, 2, 3, 1), feature
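A minimal forward pass through the detector above (a sketch, not part of the commit; untrained weights, CPU only, just to show the two score channels):

import torch
from craft import CRAFT

net = CRAFT(pretrained=False).eval()
with torch.no_grad():
    y, feature = net(torch.randn(1, 3, 768, 768))
# y holds the two maps CRAFT predicts: region (text) and affinity (link)
score_text, score_link = y[0, :, :, 0], y[0, :, :, 1]
print(score_text.shape)  # half the input resolution: torch.Size([384, 384])
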
craft_utils.py ADDED
@@ -0,0 +1,217 @@
import numpy as np
import cv2
import math

def warpCoord(Minv, pt):
    out = np.matmul(Minv, (pt[0], pt[1], 1))
    return np.array([out[0] / out[2], out[1] / out[2]])


def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
    linkmap = linkmap.copy()
    textmap = textmap.copy()
    img_h, img_w = textmap.shape

    ret, text_score = cv2.threshold(textmap, low_text, 1, 0)
    ret, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)

    text_score_comb = np.clip(text_score + link_score, 0, 1)
    nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8), connectivity=4)

    det = []
    mapper = []
    for k in range(1, nLabels):
        # size filtering
        size = stats[k, cv2.CC_STAT_AREA]
        if size < 10: continue

        # thresholding
        if np.max(textmap[labels == k]) < text_threshold: continue

        # make segmentation map
        segmap = np.zeros(textmap.shape, dtype=np.uint8)
        segmap[labels == k] = 255
        segmap[np.logical_and(link_score == 1, text_score == 0)] = 0  # remove link-only area
        x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
        w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
        niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
        sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
        # boundary check
        if sx < 0: sx = 0
        if sy < 0: sy = 0
        if ex >= img_w: ex = img_w
        if ey >= img_h: ey = img_h
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
        segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)

        # make box
        np_contours = np.roll(np.array(np.where(segmap != 0)), 1, axis=0).transpose().reshape(-1, 2)
        rectangle = cv2.minAreaRect(np_contours)
        box = cv2.boxPoints(rectangle)

        # align diamond-shape
        w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
        box_ratio = max(w, h) / (min(w, h) + 1e-5)
        if abs(1 - box_ratio) <= 0.1:
            l, r = min(np_contours[:, 0]), max(np_contours[:, 0])
            t, b = min(np_contours[:, 1]), max(np_contours[:, 1])
            box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)

        # make clockwise order
        startidx = box.sum(axis=1).argmin()
        box = np.roll(box, 4 - startidx, 0)
        box = np.array(box)

        det.append(box)
        mapper.append(k)

    return det, labels, mapper

def getPoly_core(boxes, labels, mapper, linkmap):
    # configs
    num_cp = 5
    max_len_ratio = 0.7
    expand_ratio = 1.45
    max_r = 2.0
    step_r = 0.2

    polys = []
    for k, box in enumerate(boxes):
        # size filter for small instances
        w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1)
        if w < 10 or h < 10:
            polys.append(None); continue

        # warp image
        tar = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
        M = cv2.getPerspectiveTransform(box, tar)
        word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST)
        try:
            Minv = np.linalg.inv(M)
        except np.linalg.LinAlgError:
            polys.append(None); continue

        # binarization for the selected label
        cur_label = mapper[k]
        word_label[word_label != cur_label] = 0
        word_label[word_label > 0] = 1

        # collect column-wise top/bottom contour points
        cp = []
        max_len = -1
        for i in range(w):
            region = np.where(word_label[:, i] != 0)[0]
            if len(region) < 2: continue
            cp.append((i, region[0], region[-1]))
            length = region[-1] - region[0] + 1
            if length > max_len: max_len = length

        # pass if max_len is similar to h
        if h * max_len_ratio < max_len:
            polys.append(None); continue

        # get pivot points with fixed length
        tot_seg = num_cp * 2 + 1
        seg_w = w / tot_seg             # segment width
        pp = [None] * num_cp            # init pivot points
        cp_section = [[0, 0]] * tot_seg
        seg_height = [0] * num_cp
        seg_num = 0
        num_sec = 0
        prev_h = -1
        for i in range(0, len(cp)):
            (x, sy, ey) = cp[i]
            if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
                # average previous segment
                if num_sec == 0: break
                cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec]
                num_sec = 0

                # reset variables
                seg_num += 1
                prev_h = -1

            # accumulate center points
            cy = (sy + ey) * 0.5
            cur_h = ey - sy + 1
            cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy]
            num_sec += 1

            if seg_num % 2 == 0: continue  # no polygon area

            if prev_h < cur_h:
                pp[int((seg_num - 1) / 2)] = (x, cy)
                seg_height[int((seg_num - 1) / 2)] = cur_h
                prev_h = cur_h

        # processing last segment
        if num_sec != 0:
            cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec]

        # pass if the number of pivots is not sufficient or segment width is smaller than character height
        if None in pp or seg_w < np.max(seg_height) * 0.25:
            polys.append(None); continue

        # calc median maximum of pivot points
        half_char_h = np.median(seg_height) * expand_ratio / 2

        # calc gradient and apply to make horizontal pivots
        new_pp = []
        for i, (x, cy) in enumerate(pp):
            dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
            dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
            if dx == 0:  # vertical pivot when dx is zero
                new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
                continue
            rad = -math.atan2(dy, dx)
            c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
            new_pp.append([x - s, cy - c, x + s, cy + c])

        # get edge points to cover character heatmaps
        isSppFound, isEppFound = False, False
        grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
        grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
        for r in np.arange(0.5, max_r, step_r):
            dx = 2 * half_char_h * r
            if not isSppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_s * dx
                p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    spp = p
                    isSppFound = True
            if not isEppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_e * dx
                p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    epp = p
                    isEppFound = True
            if isSppFound and isEppFound:
                break

        # pass if the boundary of the polygon is not found
        if not (isSppFound and isEppFound):
            polys.append(None); continue

        # make final polygon
        poly = []
        poly.append(warpCoord(Minv, (spp[0], spp[1])))
        for p in new_pp:
            poly.append(warpCoord(Minv, (p[0], p[1])))
        poly.append(warpCoord(Minv, (epp[0], epp[1])))
        poly.append(warpCoord(Minv, (epp[2], epp[3])))
        for p in reversed(new_pp):
            poly.append(warpCoord(Minv, (p[2], p[3])))
        poly.append(warpCoord(Minv, (spp[2], spp[3])))

        # add to final result
        polys.append(np.array(poly))

    return polys

def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False):
    boxes, labels, mapper = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)

    if poly:
        polys = getPoly_core(boxes, labels, mapper, linkmap)
    else:
        polys = [None] * len(boxes)

    return boxes, polys

def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
    if len(polys) > 0:
        polys = np.array(polys)
        for k in range(len(polys)):
            if polys[k] is not None:
                polys[k] *= (ratio_w * ratio_net, ratio_h * ratio_net)
    return polys
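What getDetBoxes consumes and returns, shown on synthetic heatmaps (a sketch, not part of the commit; in the pipeline both maps come from CRAFT's two output channels at half the image resolution):

import numpy as np
from craft_utils import getDetBoxes, adjustResultCoordinates

# fake region/affinity maps: one bright blob standing in for a word
textmap = np.zeros((192, 192), dtype=np.float32)
textmap[80:100, 40:140] = 0.9
linkmap = np.zeros_like(textmap)

boxes, polys = getDetBoxes(textmap, linkmap, text_threshold=0.7, link_threshold=0.4, low_text=0.4)
# heatmaps are at 1/2 scale, so ratio_net=2 maps coordinates back to the image
boxes = adjustResultCoordinates(boxes, ratio_w=1.0, ratio_h=1.0)
print(len(boxes), boxes[0])  # one 4-point box around the blob
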
cropped2.png ADDED
dino/__pycache__/predict.cpython-310.pyc ADDED
Binary file (1.89 kB)
 
dino/dino/__init__.py ADDED
@@ -0,0 +1 @@
from .model import Dinov2
dino/dino/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (202 Bytes)

dino/dino/__pycache__/model.cpython-310.pyc ADDED
Binary file (1.63 kB)

dino/dino/__pycache__/modules.cpython-310.pyc ADDED
Binary file (2.58 kB)

dino/dino/__pycache__/parts.cpython-310.pyc ADDED
Binary file (2.58 kB)
 
dino/dino/model.py ADDED
@@ -0,0 +1,47 @@
from torch.utils.checkpoint import checkpoint

from .parts import *


class Dinov2(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=False):
        super(Dinov2, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear
        self.checkpointing = False

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        def run(module, *tensors):
            # recompute activations during backward instead of storing them
            if self.checkpointing and self.training:
                return checkpoint(module, *tensors)
            return module(*tensors)

        x1 = run(self.inc, x)
        x2 = run(self.down1, x1)
        x3 = run(self.down2, x2)
        x4 = run(self.down3, x3)
        x5 = run(self.down4, x4)
        x = run(self.up1, x5, x4)
        x = run(self.up2, x, x3)
        x = run(self.up3, x, x2)
        x = run(self.up4, x, x1)
        logits = run(self.outc, x)
        return logits

    def use_checkpointing(self):
        # torch.utils.checkpoint is a module, not a callable, so rebinding the
        # submodules through it (as the original did) fails at call time;
        # toggle a flag and wrap each stage inside forward() instead
        self.checkpointing = True
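A shape check for the model above (a sketch, not part of the commit; despite the name, the class is a plain U-Net, and seg.py loads trained weights for it from weights/dinov2_model.pth):

import torch
from dino.dino import Dinov2

# 2 classes = background vs. label mask, matching seg.py's two mask_values
net = Dinov2(n_channels=3, n_classes=2).eval()
with torch.no_grad():
    logits = net(torch.randn(1, 3, 256, 256))
print(logits.shape)  # torch.Size([1, 2, 256, 256]) -- per-pixel class scores
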
dino/dino/parts.py ADDED
@@ -0,0 +1,67 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class DoubleConv(nn.Module):

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)
dino/predict.py ADDED
@@ -0,0 +1,76 @@
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image


def preprocess(mask_values, pil_img, scale, is_mask):
    pil_img = Image.fromarray(pil_img)
    w, h = pil_img.size
    newW, newH = int(scale * w), int(scale * h)
    pil_img = pil_img.resize((newW, newH))
    img = np.asarray(pil_img)

    if is_mask:
        mask = np.zeros((newH, newW), dtype=np.int64)
        for i, v in enumerate(mask_values):
            if img.ndim == 2:
                mask[img == v] = i
            else:
                mask[(img == v).all(-1)] = i

        return mask

    else:
        if img.ndim == 2:
            img = img[np.newaxis, ...]
        else:
            img = img.transpose((2, 0, 1))

        if (img > 1).any():
            img = img / 255.0

        return img


def predict_img(net,
                full_img,
                device,
                scale_factor=1,
                out_threshold=0.5):
    net.eval()
    img = torch.from_numpy(preprocess(None, full_img, scale_factor, is_mask=False))
    img = img.unsqueeze(0)
    img = img.to(device=device, dtype=torch.float32)

    with torch.no_grad():
        output = net(img).cpu()

        if net.n_classes > 1:
            mask = output.argmax(dim=1)
        else:
            mask = torch.sigmoid(output) > out_threshold

    return mask[0].long().squeeze().numpy()


def mask_to_image(mask: np.ndarray, mask_values):
    if isinstance(mask_values[0], list):
        out = np.zeros((mask.shape[-2], mask.shape[-1], len(mask_values[0])), dtype=np.uint8)
    elif mask_values == [0, 1]:
        out = np.zeros((mask.shape[-2], mask.shape[-1]), dtype=bool)
    else:
        out = np.zeros((mask.shape[-2], mask.shape[-1]), dtype=np.uint8)

    if mask.ndim == 3:
        mask = np.argmax(mask, axis=0)

    for i, v in enumerate(mask_values):
        out[mask == i] = v

    return Image.fromarray(out)
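Hedged usage sketch for the two helpers above (not part of the commit; seg.py does exactly this with a checkpoint loaded from weights/dinov2_model.pth, while here an untrained net stands in):

import numpy as np
import torch
from dino.dino import Dinov2
from dino.predict import predict_img, mask_to_image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Dinov2(n_channels=3, n_classes=2).to(device)

img = np.zeros((256, 256, 3), dtype=np.uint8)         # stand-in for a photo
mask = predict_img(net, img, device)                  # HxW array of class ids
mask_to_image(mask, [[0, 0, 0], [255, 255, 255]]).save("mask.png")
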
file_utils.py ADDED
@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
import os
import numpy as np
import cv2
import imgproc
from mosaik import mosaik

# borrowed from https://github.com/lengstrom/fast-style-transfer/blob/master/src/utils.py
def get_files(img_dir):
    imgs, masks, xmls = list_files(img_dir)
    return imgs, masks, xmls

def list_files(in_path):
    img_files = []
    mask_files = []
    gt_files = []
    for (dirpath, dirnames, filenames) in os.walk(in_path):
        for file in filenames:
            filename, ext = os.path.splitext(file)
            ext = str.lower(ext)
            if ext == '.jpg' or ext == '.jpeg' or ext == '.gif' or ext == '.png' or ext == '.pgm':
                img_files.append(os.path.join(dirpath, file))
            elif ext == '.bmp':
                mask_files.append(os.path.join(dirpath, file))
            elif ext == '.xml' or ext == '.gt' or ext == '.txt':
                gt_files.append(os.path.join(dirpath, file))
            elif ext == '.zip':
                continue
    # img_files.sort()
    # mask_files.sort()
    # gt_files.sort()
    return img_files, mask_files, gt_files

def saveResult(img_file, img, boxes, dirname='./result/', verticals=None, texts=None):
    """ save text detection result one by one
    Args:
        img_file (str): image file name
        img (array): raw image context
        boxes (array): array of result file
            Shape: [num_detections, 4] for BB output / [num_detections, 4] for QUAD output
    Return:
        None
    """
    img = np.array(img)

    # make result file list
    filename, file_ext = os.path.splitext(os.path.basename(img_file))

    # result directory
    res_file = dirname + "res_" + filename + '.txt'
    res_img_file = dirname + "res_" + filename + '.jpg'

    if not os.path.isdir(dirname):
        os.mkdir(dirname)

    with open(res_file, 'w') as f:
        for i, box in enumerate(boxes):
            poly = np.array(box).astype(np.int32).reshape((-1))
            strResult = ','.join([str(p) for p in poly]) + '\r\n'
            f.write(strResult)

            poly = poly.reshape(-1, 2)
            cv2.polylines(img, [poly.reshape((-1, 1, 2))], True, color=(0, 0, 255), thickness=2)
            ptColor = (0, 255, 255)
            if verticals is not None:
                if verticals[i]:
                    ptColor = (255, 0, 0)

            if texts is not None:
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 0.5
                cv2.putText(img, "{}".format(texts[i]), (poly[0][0] + 1, poly[0][1] + 1), font, font_scale, (0, 0, 0), thickness=1)
                cv2.putText(img, "{}".format(texts[i]), tuple(poly[0]), font, font_scale, (0, 255, 255), thickness=1)

    # Save result image
    cv2.imwrite(res_img_file, img)
    return img
imgproc.py ADDED
@@ -0,0 +1,70 @@
"""
Copyright (c) 2019-present NAVER Corp.
MIT License
"""

# -*- coding: utf-8 -*-
import numpy as np
from skimage import io
import cv2

def loadImage(img_file):
    img = io.imread(img_file)           # RGB order
    if img.shape[0] == 2: img = img[0]
    if len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    if img.shape[2] == 4: img = img[:, :, :3]
    img = np.array(img)

    return img

def normalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
    # should be RGB order
    img = in_img.copy().astype(np.float32)

    img -= np.array([mean[0] * 255.0, mean[1] * 255.0, mean[2] * 255.0], dtype=np.float32)
    img /= np.array([variance[0] * 255.0, variance[1] * 255.0, variance[2] * 255.0], dtype=np.float32)
    return img

def denormalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
    # should be RGB order
    img = in_img.copy()
    img *= variance
    img += mean
    img *= 255.0
    img = np.clip(img, 0, 255).astype(np.uint8)
    return img

def resize_aspect_ratio(img, square_size, interpolation, mag_ratio=1):
    height, width, channel = img.shape

    # magnify image size
    target_size = mag_ratio * max(height, width)

    # set original image size
    if target_size > square_size:
        target_size = square_size

    ratio = target_size / max(height, width)

    target_h, target_w = int(height * ratio), int(width * ratio)
    proc = cv2.resize(img, (target_w, target_h), interpolation=interpolation)

    # make canvas and paste image
    target_h32, target_w32 = target_h, target_w
    if target_h % 32 != 0:
        target_h32 = target_h + (32 - target_h % 32)
    if target_w % 32 != 0:
        target_w32 = target_w + (32 - target_w % 32)
    resized = np.zeros((target_h32, target_w32, channel), dtype=np.float32)
    resized[0:target_h, 0:target_w, :] = proc
    target_h, target_w = target_h32, target_w32

    size_heatmap = (int(target_w / 2), int(target_h / 2))

    return resized, ratio, size_heatmap

def cvt2HeatmapImg(img):
    img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
    img = cv2.applyColorMap(img, cv2.COLORMAP_JET)
    return img
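A sketch of the preprocessing chain main.py builds from these helpers (not part of the commit; the values mirror the argparse defaults, canvas 1280 and magnification 1.5):

import cv2
import torch
import imgproc

image = imgproc.loadImage("input/1.png")  # RGB uint8
img_resized, ratio, size_heatmap = imgproc.resize_aspect_ratio(
    image, 1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
x = imgproc.normalizeMeanVariance(img_resized)          # float32, ImageNet stats
x = torch.from_numpy(x).permute(2, 0, 1).unsqueeze(0)   # HWC -> NCHW
# detections found on the half-resolution heatmaps are later mapped back with
# 1/ratio times the network stride via craft_utils.adjustResultCoordinates
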
input/1.png ADDED
input/2.png ADDED
main.py ADDED
@@ -0,0 +1,321 @@
from recognize import recongize
from ner import ner
import os
import time
import argparse
from sr.sr import sr
import torch
from scipy.ndimage import gaussian_filter
from PIL import Image
import numpy as np
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from mosaik import mosaik
import cv2
from skimage import io
import craft_utils
import imgproc
import file_utils
from seg import segmentation, mask_percentage
from craft import CRAFT
from collections import OrderedDict
import gradio as gr
from refinenet import RefineNet


# loads the CRAFT / refiner checkpoints (strips a leading "module." left by DataParallel)
def copyStateDict(state_dict):
    if list(state_dict.keys())[0].startswith("module"):
        start_idx = 1
    else:
        start_idx = 0
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = ".".join(k.split(".")[start_idx:])
        new_state_dict[name] = v
    return new_state_dict

def str2bool(v):
    return v.lower() in ("yes", "y", "true", "t", "1")

parser = argparse.ArgumentParser(description='CRAFT Text Detection')
parser.add_argument('--trained_model', default='weights/craft_mlt_25k.pth', type=str, help='pretrained CRAFT model')
parser.add_argument('--text_threshold', default=0.7, type=float, help='text confidence threshold')
parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score')
parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold')
parser.add_argument('--cuda', default=True, type=str2bool, help='use CUDA for inference')
parser.add_argument('--canvas_size', default=1280, type=int, help='image size for inference')
parser.add_argument('--mag_ratio', default=1.5, type=float, help='image magnification ratio')
parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
parser.add_argument('--show_time', default=False, action='store_true', help='show processing time')
parser.add_argument('--test_folder', default='data/', type=str, help='folder path to input images')
parser.add_argument('--refine', default=True, help='enable link refiner')
parser.add_argument('--image_path', default="input/1.png", help='input image')
parser.add_argument('--refiner_model', default='weights/craft_refiner_CTW1500.pth', type=str, help='pretrained refiner model')
parser.add_argument('--result_path', default="result.png", help='result image')
parser.add_argument('--percent', default=25, type=int, help='percent of invoice in full image frame')
args = parser.parse_args()


image_list, _, _ = file_utils.get_files(args.test_folder)


def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
    t0 = time.time()

    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)
    x = Variable(x.unsqueeze(0))
    if cuda:
        x = x.cuda()

    with torch.no_grad():
        y, feature = net(x)

    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    return boxes, polys, ret_score_text

def text_detect(image, net, refine_net):
    bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, refine_net)
    return bboxes


def get_box_from_refer(reference_boxes):
    # boxes were found on the x2 super-resolved crop, so halve them
    real_boxes = []
    for box in reference_boxes:
        real_boxes.append(box // 2)
    return real_boxes

def get_min_max(box):
    xlist = []
    ylist = []
    for coor in box:
        xlist.append(coor[0])
        ylist.append(coor[1])
    return min(xlist), max(xlist), min(ylist), max(ylist)

def main(image_path0):
    # Step 1
    # ==> load the CRAFT and RefineNet models and put them on the CUDA device.
    net = CRAFT()
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(args.trained_model)))

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = False

    net.eval()

    refine_net = None
    if args.refine:
        refine_net = RefineNet()
        if args.cuda:
            refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model)))
            refine_net = refine_net.cuda()

        refine_net.eval()
        args.poly = True

    # Step 2
    # the image dropped into the Gradio input box arrives here as A.
    A = image_path0
    image_list = []
    image_list.append(A)
    for k, image_path in enumerate(image_list):
        image = imgproc.loadImage(image_path)
        if image.shape[2] > 3:
            image = image[:, :, 0:3]

        original_image = image
        # segment only the invoice (shipping-label) region with the dinov2 model.
        image2 = segmentation(image)
        image3 = Image.fromarray(image2)
        image3.save("temporal_mask/mask.png")
        # this produces a mask image (white pixels on a black background).
        # from that mask, compute what percentage of the full image each blob (presumed invoice) occupies.
        contours_list, percentage_list = mask_percentage("temporal_mask/mask.png")
        normal_image_list = []

        small_coordinate_list = []
        original_coordinate_list = []

        # Step 3
        # percentage_list collects the per-blob percentages of the presumed invoices,
        # and contours_list is the matching list of their cropped, ordered contours;
        # e.g. the first element of percentage_list is the percentage of the first contour in contours_list.
        for index, percentage in enumerate(percentage_list):
            if 5 < percentage:
                # blobs covering more than 5% of the image go into the normal list:
                # they are large enough (presumed invoices) to read once zoomed in.
                # blobs under 5% go into small_coordinate_list and count as very small;
                # their text is barely visible even when zoomed, so the whole blob is mosaicked.
                contour = contours_list[index]

                x_list = []
                y_list = []
                contour2 = list(contour)

                for r in contour2:
                    r2 = r[0]
                    x_list.append(r2[0])
                    y_list.append(r2[1])
                x_min = min(x_list)
                y_min = min(y_list)
                x_max = max(x_list)
                y_max = max(y_list)
                original_coordinate_list.append([y_min, y_max, x_min, x_max])
                image2 = original_image[y_min:y_max, x_min:x_max, :]
                normal_image_list.append(image2)

            else:
                contour = contours_list[index]

                x_list = []
                y_list = []
                contour2 = list(contour)

                for r in contour2:
                    r2 = r[0]
                    x_list.append(r2[0])
                    y_list.append(r2[1])
                x_min = min(x_list)
                y_min = min(y_list)
                x_max = max(x_list)
                y_max = max(y_list)
                small_coordinate_list.append([y_min, y_max, x_min, x_max])  # coordinates of invoices under 5%

        # Step 4 (very small invoices)
        # small_coordinate_list gathers the very small invoices; if it is empty we go straight to Step 5.
        # otherwise (at least one element) mosaik() blurs every such region of the full image.
        if len(small_coordinate_list) > 0:
            original_image = mosaik(original_image, small_coordinate_list)
        else:
            pass

        # Step 5 (invoices of workable size)
        # normal_image_list holds invoices large enough that the text is readable when zoomed.
        # so that CRAFT can return text positions, each cropped invoice is upscaled with ESRGAN,
        # and the enhanced crop is fed to CRAFT to get all the text coordinates precisely.
        # the coordinates are not returned at the enhanced resolution; they are scaled back (//2)
        # to the original invoice crop to obtain the final coordinates.
        for index, normal_image in enumerate(normal_image_list):
            reference_image = sr(normal_image)
            reference_boxes = text_detect(reference_image, net=net, refine_net=refine_net)
            boxes = get_box_from_refer(reference_boxes)
            for index2, box in enumerate(boxes):
                xmin, xmax, ymin, ymax = get_min_max(box)

                text_area = normal_image[int(ymin):int(ymax), int(xmin):int(xmax), :]
                text_area = Image.fromarray(text_area)
                os.makedirs("text_area", exist_ok=True)
                text_area.save(f"text_area/new_{index2+1}.png")

                # Step 6 (text recognition, NER)
                # crop each box out of the invoice using the coordinates above.
                # the crop (the region presumed to contain text) goes into TrOCR,
                # which returns the text it reads inside the box.
                # the text goes into KoELECTRA to decide whether it is personal information.
                # if the box is judged personal information (label 1), it is mosaicked:
                # its coordinates are mapped back onto the invoice image and that region is blurred.
                # the partially mosaicked invoice is then pasted back into the full image.
                text = recongize(text_area)
                label = ner(text)
                with open("output/text_recongnize2.txt", "a") as recognized:
                    recognized.write(str(index2 + 1))
                    recognized.write(" ")
                    recognized.write(str(text))
                    recognized.write(" ")
                    recognized.write(str(label))
                    recognized.write("\n")
                print("done")
                if label == 1:
                    A = normal_image[int(ymin):int(ymax), int(xmin):int(xmax), :]
                    normal_image[int(ymin):int(ymax), int(xmin):int(xmax), :] = gaussian_filter(A, sigma=16)
                else:
                    pass
            a, b, c, d = original_coordinate_list[index]
            original_image[a:b, c:d, :] = normal_image
        original_image = Image.fromarray(original_image)
        original_image.save("output/mosaiked.png")
        print("masked complete")
    return original_image


# if __name__ == '__main__':
#     iface = gr.Interface(
#         fn=main,
#         inputs=gr.Image(type="filepath", label="Invoice Image"),
#         outputs=gr.Image(type="pil", label="Masked Invoice Image"),
#         live=True
#     )
#     iface.launch()
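The Gradio entry point ships commented out. A minimal driver for exercising the commit as-is (a sketch: run_demo.py is hypothetical, and the weight files named in the argparse defaults plus a CUDA device are assumed):

# run_demo.py -- hypothetical driver, not part of this commit
from main import main   # parses the argparse defaults on import

masked = main("input/1.png")   # returns a PIL image with personal info blurred
masked.save("demo_masked.png")
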
mosaik.py ADDED
@@ -0,0 +1,29 @@
import numpy as np
from PIL import Image
import cv2
from scipy.ndimage import gaussian_filter


def mosaik(img, bboxes):
    # each box is [y_min, y_max, x_min, x_max]
    for box in bboxes:
        cropped = img[box[0]:box[1], box[2]:box[3], :]

        cropped = np.array(cropped)
        # a heavy Gaussian blur stands in for pixelation
        cropped = gaussian_filter(cropped, sigma=16)
        img[box[0]:box[1], box[2]:box[3], :] = cropped

    return img
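For reference, the box layout mosaik() expects, shown on a synthetic frame (a sketch, not part of the commit; the blur is the same sigma=16 Gaussian main.py applies to personal-information boxes):

import numpy as np
from mosaik import mosaik

frame = np.zeros((200, 300, 3), dtype=np.uint8)   # stand-in for a photo
# regions are given as [y_min, y_max, x_min, x_max]
frame = mosaik(frame, [[10, 50, 20, 120], [100, 160, 30, 90]])
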
ner.py ADDED
@@ -0,0 +1,102 @@
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
from collections import defaultdict
import torch

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda")
tokenizer = AutoTokenizer.from_pretrained("Leo97/KoELECTRA-small-v3-modu-ner")
model = AutoModelForTokenClassification.from_pretrained("Leo97/KoELECTRA-small-v3-modu-ner")
model.to(device)


def check_entity(entities):
    # label 1 as soon as any location (LC) or person (PS) entity shows up
    for entity_info in entities:
        entity_value = entity_info.get('entity', '').upper()
        if 'LC' in entity_value or 'PS' in entity_value:
            return 1
    return 0

def ner(example):
    ner = pipeline("ner", model=model, tokenizer=tokenizer, device=device)
    ner_results = ner(example)
    ner_results = check_entity(ner_results)
    return ner_results


# alternative 1
# def find_longest_value_key(input_dict):
#     max_length = 0
#     max_length_keys = []
#
#     for key, value in input_dict.items():
#         current_length = len(value)
#         if current_length > max_length:
#             max_length = current_length
#             max_length_keys = [key]
#         elif current_length == max_length:
#             max_length_keys.append(key)
#
#     if len(max_length_keys) == 1:
#         return 0
#     else:
#         return 1


# def find_longest_value_key2(input_dict):
#     if not input_dict:
#         return None
#
#     max_key = max(input_dict, key=lambda k: len(input_dict[k]))
#     return max_key


# def find_most_frequent_entity(entities):
#     entity_counts = defaultdict(list)
#
#     for item in entities:
#         split_entity = item['entity'].split('-')
#
#         entity_type = split_entity[1]
#         entity_counts[entity_type].append(item['score'])
#     number = find_longest_value_key(entity_counts)
#     if number == 1:
#         max_entities = []
#         max_score_average = -1
#
#         for entity, scores in entity_counts.items():
#             score_average = sum(scores) / len(scores)
#
#             if score_average > max_score_average:
#                 max_entities = [entity]
#                 max_score_average = score_average
#             elif score_average == max_score_average:
#                 max_entities.append(entity)
#         if len(max_entities) > 0:
#             return max_entities if len(max_entities) > 1 else max_entities[0]
#         else:
#             return "Do not mosaik"
#     else:
#         A = find_longest_value_key2(entity_counts)
#
#         return A


# if even a single PS or LC entity is present, return PS/LC right away

# label = filtering(ner_results)
# if label.find("PS") > -1 or label.find("LC") > -1:
#     return 1
# else:
#     return 0
# print(ner("홍길동"))

# label = check_label(example)
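What the live path above computes, end to end (a sketch, not part of the commit; 홍길동 is a placeholder person name, so a PS entity - and therefore label 1 - is the expected outcome, while a bare amount should carry no PS/LC entity):

from ner import ner

print(ner("홍길동"))   # expected 1 -> main.py blurs the box
print(ner("5,000"))    # expected 0 -> box left untouched
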
output/mosaiked.png ADDED
output/text_recongnize2.txt ADDED
@@ -0,0 +1,40 @@
1 6259 - 9409 - 7040 main. 0
1 6259 - 9409 - 7040 main. 0
2 3H10 - 4 0
2 3H10 - 4 0
3 0
3 0
4 0
4 0
5 032 - 990 - / 일0110 - 750 - - 0
5 032 - 990 - / 일0110 - 750 - - 0
6 " 0
6 " 0
7 μ •κ°€λ„κ΄‘μ‹œλ‹Ήμ•½κ΅¬λŒμ—°λ™1 - 1λ²ˆμ§€ 1
7 μ •κ°€λ„κ΄‘μ‹œλ‹Ήμ•½κ΅¬λŒμ—°λ™1 - 1λ²ˆμ§€ 1
8 덕은1 - 10 1
8 덕은1 - 10 1
9 5, 000 0
9 5, 000 0
10 딷써 - 00ν•„μ†Œμ„Έ1 0
10 딷써 - 00ν•„μ†Œμ„Έ1 0
11 λͺ…ν™” 0
11 λͺ…ν™” 0
12 Iμœ¨ν™”μ‹œλ•…κ΅¬ν˜„κ΅¬ν™©μ „ν™104 - 1011λ²ˆμ‹œ 1
12 Iμœ¨ν™”μ‹œλ•…κ΅¬ν˜„κ΅¬ν™©μ „ν™104 - 1011λ²ˆμ‹œ 1
13 은린면화 : 624. 8μ² κ³Όμž₯일ꡰ림 : 산뢀싀전리 1
13 은린면화 : 624. 8μ² κ³Όμž₯일ꡰ림 : 산뢀싀전리 1
14 - 650 / 010 / 0101 - 2021 0
14 - 650 / 010 / 0101 - 2021 0
15 μ ‘μš°μž…μž : 2019. 12. 02 0
15 μ ‘μš°μž…μž : 2019. 12. 02 0
16 838 / - 252 / SIN / SI - 255 - 2 / 0
16 838 / - 252 / SIN / SI - 255 - 2 / 0
17 곡리λ‹₯μœ‘λ³€μ—½ 1획 1ν˜„μ§€ ( ν™˜κ΄€μ‹€ν™”λ‹Ήμ˜ 0
17 곡리λ‹₯μœ‘λ³€μ—½ 1획 1ν˜„μ§€ ( ν™˜κ΄€μ‹€ν™”λ‹Ήμ˜ 0
18 λŒ€ν’€λ“±μΌ 0
18 λŒ€ν’€λ“±μΌ 0
19 λ²Όλ₯Έ ( 용쑰인영기격50원 ) 0
19 λ²Όλ₯Έ ( 용쑰인영기격50원 ) 0
20 κΈ°μ–‘ν™” 0
20 κΈ°μ–‘ν™” 0
readme.txt ADDED
@@ -0,0 +1,3 @@
1. This code is designed to run on a CUDA device.
2. Files we wrote: main.py, mosaik.py, ner.py, recognize.py, seg.py, sr.py
3.
recognize.py ADDED
@@ -0,0 +1,14 @@
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, AutoTokenizer
import unicodedata


def recongize(img):
    # processor, model and tokenizer are loaded from the local "trocr_weight" directory on every call
    processor = TrOCRProcessor.from_pretrained("trocr_weight")
    model = VisionEncoderDecoderModel.from_pretrained("trocr_weight")
    tokenizer = AutoTokenizer.from_pretrained("trocr_weight")

    pixel_values = processor(img, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values, max_length=64)
    generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    generated_text = unicodedata.normalize("NFC", generated_text)
    return generated_text
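A sketch of calling the recognizer on one of the crops main.py saves (not part of the commit; assumes the trocr_weight directory holds a Korean TrOCR checkpoint, as the repo expects):

from PIL import Image
from recognize import recongize

crop = Image.open("text_area/new_1.png").convert("RGB")
print(recongize(crop))  # NFC-normalized text read from the crop
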
refinenet.py ADDED
@@ -0,0 +1,65 @@
"""
Copyright (c) 2019-present NAVER Corp.
MIT License
"""

# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from basenet.vgg16_bn import init_weights


class RefineNet(nn.Module):
    def __init__(self):
        super(RefineNet, self).__init__()

        self.last_conv = nn.Sequential(
            nn.Conv2d(34, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True)
        )

        self.aspp1 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=6, padding=6), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        self.aspp2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=12, padding=12), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        self.aspp3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=18, padding=18), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        self.aspp4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=24, padding=24), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        init_weights(self.last_conv.modules())
        init_weights(self.aspp1.modules())
        init_weights(self.aspp2.modules())
        init_weights(self.aspp3.modules())
        init_weights(self.aspp4.modules())

    def forward(self, y, upconv4):
        refine = torch.cat([y.permute(0, 3, 1, 2), upconv4], dim=1)
        refine = self.last_conv(refine)

        aspp1 = self.aspp1(refine)
        aspp2 = self.aspp2(refine)
        aspp3 = self.aspp3(refine)
        aspp4 = self.aspp4(refine)

        # out = torch.add([aspp1, aspp2, aspp3, aspp4], dim=1)
        out = aspp1 + aspp2 + aspp3 + aspp4
        return out.permute(0, 2, 3, 1)  # , refine.permute(0, 2, 3, 1)
seg.py ADDED
@@ -0,0 +1,58 @@
from dino.predict import predict_img, mask_to_image
import torch
from PIL import Image
import numpy as np
import cv2

# code for cropping out the invoice region.
# the dinov2 model is used as the segmentation model; it was trained, its pretrained
# weights were saved in the weights folder, and they are loaded as below.

def segmentation(img):
    device = torch.device("cuda")
    net = torch.load("weights/dinov2_model.pth")

    mask_values = [[0, 0, 0], [255, 255, 255]]

    mask = predict_img(net, img, device, scale_factor=1, out_threshold=0.5)
    result = mask_to_image(mask, mask_values)
    result = np.array(result)

    return result


# def calculate_white_area_percentage(mask):
#     total_pixels = mask.size
#     print(total_pixels)
#
#     white_pixels = np.sum(np.all(mask > 100, axis=-1))
#
#     white_area_percentage = (white_pixels / total_pixels) * 100
#
#     return white_area_percentage * 3


# compute what percentage of the image the region cropped by the segmentation above occupies.
# the code below measures how much of the whole image each blob of connected white pixels covers,
# and it still works when there are two or more blobs (presumed invoices).
def mask_percentage(mask_path):

    image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    ret, threshold = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)

    contours, hierarchy = cv2.findContours(threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    total_area = image.shape[0] * image.shape[1]
    contours_list = contours

    contour_areas = [cv2.contourArea(contour) for contour in contours]

    percentages = [(area / total_area) * 100 for area in contour_areas]
    percentage_list = []
    for i, percentage in enumerate(percentages):
        percentage_list.append(percentage)
    return contours_list, percentage_list
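mask_percentage in action on a synthetic mask (a sketch, not part of the commit; one white rectangle covering a quarter of the frame yields a single contour and one percentage):

import cv2
import numpy as np
from seg import mask_percentage

mask = np.zeros((200, 200), dtype=np.uint8)
mask[50:150, 50:150] = 255                 # one blob: 100x100 of 200x200
cv2.imwrite("temporal_mask/mask.png", mask)

contours, percentages = mask_percentage("temporal_mask/mask.png")
print(len(contours), round(percentages[0], 1))
# 1 blob, ~24.5 (contourArea traces the boundary, slightly under the pixel count)
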
sr/__pycache__/sr.cpython-310.pyc ADDED
Binary file (606 Bytes)
 
sr/esrgan ADDED
@@ -0,0 +1 @@
Subproject commit 362a0316878f41dbdfbb23657b450c3353de5acf
sr/sr.py ADDED
@@ -0,0 +1,15 @@
import torch
from PIL import Image
import numpy as np
from .esrgan.RealESRGAN import RealESRGAN

def sr(img):
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = torch.device("cuda")
    model = RealESRGAN(device, scale=2)
    model.load_weights('weights/RealESRGAN_x2.pth', download=True)

    img = Image.fromarray(img)
    sr_image = model.predict(img)
    sr_image = np.array(sr_image)
    return sr_image
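The x2 scale here is what get_box_from_refer in main.py undoes with its //2. A usage sketch (not part of the commit; assumes a CUDA device and the RealESRGAN_x2.pth weights):

import numpy as np
from sr.sr import sr

crop = np.zeros((120, 200, 3), dtype=np.uint8)  # stand-in invoice crop
up = sr(crop)
print(crop.shape, "->", up.shape)  # (120, 200, 3) -> (240, 400, 3)
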
temporal_mask/mask.png ADDED
text_area/new_1.png ADDED
text_area/new_10.png ADDED
text_area/new_11.png ADDED
text_area/new_12.png ADDED
text_area/new_13.png ADDED
text_area/new_14.png ADDED
text_area/new_15.png ADDED
text_area/new_16.png ADDED