Initial Commit
This view is limited to 50 files because it contains too many changes. See raw diff.
- __pycache__/craft.cpython-310.pyc +0 -0
- __pycache__/craft_utils.cpython-310.pyc +0 -0
- __pycache__/file_utils.cpython-310.pyc +0 -0
- __pycache__/imgproc.cpython-310.pyc +0 -0
- __pycache__/mosaik.cpython-310.pyc +0 -0
- __pycache__/ner.cpython-310.pyc +0 -0
- __pycache__/recognize.cpython-310.pyc +0 -0
- __pycache__/refinenet.cpython-310.pyc +0 -0
- __pycache__/seg.cpython-310.pyc +0 -0
- basenet/__init__.py +0 -0
- basenet/__pycache__/__init__.cpython-310.pyc +0 -0
- basenet/__pycache__/vgg16_bn.cpython-310.pyc +0 -0
- basenet/vgg16_bn.py +72 -0
- craft.py +76 -0
- craft_utils.py +217 -0
- cropped2.png +0 -0
- dino/__pycache__/predict.cpython-310.pyc +0 -0
- dino/dino/__init__.py +1 -0
- dino/dino/__pycache__/__init__.cpython-310.pyc +0 -0
- dino/dino/__pycache__/model.cpython-310.pyc +0 -0
- dino/dino/__pycache__/modules.cpython-310.pyc +0 -0
- dino/dino/__pycache__/parts.cpython-310.pyc +0 -0
- dino/dino/model.py +47 -0
- dino/dino/parts.py +67 -0
- dino/predict.py +76 -0
- file_utils.py +77 -0
- imgproc.py +70 -0
- input/1.png +0 -0
- input/2.png +0 -0
- main.py +321 -0
- mosaik.py +29 -0
- ner.py +102 -0
- output/mosaiked.png +0 -0
- output/text_recongnize2.txt +40 -0
- readme.txt +3 -0
- recognize.py +14 -0
- refinenet.py +65 -0
- seg.py +58 -0
- sr/__pycache__/sr.cpython-310.pyc +0 -0
- sr/esrgan +1 -0
- sr/sr.py +15 -0
- temporal_mask/mask.png +0 -0
- text_area/new_1.png +0 -0
- text_area/new_10.png +0 -0
- text_area/new_11.png +0 -0
- text_area/new_12.png +0 -0
- text_area/new_13.png +0 -0
- text_area/new_14.png +0 -0
- text_area/new_15.png +0 -0
- text_area/new_16.png +0 -0
__pycache__/craft.cpython-310.pyc
ADDED
Binary file (2.4 kB).

__pycache__/craft_utils.cpython-310.pyc
ADDED
Binary file (5.7 kB).

__pycache__/file_utils.cpython-310.pyc
ADDED
Binary file (2.51 kB).

__pycache__/imgproc.cpython-310.pyc
ADDED
Binary file (2.1 kB).

__pycache__/mosaik.cpython-310.pyc
ADDED
Binary file (718 Bytes).

__pycache__/ner.cpython-310.pyc
ADDED
Binary file (914 Bytes).

__pycache__/recognize.cpython-310.pyc
ADDED
Binary file (736 Bytes).

__pycache__/refinenet.cpython-310.pyc
ADDED
Binary file (1.95 kB).

__pycache__/seg.cpython-310.pyc
ADDED
Binary file (1.53 kB).

basenet/__init__.py
ADDED
File without changes.

basenet/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (164 Bytes).

basenet/__pycache__/vgg16_bn.cpython-310.pyc
ADDED
Binary file (2.27 kB).
basenet/vgg16_bn.py
ADDED
@@ -0,0 +1,72 @@
from collections import namedtuple

import torch
import torch.nn as nn
import torch.nn.init as init
from torchvision import models

def init_weights(modules):
    for m in modules:
        if isinstance(m, nn.Conv2d):
            init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.weight.data.normal_(0, 0.01)
            m.bias.data.zero_()

class vgg16_bn(torch.nn.Module):
    def __init__(self, pretrained=True, freeze=True):
        super(vgg16_bn, self).__init__()

        vgg_pretrained_features = models.vgg16_bn(pretrained=pretrained).features
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        for x in range(12):        # conv2_2
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(12, 19):    # conv3_3
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(19, 29):    # conv4_3
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(29, 39):    # conv5_3
            self.slice4.add_module(str(x), vgg_pretrained_features[x])

        # fc6, fc7 without atrous conv
        self.slice5 = torch.nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
            nn.Conv2d(1024, 1024, kernel_size=1)
        )

        if not pretrained:
            init_weights(self.slice1.modules())
            init_weights(self.slice2.modules())
            init_weights(self.slice3.modules())
            init_weights(self.slice4.modules())

        init_weights(self.slice5.modules())  # no pretrained model for fc6 and fc7

        if freeze:
            for param in self.slice1.parameters():  # only first conv
                param.requires_grad = False

    def forward(self, X):
        h = self.slice1(X)
        h_relu2_2 = h
        h = self.slice2(h)
        h_relu3_2 = h
        h = self.slice3(h)
        h_relu4_3 = h
        h = self.slice4(h)
        h_relu5_3 = h
        h = self.slice5(h)
        h_fc7 = h
        vgg_outputs = namedtuple("VggOutputs", ['fc7', 'relu5_3', 'relu4_3', 'relu3_2', 'relu2_2'])
        out = vgg_outputs(h_fc7, h_relu5_3, h_relu4_3, h_relu3_2, h_relu2_2)
        return out
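A quick sanity-check sketch (not part of this commit): instantiating the backbone on a dummy batch and printing the five multi-scale feature maps it returns. pretrained=False avoids the torchvision weight download.

    import torch
    from basenet.vgg16_bn import vgg16_bn

    net = vgg16_bn(pretrained=False, freeze=False).eval()
    with torch.no_grad():
        feats = net(torch.randn(1, 3, 768, 768))
    for name, f in zip(feats._fields, feats):
        print(name, tuple(f.shape))  # fc7 down to relu2_2, coarsest to finest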
craft.py
ADDED
@@ -0,0 +1,76 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from basenet.vgg16_bn import vgg16_bn, init_weights

class double_conv(nn.Module):
    def __init__(self, in_ch, mid_ch, out_ch):
        super(double_conv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch + mid_ch, mid_ch, kernel_size=1),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.conv(x)
        return x


class CRAFT(nn.Module):
    def __init__(self, pretrained=False, freeze=False):
        super(CRAFT, self).__init__()

        """ Base network """
        self.basenet = vgg16_bn(pretrained, freeze)

        """ U network """
        self.upconv1 = double_conv(1024, 512, 256)
        self.upconv2 = double_conv(512, 256, 128)
        self.upconv3 = double_conv(256, 128, 64)
        self.upconv4 = double_conv(128, 64, 32)

        num_class = 2
        self.conv_cls = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(32, 16, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=1), nn.ReLU(inplace=True),
            nn.Conv2d(16, num_class, kernel_size=1),
        )

        init_weights(self.upconv1.modules())
        init_weights(self.upconv2.modules())
        init_weights(self.upconv3.modules())
        init_weights(self.upconv4.modules())
        init_weights(self.conv_cls.modules())

    def forward(self, x):
        """ Base network """
        sources = self.basenet(x)

        """ U network """
        y = torch.cat([sources[0], sources[1]], dim=1)
        y = self.upconv1(y)

        y = F.interpolate(y, size=sources[2].size()[2:], mode='bilinear', align_corners=False)
        y = torch.cat([y, sources[2]], dim=1)
        y = self.upconv2(y)

        y = F.interpolate(y, size=sources[3].size()[2:], mode='bilinear', align_corners=False)
        y = torch.cat([y, sources[3]], dim=1)
        y = self.upconv3(y)

        y = F.interpolate(y, size=sources[4].size()[2:], mode='bilinear', align_corners=False)
        y = torch.cat([y, sources[4]], dim=1)
        feature = self.upconv4(y)

        y = self.conv_cls(feature)

        return y.permute(0, 2, 3, 1), feature
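A minimal forward-pass sketch (not part of this commit): CRAFT returns a half-resolution tensor whose two channels are the region ("text") and affinity ("link") score maps decoded below in craft_utils.py, plus the 32-channel feature map consumed by refinenet.py.

    import torch
    from craft import CRAFT

    net = CRAFT(pretrained=False).eval()
    with torch.no_grad():
        y, feature = net(torch.randn(1, 3, 640, 640))
    print(y.shape)        # torch.Size([1, 320, 320, 2])
    print(feature.shape)  # torch.Size([1, 32, 320, 320])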
craft_utils.py
ADDED
@@ -0,0 +1,217 @@
import numpy as np
import cv2
import math

def warpCoord(Minv, pt):
    out = np.matmul(Minv, (pt[0], pt[1], 1))
    return np.array([out[0]/out[2], out[1]/out[2]])


def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
    linkmap = linkmap.copy()
    textmap = textmap.copy()
    img_h, img_w = textmap.shape

    ret, text_score = cv2.threshold(textmap, low_text, 1, 0)
    ret, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)

    text_score_comb = np.clip(text_score + link_score, 0, 1)
    nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8), connectivity=4)

    det = []
    mapper = []
    for k in range(1, nLabels):
        size = stats[k, cv2.CC_STAT_AREA]
        if size < 10: continue

        if np.max(textmap[labels==k]) < text_threshold: continue

        segmap = np.zeros(textmap.shape, dtype=np.uint8)
        segmap[labels==k] = 255
        segmap[np.logical_and(link_score==1, text_score==0)] = 0
        x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
        w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
        niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
        sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
        if sx < 0 : sx = 0
        if sy < 0 : sy = 0
        if ex >= img_w: ex = img_w
        if ey >= img_h: ey = img_h
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
        segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)

        np_contours = np.roll(np.array(np.where(segmap!=0)), 1, axis=0).transpose().reshape(-1, 2)
        rectangle = cv2.minAreaRect(np_contours)
        box = cv2.boxPoints(rectangle)

        w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
        box_ratio = max(w, h) / (min(w, h) + 1e-5)
        if abs(1 - box_ratio) <= 0.1:
            l, r = min(np_contours[:,0]), max(np_contours[:,0])
            t, b = min(np_contours[:,1]), max(np_contours[:,1])
            box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)

        startidx = box.sum(axis=1).argmin()
        box = np.roll(box, 4-startidx, 0)
        box = np.array(box)

        det.append(box)
        mapper.append(k)

    return det, labels, mapper

def getPoly_core(boxes, labels, mapper, linkmap):
    num_cp = 5
    max_len_ratio = 0.7
    expand_ratio = 1.45
    max_r = 2.0
    step_r = 0.2

    polys = []
    for k, box in enumerate(boxes):
        w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1)
        if w < 10 or h < 10:
            polys.append(None); continue

        tar = np.float32([[0,0],[w,0],[w,h],[0,h]])
        M = cv2.getPerspectiveTransform(box, tar)
        word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST)
        try:
            Minv = np.linalg.inv(M)
        except:
            polys.append(None); continue

        cur_label = mapper[k]
        word_label[word_label != cur_label] = 0
        word_label[word_label > 0] = 1

        cp = []
        max_len = -1
        for i in range(w):
            region = np.where(word_label[:,i] != 0)[0]
            if len(region) < 2 : continue
            cp.append((i, region[0], region[-1]))
            length = region[-1] - region[0] + 1
            if length > max_len: max_len = length

        if h * max_len_ratio < max_len:
            polys.append(None); continue

        tot_seg = num_cp * 2 + 1
        seg_w = w / tot_seg
        pp = [None] * num_cp
        cp_section = [[0, 0]] * tot_seg
        seg_height = [0] * num_cp
        seg_num = 0
        num_sec = 0
        prev_h = -1
        for i in range(0, len(cp)):
            (x, sy, ey) = cp[i]
            if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
                # average previous segment
                if num_sec == 0: break
                cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec]
                num_sec = 0

                # reset variables
                seg_num += 1
                prev_h = -1

            # accumulate center points
            cy = (sy + ey) * 0.5
            cur_h = ey - sy + 1
            cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy]
            num_sec += 1

            if seg_num % 2 == 0: continue  # no polygon area

            if prev_h < cur_h:
                pp[int((seg_num - 1)/2)] = (x, cy)
                seg_height[int((seg_num - 1)/2)] = cur_h
                prev_h = cur_h

        # process the last segment
        if num_sec != 0:
            cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec]

        # pass if the number of pivots is not sufficient or the segment width is smaller than the character height
        if None in pp or seg_w < np.max(seg_height) * 0.25:
            polys.append(None); continue

        # calc median maximum of pivot points
        half_char_h = np.median(seg_height) * expand_ratio / 2

        # calc gradient and apply to make horizontal pivots
        new_pp = []
        for i, (x, cy) in enumerate(pp):
            dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
            dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
            if dx == 0:  # gradient is zero
                new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
                continue
            rad = - math.atan2(dy, dx)
            c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
            new_pp.append([x - s, cy - c, x + s, cy + c])

        # get edge points to cover character heatmaps
        isSppFound, isEppFound = False, False
        grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
        grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
        for r in np.arange(0.5, max_r, step_r):
            dx = 2 * half_char_h * r
            if not isSppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_s * dx
                p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    spp = p
                    isSppFound = True
            if not isEppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_e * dx
                p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    epp = p
                    isEppFound = True
            if isSppFound and isEppFound:
                break

        if not (isSppFound and isEppFound):
            polys.append(None); continue

        poly = []
        poly.append(warpCoord(Minv, (spp[0], spp[1])))
        for p in new_pp:
            poly.append(warpCoord(Minv, (p[0], p[1])))
        poly.append(warpCoord(Minv, (epp[0], epp[1])))
        poly.append(warpCoord(Minv, (epp[2], epp[3])))
        for p in reversed(new_pp):
            poly.append(warpCoord(Minv, (p[2], p[3])))
        poly.append(warpCoord(Minv, (spp[2], spp[3])))

        # add to final result
        polys.append(np.array(poly))

    return polys

def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False):
    boxes, labels, mapper = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)

    if poly:
        polys = getPoly_core(boxes, labels, mapper, linkmap)
    else:
        polys = [None] * len(boxes)

    return boxes, polys

def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net = 2):
    if len(polys) > 0:
        polys = np.array(polys)
        for k in range(len(polys)):
            if polys[k] is not None:
                polys[k] *= (ratio_w * ratio_net, ratio_h * ratio_net)
    return polys
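A minimal decoding sketch (not part of this commit): running the box decoder on a synthetic score map with one bright rectangle standing in for a word.

    import numpy as np
    import craft_utils

    textmap = np.zeros((120, 160), dtype=np.float32)
    linkmap = np.zeros((120, 160), dtype=np.float32)
    textmap[40:60, 30:100] = 0.9  # fake "text" region

    boxes, polys = craft_utils.getDetBoxes(textmap, linkmap,
                                           text_threshold=0.7,
                                           link_threshold=0.4,
                                           low_text=0.4)
    # map half-resolution heatmap coordinates back to image space (ratio_net=2)
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w=1.0, ratio_h=1.0)
    print(len(boxes), boxes[0])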
cropped2.png
ADDED
dino/__pycache__/predict.cpython-310.pyc
ADDED
Binary file (1.89 kB).
dino/dino/__init__.py
ADDED
@@ -0,0 +1 @@
from .model import Dinov2
dino/dino/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (202 Bytes).

dino/dino/__pycache__/model.cpython-310.pyc
ADDED
Binary file (1.63 kB).

dino/dino/__pycache__/modules.cpython-310.pyc
ADDED
Binary file (2.58 kB).

dino/dino/__pycache__/parts.cpython-310.pyc
ADDED
Binary file (2.58 kB).
dino/dino/model.py
ADDED
@@ -0,0 +1,47 @@
from .parts import *


class Dinov2(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=False):
        super(Dinov2, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = (DoubleConv(n_channels, 64))
        self.down1 = (Down(64, 128))
        self.down2 = (Down(128, 256))
        self.down3 = (Down(256, 512))
        factor = 2 if bilinear else 1
        self.down4 = (Down(512, 1024 // factor))
        self.up1 = (Up(1024, 512 // factor, bilinear))
        self.up2 = (Up(512, 256 // factor, bilinear))
        self.up3 = (Up(256, 128 // factor, bilinear))
        self.up4 = (Up(128, 64, bilinear))
        self.outc = (OutConv(64, n_classes))

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

    def use_checkpointing(self):
        # NOTE: unused in this repo, and torch.utils.checkpoint is a module,
        # not a callable, so invoking this method as written would raise TypeError.
        self.inc = torch.utils.checkpoint(self.inc)
        self.down1 = torch.utils.checkpoint(self.down1)
        self.down2 = torch.utils.checkpoint(self.down2)
        self.down3 = torch.utils.checkpoint(self.down3)
        self.down4 = torch.utils.checkpoint(self.down4)
        self.up1 = torch.utils.checkpoint(self.up1)
        self.up2 = torch.utils.checkpoint(self.up2)
        self.up3 = torch.utils.checkpoint(self.up3)
        self.up4 = torch.utils.checkpoint(self.up4)
        self.outc = torch.utils.checkpoint(self.outc)
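A minimal forward-pass sketch (not part of this commit): despite the Dinov2 name, the class is a standard U-Net. n_classes=2 is an assumption here, mirroring the two mask_values used in seg.py.

    import torch
    from dino.dino.model import Dinov2

    net = Dinov2(n_channels=3, n_classes=2).eval()
    with torch.no_grad():
        logits = net(torch.randn(1, 3, 256, 256))
    print(logits.shape)  # torch.Size([1, 2, 256, 256])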
dino/dino/parts.py
ADDED
@@ -0,0 +1,67 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class DoubleConv(nn.Module):
    # (conv => BN => ReLU) * 2

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    # downscale with maxpool, then double conv

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    # upscale, then double conv over the concatenated skip connection

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # pad x1 so it matches the skip connection when input sizes are odd
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)
dino/predict.py
ADDED
@@ -0,0 +1,76 @@
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image


def preprocess(mask_values, pil_img, scale, is_mask):
    pil_img = Image.fromarray(pil_img)
    w, h = pil_img.size
    newW, newH = int(scale * w), int(scale * h)
    pil_img = pil_img.resize((newW, newH))
    img = np.asarray(pil_img)

    if is_mask:
        mask = np.zeros((newH, newW), dtype=np.int64)
        for i, v in enumerate(mask_values):
            if img.ndim == 2:
                mask[img == v] = i
            else:
                mask[(img == v).all(-1)] = i

        return mask

    else:
        if img.ndim == 2:
            img = img[np.newaxis, ...]
        else:
            img = img.transpose((2, 0, 1))

        if (img > 1).any():
            img = img / 255.0

        return img

def predict_img(net,
                full_img,
                device,
                scale_factor=1,
                out_threshold=0.5):
    net.eval()
    img = torch.from_numpy(preprocess(None, full_img, scale_factor, is_mask=False))
    img = img.unsqueeze(0)
    img = img.to(device=device, dtype=torch.float32)

    with torch.no_grad():
        output = net(img).cpu()

        if net.n_classes > 1:
            mask = output.argmax(dim=1)
        else:
            mask = torch.sigmoid(output) > out_threshold

    return mask[0].long().squeeze().numpy()


def mask_to_image(mask: np.ndarray, mask_values):
    if isinstance(mask_values[0], list):
        out = np.zeros((mask.shape[-2], mask.shape[-1], len(mask_values[0])), dtype=np.uint8)
    elif mask_values == [0, 1]:
        out = np.zeros((mask.shape[-2], mask.shape[-1]), dtype=bool)
    else:
        out = np.zeros((mask.shape[-2], mask.shape[-1]), dtype=np.uint8)

    if mask.ndim == 3:
        mask = np.argmax(mask, axis=0)

    for i, v in enumerate(mask_values):
        out[mask == i] = v

    return Image.fromarray(out)
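A minimal usage sketch (not part of this commit): the predict-then-colorize round trip, assuming a full-model checkpoint saved with torch.save at the path seg.py loads from.

    import numpy as np
    import torch
    from dino.predict import predict_img, mask_to_image

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = torch.load("weights/dinov2_model.pth", map_location=device)  # path taken from seg.py

    img = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for a loaded photo
    mask = predict_img(net, img, device, scale_factor=1, out_threshold=0.5)
    mask_to_image(mask, [[0, 0, 0], [255, 255, 255]]).save("mask.png")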
file_utils.py
ADDED
@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
import os
import numpy as np
import cv2
import imgproc
from mosaik import mosaik

# borrowed from https://github.com/lengstrom/fast-style-transfer/blob/master/src/utils.py
def get_files(img_dir):
    imgs, masks, xmls = list_files(img_dir)
    return imgs, masks, xmls

def list_files(in_path):
    img_files = []
    mask_files = []
    gt_files = []
    for (dirpath, dirnames, filenames) in os.walk(in_path):
        for file in filenames:
            filename, ext = os.path.splitext(file)
            ext = str.lower(ext)
            if ext == '.jpg' or ext == '.jpeg' or ext == '.gif' or ext == '.png' or ext == '.pgm':
                img_files.append(os.path.join(dirpath, file))
            elif ext == '.bmp':
                mask_files.append(os.path.join(dirpath, file))
            elif ext == '.xml' or ext == '.gt' or ext == '.txt':
                gt_files.append(os.path.join(dirpath, file))
            elif ext == '.zip':
                continue
    # img_files.sort()
    # mask_files.sort()
    # gt_files.sort()
    return img_files, mask_files, gt_files

def saveResult(img_file, img, boxes, dirname='./result/', verticals=None, texts=None):
    """ save text detection result one by one
    Args:
        img_file (str): image file name
        img (array): raw image context
        boxes (array): array of result file
            Shape: [num_detections, 4] for BB output / [num_detections, 4] for QUAD output
    Return:
        None
    """
    img = np.array(img)

    # make result file list
    filename, file_ext = os.path.splitext(os.path.basename(img_file))

    # result directory
    res_file = dirname + "res_" + filename + '.txt'
    res_img_file = dirname + "res_" + filename + '.jpg'

    if not os.path.isdir(dirname):
        os.mkdir(dirname)

    with open(res_file, 'w') as f:
        for i, box in enumerate(boxes):
            poly = np.array(box).astype(np.int32).reshape((-1))
            strResult = ','.join([str(p) for p in poly]) + '\r\n'
            f.write(strResult)

            poly = poly.reshape(-1, 2)
            cv2.polylines(img, [poly.reshape((-1, 1, 2))], True, color=(0, 0, 255), thickness=2)
            ptColor = (0, 255, 255)
            if verticals is not None:
                if verticals[i]:
                    ptColor = (255, 0, 0)

            if texts is not None:
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 0.5
                cv2.putText(img, "{}".format(texts[i]), (poly[0][0]+1, poly[0][1]+1), font, font_scale, (0, 0, 0), thickness=1)
                cv2.putText(img, "{}".format(texts[i]), tuple(poly[0]), font, font_scale, (0, 255, 255), thickness=1)

    # Save result image
    cv2.imwrite(res_img_file, img)
    return img
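A minimal usage sketch (not part of this commit): drawing one detection box with saveResult. The upstream CRAFT demo passes image[:, :, ::-1] so the RGB image is drawn and written with OpenCV's BGR ordering.

    import numpy as np
    import file_utils
    import imgproc

    img = imgproc.loadImage("input/1.png")
    boxes = [np.array([[10, 10], [200, 10], [200, 60], [10, 60]], dtype=np.float32)]
    file_utils.saveResult("input/1.png", img[:, :, ::-1], boxes, dirname="./result/")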
imgproc.py
ADDED
@@ -0,0 +1,70 @@
"""
Copyright (c) 2019-present NAVER Corp.
MIT License
"""

# -*- coding: utf-8 -*-
import numpy as np
from skimage import io
import cv2

def loadImage(img_file):
    img = io.imread(img_file)  # RGB order
    if img.shape[0] == 2: img = img[0]
    if len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    if img.shape[2] == 4: img = img[:,:,:3]
    img = np.array(img)

    return img

def normalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
    # should be RGB order
    img = in_img.copy().astype(np.float32)

    img -= np.array([mean[0] * 255.0, mean[1] * 255.0, mean[2] * 255.0], dtype=np.float32)
    img /= np.array([variance[0] * 255.0, variance[1] * 255.0, variance[2] * 255.0], dtype=np.float32)
    return img

def denormalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
    # should be RGB order
    img = in_img.copy()
    img *= variance
    img += mean
    img *= 255.0
    img = np.clip(img, 0, 255).astype(np.uint8)
    return img

def resize_aspect_ratio(img, square_size, interpolation, mag_ratio=1):
    height, width, channel = img.shape

    # magnify image size
    target_size = mag_ratio * max(height, width)

    # set original image size
    if target_size > square_size:
        target_size = square_size

    ratio = target_size / max(height, width)

    target_h, target_w = int(height * ratio), int(width * ratio)
    proc = cv2.resize(img, (target_w, target_h), interpolation=interpolation)

    # make canvas and paste image
    target_h32, target_w32 = target_h, target_w
    if target_h % 32 != 0:
        target_h32 = target_h + (32 - target_h % 32)
    if target_w % 32 != 0:
        target_w32 = target_w + (32 - target_w % 32)
    resized = np.zeros((target_h32, target_w32, channel), dtype=np.float32)
    resized[0:target_h, 0:target_w, :] = proc
    target_h, target_w = target_h32, target_w32

    size_heatmap = (int(target_w/2), int(target_h/2))

    return resized, ratio, size_heatmap

def cvt2HeatmapImg(img):
    img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
    img = cv2.applyColorMap(img, cv2.COLORMAP_JET)
    return img
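A minimal preprocessing sketch (not part of this commit): the resize-and-pad step used in main.py, shown on a dummy image. The canvas is padded up to a multiple of 32, and the heatmap is half the padded size.

    import cv2
    import numpy as np
    import imgproc

    img = np.zeros((700, 500, 3), dtype=np.uint8)
    resized, ratio, size_heatmap = imgproc.resize_aspect_ratio(
        img, square_size=1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
    print(resized.shape, ratio, size_heatmap)  # (1056, 768, 3) 1.5 (384, 528)
    x = imgproc.normalizeMeanVariance(resized)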
input/1.png
ADDED
input/2.png
ADDED
main.py
ADDED
@@ -0,0 +1,321 @@
from recognize import recongize
from ner import ner
import os
import time
import argparse
from sr.sr import sr
import torch
from scipy.ndimage import gaussian_filter
from PIL import Image
import numpy as np
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from mosaik import mosaik
import cv2
from skimage import io
import craft_utils
import imgproc
import file_utils
from seg import segmentation, mask_percentage
from craft import CRAFT
from collections import OrderedDict
import gradio as gr
from refinenet import RefineNet


# Loads the CRAFT and refiner checkpoints (strips DataParallel "module." prefixes).
def copyStateDict(state_dict):
    if list(state_dict.keys())[0].startswith("module"):
        start_idx = 1
    else:
        start_idx = 0
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = ".".join(k.split(".")[start_idx:])
        new_state_dict[name] = v
    return new_state_dict

def str2bool(v):
    return v.lower() in ("yes", "y", "true", "t", "1")

parser = argparse.ArgumentParser(description='CRAFT Text Detection')
parser.add_argument('--trained_model', default='weights/craft_mlt_25k.pth', type=str, help='pretrained CRAFT model')
parser.add_argument('--text_threshold', default=0.7, type=float, help='text confidence threshold')
parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score')
parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold')
parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda for inference')
parser.add_argument('--canvas_size', default=1280, type=int, help='image size for inference')
parser.add_argument('--mag_ratio', default=1.5, type=float, help='image magnification ratio')
parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
parser.add_argument('--show_time', default=False, action='store_true', help='show processing time')
parser.add_argument('--test_folder', default='data/', type=str, help='folder path to input images')
parser.add_argument('--refine', default=True, help='enable link refiner')
parser.add_argument('--image_path', default="input/1.png", help='input image')
parser.add_argument('--refiner_model', default='weights/craft_refiner_CTW1500.pth', type=str, help='pretrained refiner model')
parser.add_argument('--result_path', default="result.png", help='result image')
parser.add_argument('--percent', default=25, type=int, help='percent of invoice in full image frame')
args = parser.parse_args()


image_list, _, _ = file_utils.get_files(args.test_folder)


def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
    t0 = time.time()

    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)
    x = Variable(x.unsqueeze(0))
    if cuda:
        x = x.cuda()

    with torch.no_grad():
        y, feature = net(x)

    score_text = y[0,:,:,0].cpu().data.numpy()
    score_link = y[0,:,:,1].cpu().data.numpy()

    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0,:,:,0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    return boxes, polys, ret_score_text

def text_detect(image, net, refine_net):
    bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, refine_net)
    return bboxes


def get_box_from_refer(reference_boxes):
    real_boxes = []
    for box in reference_boxes:
        real_boxes.append(box // 2)
    return real_boxes

def get_min_max(box):
    xlist = []
    ylist = []
    for coor in box:
        xlist.append(coor[0])
        ylist.append(coor[1])
    return min(xlist), max(xlist), min(ylist), max(ylist)

def main(image_path0):
    # Step 1
    # ==> Load the CRAFT and RefineNet models and move them onto the CUDA device.
    net = CRAFT()
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(args.trained_model)))

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = False

    net.eval()

    refine_net = None
    if args.refine:
        refine_net = RefineNet()
        if args.cuda:
            refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model)))
            refine_net = refine_net.cuda()

        refine_net.eval()
        args.poly = True

    # Step 2
    # An image dropped into the Gradio input box is passed in here as A.
    A = image_path0
    image_list = []
    image_list.append(A)
    for k, image_path in enumerate(image_list):

        image = imgproc.loadImage(image_path)
        if image.shape[2] > 3:
            image = image[:, :, 0:3]

        original_image = image
        # Segment only the invoice regions of the image with the DINOv2 model.
        image2 = segmentation(image)
        image3 = Image.fromarray(image2)
        image3.save("temporal_mask/mask.png")
        # This produces a mask image (white pixels on a black background).
        # From that mask, compute what percentage of the whole image each blob
        # (presumed invoice) occupies.
        contours_list, percentage_list = mask_percentage("temporal_mask/mask.png")
        normal_image_list = []

        small_coordinate_list = []
        original_coordinate_list = []

        # Step 3
        # percentage_list collects the area percentages of the blobs presumed to
        # be invoices; contours_list holds those blobs, cropped and in the same
        # order. E.g. the first element of percentage_list is the percentage of
        # the first contour in contours_list.
        for index, percentage in enumerate(percentage_list):

            if 5 < percentage:
                # Blobs covering more than 5 percent of the image go into the
                # normal list: regions large enough (presumed invoices) to read.
                # Blobs under 5 percent go into small_coordinate_list and are
                # treated as very small: even zoomed in, their text is barely
                # visible, so the whole blob gets mosaicked.
                contour = contours_list[index]

                x_list = []
                y_list = []
                contour2 = list(contour)

                for r in contour2:
                    r2 = r[0]
                    x_list.append(r2[0])
                    y_list.append(r2[1])
                x_min = min(x_list)
                y_min = min(y_list)
                x_max = max(x_list)
                y_max = max(y_list)
                original_coordinate_list.append([y_min, y_max, x_min, x_max])
                image2 = original_image[y_min:y_max, x_min:x_max, :]
                normal_image_list.append(image2)

            else:
                contour = contours_list[index]

                x_list = []
                y_list = []
                contour2 = list(contour)

                for r in contour2:
                    r2 = r[0]
                    x_list.append(r2[0])
                    y_list.append(r2[1])
                x_min = min(x_list)
                y_min = min(y_list)
                x_max = max(x_list)
                y_max = max(y_list)
                small_coordinate_list.append([y_min, y_max, x_min, x_max])  # coordinates of invoices under 5 percent

        # Step 4 (very small invoices)
        # small_coordinate_list holds the very small invoices; if it is empty we
        # skip straight to step 5. Otherwise (at least one element) every
        # matching region of the full image is mosaicked via mosaik.
        if len(small_coordinate_list) > 0:
            original_image = mosaik(original_image, small_coordinate_list)
        else:
            pass

        # Step 5 (reasonably sized invoices)
        # normal_image_list holds invoices of adequate size (text visible when
        # zoomed). Each cropped invoice is upscaled with ESRGAN so that CRAFT
        # can return text locations, then fed to CRAFT to get accurate text
        # coordinates. The coordinates from the upscaled image are not returned
        # as-is; they are scaled back (//2) to fit the original invoice crop.
        for index, normal_image in enumerate(normal_image_list):
            reference_image = sr(normal_image)
            reference_boxes = text_detect(reference_image, net=net, refine_net=refine_net)
            boxes = get_box_from_refer(reference_boxes)
            for index2, box in enumerate(boxes):
                xmin, xmax, ymin, ymax = get_min_max(box)

                text_area = normal_image[int(ymin):int(ymax), int(xmin):int(xmax), :]
                text_area = Image.fromarray(text_area)
                os.makedirs("text_area", exist_ok=True)
                text_area.save(f"text_area/new_{index2+1}.png")

                # Step 6 (text recognition, NER)
                # The coordinates above crop boxes out of the invoice. Each crop
                # (a presumed text region) goes into TrOCR, which returns the
                # recognized text. The text goes into KoELECTRA to decide
                # whether that region is personal information. If the region is
                # judged personal information (label 1), its coordinates are
                # mapped back onto the invoice crop and that area is mosaicked.
                # The partially mosaicked invoice is then pasted back into the
                # full image.
                text = recongize(text_area)
                label = ner(text)
                with open("output/text_recongnize2.txt", "a") as recognized:
                    recognized.writelines(str(index2+1))
                    recognized.writelines(" ")
                    recognized.writelines(str(text))
                    recognized.writelines(" ")
                    recognized.writelines(str(label))
                    recognized.writelines("\n")
                    recognized.close()  # redundant: the with-block already closes the file
                print("done")
                if label == 1:
                    A = normal_image[int(ymin):int(ymax), int(xmin):int(xmax), :]
                    normal_image[int(ymin):int(ymax), int(xmin):int(xmax), :] = gaussian_filter(A, sigma=16)
                else:
                    pass
            a, b, c, d = original_coordinate_list[index]
            original_image[a:b, c:d, :] = normal_image
        original_image = Image.fromarray(original_image)
        original_image.save("output/mosaiked.png")
        print("masked complete")
        return original_image


# if __name__ == '__main__':
#     iface = gr.Interface(
#         fn=main,
#         inputs=gr.Image(type="filepath", label="Invoice Image"),
#         outputs=gr.Image(type="pil", label="Masked Invoice Image"),
#         live=True
#     )
#     iface.launch()
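A minimal invocation sketch (not part of this commit): calling the pipeline directly on one of the bundled inputs instead of through the commented-out Gradio UI. This assumes the weights/ checkpoints named in the argparse defaults and a CUDA device are available; note that importing main runs the module-level argparse.

    from main import main

    masked = main("input/1.png")  # returns a PIL.Image
    masked.save("output/mosaiked_copy.png")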
mosaik.py
ADDED
@@ -0,0 +1,29 @@
import numpy as np
from PIL import Image
from scipy.ndimage import gaussian_filter
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

def mosaik(img, bboxes):
    # each box is [y_min, y_max, x_min, x_max]
    for box in bboxes:
        cropped = img[box[0]:box[1], box[2]:box[3], :]
        cropped = np.array(cropped)
        cropped = gaussian_filter(cropped, sigma=16)
        img[box[0]:box[1], box[2]:box[3], :] = cropped
    return img
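A minimal usage sketch (not part of this commit): mosaik blurs each [y_min, y_max, x_min, x_max] region with a sigma-16 Gaussian. The output path is hypothetical.

    import numpy as np
    from PIL import Image
    from mosaik import mosaik

    img = np.random.randint(0, 255, (200, 300, 3), dtype=np.uint8)
    blurred = mosaik(img.copy(), [[50, 120, 80, 220]])  # note: mosaik mutates its input
    Image.fromarray(blurred).save("mosaik_demo.png")    # hypothetical output file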
ner.py
ADDED
@@ -0,0 +1,102 @@
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
from collections import defaultdict
import torch
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda")
tokenizer = AutoTokenizer.from_pretrained("Leo97/KoELECTRA-small-v3-modu-ner")
model = AutoModelForTokenClassification.from_pretrained("Leo97/KoELECTRA-small-v3-modu-ner")
model.to(device)


def check_entity(entities):
    # returns 1 when any entity is a location (LC) or person (PS) tag
    for entity_info in entities:
        entity_value = entity_info.get('entity', '').upper()
        if 'LC' in entity_value or 'PS' in entity_value:
            return 1
    return 0

def ner(example):
    ner = pipeline("ner", model=model, tokenizer=tokenizer, device=device)
    ner_results = ner(example)
    ner_results = check_entity(ner_results)
    return ner_results


# Earlier attempts, kept commented out:
# def find_longest_value_key(input_dict):
#     max_length = 0
#     max_length_keys = []
#
#     for key, value in input_dict.items():
#         current_length = len(value)
#         if current_length > max_length:
#             max_length = current_length
#             max_length_keys = [key]
#         elif current_length == max_length:
#             max_length_keys.append(key)
#
#     if len(max_length_keys) == 1:
#         return 0
#     else:
#         return 1


# def find_longest_value_key2(input_dict):
#     if not input_dict:
#         return None
#
#     max_key = max(input_dict, key=lambda k: len(input_dict[k]))
#     return max_key


# def find_most_frequent_entity(entities):
#     entity_counts = defaultdict(list)
#
#     for item in entities:
#         split_entity = item['entity'].split('-')
#
#         entity_type = split_entity[1]
#         entity_counts[entity_type].append(item['score'])
#     number = find_longest_value_key(entity_counts)
#     if number == 1:
#         max_entities = []
#         max_score_average = -1
#
#         for entity, scores in entity_counts.items():
#             score_average = sum(scores) / len(scores)
#
#             if score_average > max_score_average:
#                 max_entities = [entity]
#                 max_score_average = score_average
#             elif score_average == max_score_average:
#                 max_entities.append(entity)
#         if len(max_entities) > 0:
#             return max_entities if len(max_entities) > 1 else max_entities[0]
#         else:
#             return "Do not mosaik"
#     else:
#         A = find_longest_value_key2(entity_counts)
#         return A


# If even a single PS or LC tag appears, extract PS/LC right away:
# label = filtering(ner_results)
# if label.find("PS") > -1 or label.find("LC") > -1:
#     return 1
# else:
#     return 0
# print(ner("홍길동"))


# label = check_label(example)
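A minimal usage sketch (not part of this commit): ner returns 1 when the KoELECTRA tagger finds a person (PS) or location (LC) entity in the string, else 0. Exact outputs depend on the downloaded model.

    from ner import ner

    print(ner("홍길동"))  # a person name; expected label 1
    print(ner("5,000"))   # a bare amount; expected label 0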
output/mosaiked.png
ADDED
output/text_recongnize2.txt
ADDED
@@ -0,0 +1,40 @@
1 6259 - 9409 - 7040 main. 0
1 6259 - 9409 - 7040 main. 0
2 3H10 - 4 0
2 3H10 - 4 0
3 0
3 0
4 0
4 0
5 032 - 990 - / μΌ0110 - 750 - - 0
5 032 - 990 - / μΌ0110 - 750 - - 0
6 " 0
6 " 0
7 μ κ°λκ΄μλΉμ½κ΅¬λμ°λ1 - 1λ²μ§ 1
7 μ κ°λκ΄μλΉμ½κ΅¬λμ°λ1 - 1λ²μ§ 1
8 λμ1 - 10 1
8 λμ1 - 10 1
9 5, 000 0
9 5, 000 0
10 λ·μ¨ - 00νμμΈ1 0
10 λ·μ¨ - 00νμμΈ1 0
11 λͺν 0
11 λͺν 0
12 Iμ¨νμλꡬνꡬν©μ ν104 - 1011λ²μ 1
12 Iμ¨νμλꡬνꡬν©μ ν104 - 1011λ²μ 1
13 μλ¦°λ©΄ν : 624. 8μ² κ³Όμ₯μΌκ΅°λ¦Ό : μ°λΆμ€μ 리 1
13 μλ¦°λ©΄ν : 624. 8μ² κ³Όμ₯μΌκ΅°λ¦Ό : μ°λΆμ€μ 리 1
14 - 650 / 010 / 0101 - 2021 0
14 - 650 / 010 / 0101 - 2021 0
15 μ μ°μμ : 2019. 12. 02 0
15 μ μ°μμ : 2019. 12. 02 0
16 838 / - 252 / SIN / SI - 255 - 2 / 0
16 838 / - 252 / SIN / SI - 255 - 2 / 0
17 곡리λ₯μ‘λ³μ½ 1ν 1νμ§ ( νκ΄μ€νλΉμ 0
17 곡리λ₯μ‘λ³μ½ 1ν 1νμ§ ( νκ΄μ€νλΉμ 0
18 λνλ±μΌ 0
18 λνλ±μΌ 0
19 λ²Όλ₯Έ ( μ©μ‘°μΈμ기격50μ ) 0
19 λ²Όλ₯Έ ( μ©μ‘°μΈμ기격50μ ) 0
20 κΈ°μν 0
20 κΈ°μν 0
readme.txt
ADDED
@@ -0,0 +1,3 @@
1. This code was designed to run on a CUDA device.
2. Files we wrote: main.py, mosaik.py, ner.py, recognize.py, seg.py, sr.py
3.
recognize.py
ADDED
@@ -0,0 +1,14 @@
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, AutoTokenizer
import unicodedata


def recongize(img):  # sic: the misspelling is kept because main.py imports it by this name
    processor = TrOCRProcessor.from_pretrained("trocr_weight")
    model = VisionEncoderDecoderModel.from_pretrained("trocr_weight")
    tokenizer = AutoTokenizer.from_pretrained("trocr_weight")

    pixel_values = processor(img, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values, max_length=64)
    generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    generated_text = unicodedata.normalize("NFC", generated_text)
    return generated_text
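A minimal usage sketch (not part of this commit): running TrOCR on one of the saved text crops. "trocr_weight" is the local checkpoint directory this file expects.

    from PIL import Image
    from recognize import recongize

    crop = Image.open("text_area/new_1.png").convert("RGB")
    print(recongize(crop))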
refinenet.py
ADDED
@@ -0,0 +1,65 @@
"""
Copyright (c) 2019-present NAVER Corp.
MIT License
"""

# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from basenet.vgg16_bn import init_weights


class RefineNet(nn.Module):
    def __init__(self):
        super(RefineNet, self).__init__()

        self.last_conv = nn.Sequential(
            nn.Conv2d(34, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True)
        )

        self.aspp1 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=6, padding=6), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        self.aspp2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=12, padding=12), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        self.aspp3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=18, padding=18), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        self.aspp4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, dilation=24, padding=24), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1)
        )

        init_weights(self.last_conv.modules())
        init_weights(self.aspp1.modules())
        init_weights(self.aspp2.modules())
        init_weights(self.aspp3.modules())
        init_weights(self.aspp4.modules())

    def forward(self, y, upconv4):
        refine = torch.cat([y.permute(0,3,1,2), upconv4], dim=1)
        refine = self.last_conv(refine)

        aspp1 = self.aspp1(refine)
        aspp2 = self.aspp2(refine)
        aspp3 = self.aspp3(refine)
        aspp4 = self.aspp4(refine)

        #out = torch.add([aspp1, aspp2, aspp3, aspp4], dim=1)
        out = aspp1 + aspp2 + aspp3 + aspp4
        return out.permute(0, 2, 3, 1)  # , refine.permute(0,2,3,1)
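A minimal wiring sketch (not part of this commit): RefineNet consumes CRAFT's raw output (2 channels) and feature map (32 channels, hence the 34-channel first conv) and emits a refined link score map.

    import torch
    from craft import CRAFT
    from refinenet import RefineNet

    net, refiner = CRAFT().eval(), RefineNet().eval()
    with torch.no_grad():
        y, feature = net(torch.randn(1, 3, 320, 320))
        y_refined = refiner(y, feature)
    print(y_refined.shape)  # torch.Size([1, 160, 160, 1])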
seg.py
ADDED
@@ -0,0 +1,58 @@
from dino.predict import predict_img, mask_to_image
import torch
from PIL import Image
import numpy as np
import cv2

# Code for cropping the invoice region.
# DINOv2 is used as the segmentation model; it was trained, its weights were
# saved in the weights folder, and they are loaded as below.

def segmentation(img):
    device = torch.device("cuda")
    net = torch.load("weights/dinov2_model.pth")

    mask_values = [[0, 0, 0], [255, 255, 255]]

    mask = predict_img(net, img, device, scale_factor=1, out_threshold=0.5)
    result = mask_to_image(mask, mask_values)
    result = np.array(result)

    return result


# def calculate_white_area_percentage(mask):
#     total_pixels = mask.size
#     print(total_pixels)
#
#     white_pixels = np.sum(np.all(mask > 100, axis=-1))
#
#     white_area_percentage = (white_pixels / total_pixels) * 100
#
#     return white_area_percentage * 3


# Computes what percentage of the image each region cropped by the segmentation
# above occupies. The code below measures, for each blob of connected white
# pixels, its share of the whole image; it also works when there are two or
# more blobs (presumed invoices).
def mask_percentage(mask_path):

    image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    ret, threshold = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)

    contours, hierarchy = cv2.findContours(threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    total_area = image.shape[0] * image.shape[1]
    contours_list = contours

    contour_areas = [cv2.contourArea(contour) for contour in contours]

    percentages = [(area / total_area) * 100 for area in contour_areas]
    percentage_list = []
    for i, percentage in enumerate(percentages):
        percentage_list.append(percentage)
    return contours_list, percentage_list
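A minimal usage sketch (not part of this commit): mask_percentage on a synthetic mask with two white blobs of known size. The mask path is hypothetical.

    import cv2
    import numpy as np
    from seg import mask_percentage

    mask = np.zeros((100, 100), dtype=np.uint8)
    mask[10:30, 10:30] = 255   # ~4% of the frame
    mask[50:90, 50:90] = 255   # ~16% of the frame
    cv2.imwrite("demo_mask.png", mask)

    contours, percentages = mask_percentage("demo_mask.png")
    print(sorted(round(p, 1) for p in percentages))  # roughly [3.6, 15.2]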
sr/__pycache__/sr.cpython-310.pyc
ADDED
Binary file (606 Bytes).
sr/esrgan
ADDED
@@ -0,0 +1 @@
Subproject commit 362a0316878f41dbdfbb23657b450c3353de5acf
sr/sr.py
ADDED
@@ -0,0 +1,15 @@
import torch
from PIL import Image
import numpy as np
from .esrgan.RealESRGAN import RealESRGAN

def sr(img):
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = torch.device("cuda")
    model = RealESRGAN(device, scale=2)
    model.load_weights('weights/RealESRGAN_x2.pth', download=True)

    img = Image.fromarray(img)
    sr_image = model.predict(img)
    sr_image = np.array(sr_image)
    return sr_image
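A minimal usage sketch (not part of this commit): the 2x upscale is what lets main.py halve CRAFT's coordinates (get_box_from_refer) to map them back onto the original crop. Assumes the RealESRGAN_x2 weights are available.

    import numpy as np
    from sr.sr import sr

    crop = np.zeros((64, 48, 3), dtype=np.uint8)
    up = sr(crop)
    print(crop.shape, up.shape)  # (64, 48, 3) (128, 96, 3)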
temporal_mask/mask.png
ADDED

text_area/new_1.png
ADDED

text_area/new_10.png
ADDED

text_area/new_11.png
ADDED

text_area/new_12.png
ADDED

text_area/new_13.png
ADDED

text_area/new_14.png
ADDED

text_area/new_15.png
ADDED

text_area/new_16.png
ADDED