ArneBinder commited on
Commit
b0fe481
·
verified ·
1 Parent(s): 302f1cf

prepare for https://github.com/ArneBinder/pie-document-level/pull/312

Browse files
Files changed (1) hide show
  1. rendering_utils_displacy.py +0 -217
rendering_utils_displacy.py DELETED
@@ -1,217 +0,0 @@
1
- # This code is mainly taken from
2
- # https://github.com/explosion/spaCy/blob/master/spacy/displacy/templates.py, and from
3
- # https://github.com/explosion/spaCy/blob/master/spacy/displacy/render.py.
4
-
5
- # Setting explicit height and max-width: none on the SVG is required for
6
- # Jupyter to render it properly in a cell
7
-
8
- TPL_DEP_SVG = """
9
- <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="{lang}" id="{id}" class="displacy" width="{width}" height="{height}" direction="{dir}" style="max-width: none; height: {height}px; color: {color}; background: {bg}; font-family: {font}; direction: {dir}">{content}</svg>
10
- """
11
-
12
-
13
- TPL_DEP_WORDS = """
14
- <text class="displacy-token" fill="currentColor" text-anchor="middle" y="{y}">
15
- <tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
16
- <tspan class="displacy-tag" dy="2em" fill="currentColor" x="{x}">{tag}</tspan>
17
- </text>
18
- """
19
-
20
-
21
- TPL_DEP_WORDS_LEMMA = """
22
- <text class="displacy-token" fill="currentColor" text-anchor="middle" y="{y}">
23
- <tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
24
- <tspan class="displacy-lemma" dy="2em" fill="currentColor" x="{x}">{lemma}</tspan>
25
- <tspan class="displacy-tag" dy="2em" fill="currentColor" x="{x}">{tag}</tspan>
26
- </text>
27
- """
28
-
29
-
30
- TPL_DEP_ARCS = """
31
- <g class="displacy-arrow">
32
- <path class="displacy-arc" id="arrow-{id}-{i}" stroke-width="{stroke}px" d="{arc}" fill="none" stroke="currentColor"/>
33
- <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
34
- <textPath xlink:href="#arrow-{id}-{i}" class="displacy-label" startOffset="50%" side="{label_side}" fill="currentColor" text-anchor="middle">{label}</textPath>
35
- </text>
36
- <path class="displacy-arrowhead" d="{head}" fill="currentColor"/>
37
- </g>
38
- """
39
-
40
-
41
- TPL_FIGURE = """
42
- <figure style="margin-bottom: 6rem">{content}</figure>
43
- """
44
-
45
- TPL_TITLE = """
46
- <h2 style="margin: 0">{title}</h2>
47
- """
48
-
49
-
50
- TPL_ENTS = """
51
- <div class="entities" style="line-height: 2.5; direction: {dir}">{content}</div>
52
- """
53
-
54
-
55
- TPL_ENT = """
56
- <mark class="entity" style="background: {bg}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
57
- {text}
58
- <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem">{label}</span>
59
- </mark>
60
- """
61
-
62
- TPL_ENT_RTL = """
63
- <mark class="entity" style="background: {bg}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em">
64
- {text}
65
- <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-right: 0.5rem">{label}</span>
66
- </mark>
67
- """
68
-
69
-
70
- TPL_PAGE = """
71
- <!DOCTYPE html>
72
- <html lang="{lang}">
73
- <head>
74
- <title>displaCy</title>
75
- </head>
76
-
77
- <body style="font-size: 16px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; padding: 4rem 2rem; direction: {dir}">{content}</body>
78
- </html>
79
- """
80
-
81
-
82
- DEFAULT_LANG = "en"
83
- DEFAULT_DIR = "ltr"
84
-
85
-
86
- def minify_html(html):
87
- """Perform a template-specific, rudimentary HTML minification for displaCy.
88
- Disclaimer: NOT a general-purpose solution, only removes indentation and
89
- newlines.
90
-
91
- html (unicode): Markup to minify.
92
- RETURNS (unicode): "Minified" HTML.
93
- """
94
- return html.strip().replace(" ", "").replace("\n", "")
95
-
96
-
97
- def escape_html(text):
98
- """Replace <, >, &, " with their HTML encoded representation. Intended to prevent HTML errors
99
- in rendered displaCy markup.
100
-
101
- text (unicode): The original text. RETURNS (unicode): Equivalent text to be safely used within
102
- HTML.
103
- """
104
- text = text.replace("&", "&amp;")
105
- text = text.replace("<", "&lt;")
106
- text = text.replace(">", "&gt;")
107
- text = text.replace('"', "&quot;")
108
- return text
109
-
110
-
111
- class EntityRenderer(object):
112
- """Render named entities as HTML."""
113
-
114
- style = "ent"
115
-
116
- def __init__(self, options={}):
117
- """Initialise dependency renderer.
118
-
119
- options (dict): Visualiser-specific options (colors, ents)
120
- """
121
- colors = {
122
- "ORG": "#7aecec",
123
- "PRODUCT": "#bfeeb7",
124
- "GPE": "#feca74",
125
- "LOC": "#ff9561",
126
- "PERSON": "#aa9cfc",
127
- "NORP": "#c887fb",
128
- "FACILITY": "#9cc9cc",
129
- "EVENT": "#ffeb80",
130
- "LAW": "#ff8197",
131
- "LANGUAGE": "#ff8197",
132
- "WORK_OF_ART": "#f0d0ff",
133
- "DATE": "#bfe1d9",
134
- "TIME": "#bfe1d9",
135
- "MONEY": "#e4e7d2",
136
- "QUANTITY": "#e4e7d2",
137
- "ORDINAL": "#e4e7d2",
138
- "CARDINAL": "#e4e7d2",
139
- "PERCENT": "#e4e7d2",
140
- }
141
- # user_colors = registry.displacy_colors.get_all()
142
- # for user_color in user_colors.values():
143
- # colors.update(user_color)
144
- colors.update(options.get("colors", {}))
145
- self.default_color = "#ddd"
146
- self.colors = colors
147
- self.ents = options.get("ents", None)
148
- self.direction = DEFAULT_DIR
149
- self.lang = DEFAULT_LANG
150
-
151
- template = options.get("template")
152
- if template:
153
- self.ent_template = template
154
- else:
155
- if self.direction == "rtl":
156
- self.ent_template = TPL_ENT_RTL
157
- else:
158
- self.ent_template = TPL_ENT
159
-
160
- def render(self, parsed, page=False, minify=False):
161
- """Render complete markup.
162
-
163
- parsed (list): Dependency parses to render. page (bool): Render parses wrapped as full HTML
164
- page. minify (bool): Minify HTML markup. RETURNS (unicode): Rendered HTML markup.
165
- """
166
- rendered = []
167
- for i, p in enumerate(parsed):
168
- if i == 0:
169
- settings = p.get("settings", {})
170
- self.direction = settings.get("direction", DEFAULT_DIR)
171
- self.lang = settings.get("lang", DEFAULT_LANG)
172
- rendered.append(self.render_ents(p["text"], p["ents"], p.get("title")))
173
- if page:
174
- docs = "".join([TPL_FIGURE.format(content=doc) for doc in rendered])
175
- markup = TPL_PAGE.format(content=docs, lang=self.lang, dir=self.direction)
176
- else:
177
- markup = "".join(rendered)
178
- if minify:
179
- return minify_html(markup)
180
- return markup
181
-
182
- def render_ents(self, text, spans, title):
183
- """Render entities in text.
184
-
185
- text (unicode): Original text. spans (list): Individual entity spans and their start, end
186
- and label. title (unicode or None): Document title set in Doc.user_data['title'].
187
- """
188
- markup = ""
189
- offset = 0
190
- for span in spans:
191
- label = span["label"]
192
- start = span["start"]
193
- end = span["end"]
194
- additional_params = span.get("params", {})
195
- entity = escape_html(text[start:end])
196
- fragments = text[offset:start].split("\n")
197
- for i, fragment in enumerate(fragments):
198
- markup += escape_html(fragment)
199
- if len(fragments) > 1 and i != len(fragments) - 1:
200
- markup += "<br/>"
201
- if self.ents is None or label.upper() in self.ents:
202
- color = self.colors.get(label.upper(), self.default_color)
203
- ent_settings = {"label": label, "text": entity, "bg": color}
204
- ent_settings.update(additional_params)
205
- markup += self.ent_template.format(**ent_settings)
206
- else:
207
- markup += entity
208
- offset = end
209
- fragments = text[offset:].split("\n")
210
- for i, fragment in enumerate(fragments):
211
- markup += escape_html(fragment)
212
- if len(fragments) > 1 and i != len(fragments) - 1:
213
- markup += "<br/>"
214
- markup = TPL_ENTS.format(content=markup, dir=self.direction)
215
- if title:
216
- markup = TPL_TITLE.format(title=title) + markup
217
- return markup