Update spaCy pipeline

Changed files:

- README.md +2 -2
- __pycache__/use_custom_tokenizer.cpython-312.pyc +0 -0
- base_transformer/model +1 -1
- config.cfg +1 -4
- de_trf_nrp-any-py3-none-any.whl +2 -2
- meta.json +0 -2
- ner_transformer/model +1 -1
- use_custom_tokenizer.py +5 -6
README.md
CHANGED

@@ -26,8 +26,8 @@ model-index:
 | **Name** | `de_trf_nrp` |
 | **Version** | `0.0.0` |
 | **spaCy** | `>=3.8.3,<3.9.0` |
-| **Default Pipeline** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
-| **Components** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Default Pipeline** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Components** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
 | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
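As a quick sanity check after installing the wheel, the loaded pipeline should report exactly the seven components listed above. A minimal sketch:

    import spacy

    nlp = spacy.load("de_trf_nrp")

    # merge_entities should no longer appear after this commit.
    print(nlp.pipe_names)
    # ['ner_transformer', 'ner', 'base_transformer', 'morphologizer',
    #  'tagger', 'parser', 'trainable_lemmatizer']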
__pycache__/use_custom_tokenizer.cpython-312.pyc
ADDED

Binary file (1.04 kB)
base_transformer/model
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e2b982402ec283f46b2fdfcac055537916109a1782b6b35369bfded04fcbdee4
 size 440759145
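Both `model` files here are Git LFS pointers rather than the weights themselves. After `git lfs pull`, the fetched blob can be checked against the pointer's `oid` and `size` fields, for example with a short script (path assumed relative to the checkout):

    import hashlib
    import pathlib

    path = pathlib.Path("base_transformer/model")  # the fetched LFS object
    data = path.read_bytes()

    print("size  :", len(data))                         # expect 440759145
    print("sha256:", hashlib.sha256(data).hexdigest())  # expect the pointer's oid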
config.cfg
CHANGED

@@ -10,7 +10,7 @@ seed = 17
 
 [nlp]
 lang = "de"
-pipeline = ["ner_transformer","ner","merge_entities","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
+pipeline = ["ner_transformer","ner","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
 batch_size = 512
 disabled = []
 before_creation = null
@@ -43,9 +43,6 @@ use_fast = true
 
 [components.base_transformer.model.transformer_config]
 
-[components.merge_entities]
-factory = "merge_entities"
-
 [components.morphologizer]
 factory = "morphologizer"
 extend = false
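Since `merge_entities` is a stock spaCy factory, anyone who relied on the dropped component can re-attach it at load time instead of patching the packaged config. A minimal sketch, assuming the wheel is installed:

    import spacy

    nlp = spacy.load("de_trf_nrp")

    # merge_entities is a built-in spaCy factory; placing it after "ner"
    # retokenizes each named entity into a single token, as the old pipeline did.
    nlp.add_pipe("merge_entities", after="ner")

    doc = nlp("Angela Merkel besuchte gestern Berlin.")
    print([t.text for t in doc])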
de_trf_nrp-any-py3-none-any.whl
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f3ceaf71530479286554f5f67226794741a6a306a84011daa39a2fa9c87709a2
+size 865702938
meta.json
CHANGED

@@ -1830,7 +1830,6 @@
 "pipeline":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
@@ -1840,7 +1839,6 @@
 "components":[
 "ner_transformer",
 "ner",
-"merge_entities",
 "base_transformer",
 "morphologizer",
 "tagger",
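The pipeline listing in `meta.json` must stay consistent with `[nlp].pipeline` in `config.cfg`, which is what this commit keeps in sync. A quick consistency check (paths assumed relative to the package root):

    import json
    from spacy.util import load_config

    config = load_config("config.cfg")
    with open("meta.json", encoding="utf-8") as f:
        meta = json.load(f)

    # After this commit both listings should agree, with merge_entities gone.
    assert "merge_entities" not in meta["pipeline"]
    assert list(config["nlp"]["pipeline"]) == meta["pipeline"]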
ner_transformer/model
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:83cbeeb733016f569fda192d17aef837b9870753c705785de7e4f3b55822f941
 size 440759145
use_custom_tokenizer.py
CHANGED

@@ -1,13 +1,12 @@
 from spacy.util import registry
-
-
-    custom_tokenizer,
-)
-
+from spacy.tokenizer import Tokenizer
+import pathlib
 
 @registry.tokenizers("customize_tokenizer")
 def make_customize_tokenizer():
     def customize_tokenizer(nlp):
-
+        tokenizer = Tokenizer(nlp.vocab)
+        script_dir = pathlib.Path(__file__).parent.resolve()
+        return tokenizer.from_disk(script_dir / "tokenizer")
 
     return customize_tokenizer
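For context on how this hook is consumed: spaCy resolves the registered name from the `[nlp.tokenizer]` block of the config (that block is not shown in this diff, so the wiring below is an assumption), calls the returned `customize_tokenizer(nlp)` callback at load time, and installs the tokenizer it returns:

    # Assumed wiring in config.cfg (not part of this diff):
    #
    #   [nlp.tokenizer]
    #   @tokenizers = "customize_tokenizer"
    #
    # Importing the module registers the factory before loading.
    import use_custom_tokenizer  # noqa: F401
    import spacy

    nlp = spacy.load("de_trf_nrp")
    print(type(nlp.tokenizer))  # the Tokenizer restored from the packaged "tokenizer" file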