Lazy-Val committed (verified)
Commit 1501920 · Parent(s): 9a13c7f

Update spaCy pipeline

README.md CHANGED
@@ -26,8 +26,8 @@ model-index:
 | **Name** | `de_trf_nrp` |
 | **Version** | `0.0.0` |
 | **spaCy** | `>=3.8.3,<3.9.0` |
-| **Default Pipeline** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
-| **Components** | `ner_transformer`, `ner`, `merge_entities`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Default Pipeline** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
+| **Components** | `ner_transformer`, `ner`, `base_transformer`, `morphologizer`, `tagger`, `parser`, `trainable_lemmatizer` |
 | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
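
The only substantive README change is that `merge_entities` drops out of the pipeline tables. A minimal sketch of checking the loaded pipeline after this commit, assuming the packaged `de_trf_nrp` wheel from this repo has been pip-installed:

```python
# Hedged sketch: assumes the de_trf_nrp wheel is installed in this environment.
import spacy

nlp = spacy.load("de_trf_nrp")
print(nlp.pipe_names)
# Expected after this commit (no "merge_entities"):
# ['ner_transformer', 'ner', 'base_transformer', 'morphologizer',
#  'tagger', 'parser', 'trainable_lemmatizer']
```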
__pycache__/use_custom_tokenizer.cpython-312.pyc ADDED
Binary file (1.04 kB).
 
base_transformer/model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c7c7401f3ae369d31fccbbc168d7139b9dab09437344c82baa2778b0482d871
+oid sha256:e2b982402ec283f46b2fdfcac055537916109a1782b6b35369bfded04fcbdee4
 size 440759145
config.cfg CHANGED
@@ -10,7 +10,7 @@ seed = 17
 
 [nlp]
 lang = "de"
-pipeline = ["ner_transformer","ner","merge_entities","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
+pipeline = ["ner_transformer","ner","base_transformer","morphologizer","tagger","parser","trainable_lemmatizer"]
 batch_size = 512
 disabled = []
 before_creation = null
@@ -43,9 +43,6 @@ use_fast = true
 
 [components.base_transformer.model.transformer_config]
 
-[components.merge_entities]
-factory = "merge_entities"
-
 [components.morphologizer]
 factory = "morphologizer"
 extend = false
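
For context, `merge_entities` is spaCy's built-in factory that retokenizes each entity span into a single token; with it gone, every component after `ner` now sees the original tokens. A minimal sketch of the behavior this commit removes (the sentence and the hand-set entity are invented for illustration):

```python
import spacy
from spacy.tokens import Span

nlp = spacy.blank("de")
# The built-in component that this commit removes from the pipeline.
merge = nlp.add_pipe("merge_entities")

doc = nlp.make_doc("Max Mustermann wohnt in Berlin")
doc.ents = [Span(doc, 0, 2, label="PER")]  # mark "Max Mustermann" by hand
doc = merge(doc)
print([t.text for t in doc])
# ['Max Mustermann', 'wohnt', 'in', 'Berlin'] -- the entity is one token.
# Without merge_entities, the morphologizer, tagger, parser and lemmatizer
# operate on the unmerged tokens instead.
```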
de_trf_nrp-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19d025131312ebc27af7588726406e6d54c697a0c050e2c761a776aab105c256
-size 865701680
+oid sha256:f3ceaf71530479286554f5f67226794741a6a306a84011daa39a2fa9c87709a2
+size 865702938
meta.json CHANGED
@@ -1830,7 +1830,6 @@
   "pipeline":[
     "ner_transformer",
     "ner",
-    "merge_entities",
     "base_transformer",
     "morphologizer",
     "tagger",
@@ -1840,7 +1839,6 @@
   "components":[
     "ner_transformer",
     "ner",
-    "merge_entities",
     "base_transformer",
     "morphologizer",
     "tagger",
ner_transformer/model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dada82ed46518721927ad87b0662e3602c24c5642e62a10a87400fa9383855d8
+oid sha256:83cbeeb733016f569fda192d17aef837b9870753c705785de7e4f3b55822f941
 size 440759145
use_custom_tokenizer.py CHANGED
@@ -1,13 +1,12 @@
 from spacy.util import registry
-
-from commercial_registry_ner.spacy.custom_tokenizer.custom_tokenizer import (
-    custom_tokenizer,
-)
-
+from spacy.tokenizer import Tokenizer
+import pathlib
 
 @registry.tokenizers("customize_tokenizer")
 def make_customize_tokenizer():
     def customize_tokenizer(nlp):
-        return custom_tokenizer(nlp)
+        tokenizer = Tokenizer(nlp.vocab)
+        script_dir = pathlib.Path(__file__).parent.resolve()
+        return tokenizer.from_disk(script_dir / "tokenizer")
 
     return customize_tokenizer
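
The rewritten script no longer imports from the `commercial_registry_ner` package: it reconstructs a plain `spacy.tokenizer.Tokenizer` from settings serialized next to the script (a `tokenizer` directory, which this commit implies but does not show). A hedged sketch of how such a registry entry is normally wired up; the config fragment is spaCy's standard mechanism and is an assumption here, since the `[nlp.tokenizer]` block is not part of this diff:

```python
# In config.cfg, the registered tokenizer would typically be referenced as:
#
#   [nlp.tokenizer]
#   @tokenizers = "customize_tokenizer"
#
# At load time spaCy resolves that registry entry, calls the inner
# customize_tokenizer(nlp), and Tokenizer.from_disk restores the serialized
# rules from the "tokenizer" directory beside the script.
import spacy
import use_custom_tokenizer  # noqa: F401  (registers "customize_tokenizer")

nlp = spacy.load("de_trf_nrp")
doc = nlp("Die Muster GmbH wurde eingetragen.")  # invented example sentence
print(type(nlp.tokenizer).__name__)  # Tokenizer, restored via from_disk
```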