Spaces:
Build error
Build error
Commit
·
35dce23
1
Parent(s):
d65f544
add dolphin
Browse files
- model.py +51 -1
- requirements.txt +1 -1
model.py
CHANGED
@@ -164,7 +164,11 @@ def get_pretrained_model(
|
|
164 |
decoding_method: str,
|
165 |
num_active_paths: int,
|
166 |
) -> Union[sherpa.OfflineRecognizer, sherpa.OnlineRecognizer]:
|
167 |
-
if repo_id in
|
|
|
|
|
|
|
|
|
168 |
return chinese_models[repo_id](
|
169 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
170 |
)
|
@@ -921,6 +925,43 @@ def _get_alimeeting_pre_trained_model(
|
|
921 |
return recognizer
|
922 |
|
923 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
924 |
@lru_cache(maxsize=10)
|
925 |
def _get_wenet_model(
|
926 |
repo_id: str,
|
@@ -1780,6 +1821,13 @@ english_models = {
|
|
1780 |
"csukuangfj/wenet-english-model": _get_wenet_model,
|
1781 |
}
|
1782 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1783 |
chinese_english_mixed_models = {
|
1784 |
"csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16": _get_fire_red_asr_models,
|
1785 |
"csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": _get_streaming_zipformer_pre_trained_model,
|
@@ -1843,6 +1891,7 @@ thai_models = {
|
|
1843 |
|
1844 |
|
1845 |
all_models = {
|
|
|
1846 |
**chinese_models,
|
1847 |
**english_models,
|
1848 |
**chinese_english_mixed_models,
|
@@ -1860,6 +1909,7 @@ all_models = {
|
|
1860 |
}
|
1861 |
|
1862 |
language_to_models = {
|
|
|
1863 |
"超多种中文方言": list(chinese_dialect_models.keys()),
|
1864 |
"Chinese": list(chinese_models.keys()),
|
1865 |
"English": list(english_models.keys()),
|
|
|
164 |
decoding_method: str,
|
165 |
num_active_paths: int,
|
166 |
) -> Union[sherpa.OfflineRecognizer, sherpa.OnlineRecognizer]:
|
167 |
+
if repo_id in multi_lingual_models:
|
168 |
+
return multi_lingual_models[repo_id](
|
169 |
+
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
170 |
+
)
|
171 |
+
elif repo_id in chinese_models:
|
172 |
return chinese_models[repo_id](
|
173 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
174 |
)
|
|
|
925 |
return recognizer
|
926 |
|
927 |
|
928 |
+
@lru_cache(maxsize=4)
|
929 |
+
def _get_dolphin_ctc_models(repo_id: str, decoding_method: str, num_active_paths: int):
|
930 |
+
assert repo_id in [
|
931 |
+
"csukuangfj/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
|
932 |
+
"csukuangfj/sherpa-onnx-dolphin-small-ctc-multi-lang-int8-2025-04-02",
|
933 |
+
"csukuangfj/sherpa-onnx-dolphin-base-ctc-multi-lang-2025-04-02",
|
934 |
+
"csukuangfj/sherpa-onnx-dolphin-small-ctc-multi-lang-2025-04-02",
|
935 |
+
], repo_id
|
936 |
+
|
937 |
+
if repo_id in [
|
938 |
+
"csukuangfj/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
|
939 |
+
"csukuangfj/sherpa-onnx-dolphin-small-ctc-multi-lang-int8-2025-04-02",
|
940 |
+
]:
|
941 |
+
use_int8 = True
|
942 |
+
else:
|
943 |
+
use_int8 = False
|
944 |
+
|
945 |
+
nn_model = _get_nn_model_filename(
|
946 |
+
repo_id=repo_id,
|
947 |
+
filename="model.int8.onnx" if use_int8 else "model.onnx",
|
948 |
+
subfolder=".",
|
949 |
+
)
|
950 |
+
tokens = _get_token_filename(
|
951 |
+
repo_id=repo_id,
|
952 |
+
filename="tokens.txt",
|
953 |
+
subfolder=".",
|
954 |
+
)
|
955 |
+
|
956 |
+
recognizer = sherpa_onnx.OfflineRecognizer.from_dolphin_ctc(
|
957 |
+
tokens=tokens,
|
958 |
+
model=nn_model,
|
959 |
+
num_threads=2,
|
960 |
+
)
|
961 |
+
|
962 |
+
return recognizer
|
963 |
+
|
964 |
+
|
965 |
@lru_cache(maxsize=10)
|
966 |
def _get_wenet_model(
|
967 |
repo_id: str,
|
|
|
1821 |
"csukuangfj/wenet-english-model": _get_wenet_model,
|
1822 |
}
|
1823 |
|
1824 |
+
# Every Dolphin CTC repository is served by the same loader, so build the
# repo_id -> loader table from the list of repository ids.
multi_lingual_models = dict.fromkeys(
    (
        "csukuangfj/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
        "csukuangfj/sherpa-onnx-dolphin-small-ctc-multi-lang-int8-2025-04-02",
        "csukuangfj/sherpa-onnx-dolphin-base-ctc-multi-lang-2025-04-02",
        "csukuangfj/sherpa-onnx-dolphin-small-ctc-multi-lang-2025-04-02",
    ),
    _get_dolphin_ctc_models,
)
|
1830 |
+
|
1831 |
chinese_english_mixed_models = {
|
1832 |
"csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16": _get_fire_red_asr_models,
|
1833 |
"csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": _get_streaming_zipformer_pre_trained_model,
|
|
|
1891 |
|
1892 |
|
1893 |
all_models = {
|
1894 |
+
**multi_lingual_models,
|
1895 |
**chinese_models,
|
1896 |
**english_models,
|
1897 |
**chinese_english_mixed_models,
|
|
|
1909 |
}
|
1910 |
|
1911 |
language_to_models = {
|
1912 |
+
"Multi-lingual (east aisa)": list(multi_lingual_models.keys()),
|
1913 |
"超多种中文方言": list(chinese_dialect_models.keys()),
|
1914 |
"Chinese": list(chinese_models.keys()),
|
1915 |
"English": list(english_models.keys()),
|
requirements.txt
CHANGED
@@ -10,6 +10,6 @@ numpy
|
|
10 |
|
11 |
huggingface_hub
|
12 |
|
13 |
-
https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.
|
14 |
|
15 |
#sherpa-onnx>=1.10.36
|
|
|
10 |
|
11 |
huggingface_hub
|
12 |
|
13 |
+
https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.11.2/sherpa_onnx-1.11.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
14 |
|
15 |
#sherpa-onnx>=1.10.36
|