Bofeng Huang
committed on
Update README
Browse files
README.md
CHANGED
@@ -18,7 +18,7 @@ widget:
|
|
18 |
pipeline_tag: automatic-speech-recognition
|
19 |
---
|
20 |
|
21 |
-
# Whisper-Large-V3-Distil-Multi4
|
22 |
|
23 |
A multilingual distilled Whisper model with 2 decoder layers, supporting 4 European languages: English, French, Spanish, and German.
|
24 |
|
@@ -46,7 +46,7 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
46 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
47 |
|
48 |
# Load model
|
49 |
-
model_name_or_path = "bofenghuang/whisper-large-v3-distil-multi4"
|
50 |
processor = AutoProcessor.from_pretrained(model_name_or_path)
|
51 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name_or_path, torch_dtype=torch_dtype)
|
52 |
model.to(device)
|
@@ -101,8 +101,8 @@ print(transcription)
|
|
101 |
| openai/whisper-large-v3-turbo | 10.20 | 11.74 | 11.78 | 6.13 | 2.95 | 1.98 | 3.94 | 10.11 |
|
102 |
| distil-whisper/distil-large-v3 | 8.93 | 12.41 | 7.72 | 7.59 | 3.25 | 2.42 | 5.11 | 10.08 |
|
103 |
| distil-whisper/distil-large-v3.5 | 8.65 | 11.07 | 7.54 | 6.74 | 2.86 | 2.28 | 4.94 | 9.84 |
|
104 |
-
| bofenghuang/whisper-large-v3-distil-multi4 | 8.88 | 11.33 | 7.60 | 6.97 | 3.03 | 2.51 | 5.24 | 10.12 |
|
105 |
-
| bofenghuang/whisper-large-v3-distil-multi7 | 9.36 | 11.32 | 7.65 | 7.02 | 2.99 | 2.46 | 5.24 | 10.06 |
|
106 |
|
107 |
### French
|
108 |
|
@@ -111,8 +111,8 @@ print(transcription)
|
|
111 |
| openai/whisper-large-v3 | 10.98 | 4.69 | 11.15 | 8.67 | 7.51 | 5.4 | 9.87 | 8.97 | 9 | 8.01 |
|
112 |
| openai/whisper-large-v3-turbo | 12.41 | 5.1 | 12.21 | 9.87 | 8.37 | 5.48 | 10.12 | 9 | 8.49 | 8.39 |
|
113 |
| bofenghuang/whisper_large_v3_distil_fr_v0.2 | 11.1 | 5 | 10.68 | 8.75 | 7.09 | 6.35 | 9.44 | 9.84 | 8.94 | 8.93 |
|
114 |
-
| bofenghuang/whisper-large-v3-distil-multi4 | 11.96 | 6.04 | 11.07 | 9.16 | 7.99 | 7.10 | 10.42 | 12.61 | 9.06 | 11.75 |
|
115 |
-
| bofenghuang/whisper-large-v3-distil-multi7 | 12.19 | 6.2 | 11.29 | 9.13 | 8.26 | 7.17 | 10.04 | 12.26 | 8.93 | 11.56 |
|
116 |
|
117 |
### Spanish
|
118 |
|
@@ -120,8 +120,8 @@ print(transcription)
|
|
120 |
| ------------------------------------------ | ----- | ---- | --------- | ----- | ------ | ------------------- | ---------------------- | ------------- | ---------------- |
|
121 |
| openai/whisper-large-v3 | 4.91 | 3.97 | 11.06 | 6.52 | 4.22 | 10.85 | 10.36 | 5.90 | 5.22 |
|
122 |
| openai/whisper-large-v3-turbo | 5.74 | 4.41 | 16.02 | 6.66 | 4.59 | 11.55 | 10.68 | 6.46 | 5.41 |
|
123 |
-
| bofenghuang/whisper-large-v3-distil-multi4 | 5.58 | 4.34 | 8.52 | 7.43 | 5.20 | 11.26 | 13.43 | 5.69 | 8.95 |
|
124 |
-
| bofenghuang/whisper-large-v3-distil-multi7 | 5.70 | 4.35 | 8.55 | 7.56 | 5.15 | 11.45 | 13.54 | 5.84 | 8.27 |
|
125 |
|
126 |
### German
|
127 |
|
@@ -129,5 +129,5 @@ print(transcription)
|
|
129 |
| ------------------------------------------ | ----- | ---- | --------- | ----- | ------ | ------------------- | ---------------------- | ------------- | ---------------- |
|
130 |
| openai/whisper-large-v3 | 6.11 | 5.60 | 17.75 | 19.63 | 5.92 | 11.21 | 10.35 | 17.64 | 17.76 |
|
131 |
| openai/whisper-large-v3-turbo | 7.45 | 6.43 | 20.48 | 20.00 | 6.45 | 10.57 | 9.70 | 18.04 | 18.37 |
|
132 |
-
| bofenghuang/whisper-large-v3-distil-multi4 | 7.31 | 6.45 | 12.41 | 21.48 | 8.20 | 11.04 | 13.55 | 19.54 | 21.76 |
|
133 |
-
| bofenghuang/whisper-large-v3-distil-multi7 | 7.57 | 6.67 | 12.42 | 21.95 | 8.28 | 11.21 | 13.84 | 19.90 | 21.67 |
|
|
|
18 |
pipeline_tag: automatic-speech-recognition
|
19 |
---
|
20 |
|
21 |
+
# Whisper-Large-V3-Distil-Multi4-v0.2
|
22 |
|
23 |
A multilingual distilled Whisper model with 2 decoder layers, supporting 4 European languages: English, French, Spanish, and German.
|
24 |
|
|
|
46 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
47 |
|
48 |
# Load model
|
49 |
+
model_name_or_path = "bofenghuang/whisper-large-v3-distil-multi4-v0.2"
|
50 |
processor = AutoProcessor.from_pretrained(model_name_or_path)
|
51 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name_or_path, torch_dtype=torch_dtype)
|
52 |
model.to(device)
|
|
|
101 |
| openai/whisper-large-v3-turbo | 10.20 | 11.74 | 11.78 | 6.13 | 2.95 | 1.98 | 3.94 | 10.11 |
|
102 |
| distil-whisper/distil-large-v3 | 8.93 | 12.41 | 7.72 | 7.59 | 3.25 | 2.42 | 5.11 | 10.08 |
|
103 |
| distil-whisper/distil-large-v3.5 | 8.65 | 11.07 | 7.54 | 6.74 | 2.86 | 2.28 | 4.94 | 9.84 |
|
104 |
+
| bofenghuang/whisper-large-v3-distil-multi4-v0.2 | 8.88 | 11.33 | 7.60 | 6.97 | 3.03 | 2.51 | 5.24 | 10.12 |
|
105 |
+
| bofenghuang/whisper-large-v3-distil-multi7-v0.2 | 9.36 | 11.32 | 7.65 | 7.02 | 2.99 | 2.46 | 5.24 | 10.06 |
|
106 |
|
107 |
### French
|
108 |
|
|
|
111 |
| openai/whisper-large-v3 | 10.98 | 4.69 | 11.15 | 8.67 | 7.51 | 5.4 | 9.87 | 8.97 | 9 | 8.01 |
|
112 |
| openai/whisper-large-v3-turbo | 12.41 | 5.1 | 12.21 | 9.87 | 8.37 | 5.48 | 10.12 | 9 | 8.49 | 8.39 |
|
113 |
| bofenghuang/whisper_large_v3_distil_fr_v0.2 | 11.1 | 5 | 10.68 | 8.75 | 7.09 | 6.35 | 9.44 | 9.84 | 8.94 | 8.93 |
|
114 |
+
| bofenghuang/whisper-large-v3-distil-multi4-v0.2 | 11.96 | 6.04 | 11.07 | 9.16 | 7.99 | 7.10 | 10.42 | 12.61 | 9.06 | 11.75 |
|
115 |
+
| bofenghuang/whisper-large-v3-distil-multi7-v0.2 | 12.19 | 6.2 | 11.29 | 9.13 | 8.26 | 7.17 | 10.04 | 12.26 | 8.93 | 11.56 |
|
116 |
|
117 |
### Spanish
|
118 |
|
|
|
120 |
| ------------------------------------------ | ----- | ---- | --------- | ----- | ------ | ------------------- | ---------------------- | ------------- | ---------------- |
|
121 |
| openai/whisper-large-v3 | 4.91 | 3.97 | 11.06 | 6.52 | 4.22 | 10.85 | 10.36 | 5.90 | 5.22 |
|
122 |
| openai/whisper-large-v3-turbo | 5.74 | 4.41 | 16.02 | 6.66 | 4.59 | 11.55 | 10.68 | 6.46 | 5.41 |
|
123 |
+
| bofenghuang/whisper-large-v3-distil-multi4-v0.2 | 5.58 | 4.34 | 8.52 | 7.43 | 5.20 | 11.26 | 13.43 | 5.69 | 8.95 |
|
124 |
+
| bofenghuang/whisper-large-v3-distil-multi7-v0.2 | 5.70 | 4.35 | 8.55 | 7.56 | 5.15 | 11.45 | 13.54 | 5.84 | 8.27 |
|
125 |
|
126 |
### German
|
127 |
|
|
|
129 |
| ------------------------------------------ | ----- | ---- | --------- | ----- | ------ | ------------------- | ---------------------- | ------------- | ---------------- |
|
130 |
| openai/whisper-large-v3 | 6.11 | 5.60 | 17.75 | 19.63 | 5.92 | 11.21 | 10.35 | 17.64 | 17.76 |
|
131 |
| openai/whisper-large-v3-turbo | 7.45 | 6.43 | 20.48 | 20.00 | 6.45 | 10.57 | 9.70 | 18.04 | 18.37 |
|
132 |
+
| bofenghuang/whisper-large-v3-distil-multi4-v0.2 | 7.31 | 6.45 | 12.41 | 21.48 | 8.20 | 11.04 | 13.55 | 19.54 | 21.76 |
|
133 |
+
| bofenghuang/whisper-large-v3-distil-multi7-v0.2 | 7.57 | 6.67 | 12.42 | 21.95 | 8.28 | 11.21 | 13.84 | 19.90 | 21.67 |
|