Safetensors
wav2vec2-bert
indiejoseph committed on
Commit
3652907
·
verified ·
1 Parent(s): 6d58498

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +59 -0
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - mozilla-foundation/common_voice_17_0
5
+ base_model:
6
+ - facebook/w2v-bert-2.0
7
+ ---
8
+
9
+ # Fine-Tune Wav2Vec Bert 2.0 for Jyutping Recognition
10
+
11
+ ## Inference
12
+
13
+ Please clone the [repo](https://github.com/hon9kon9ize/wav2vec2bert-jyutping) and follow its instructions to run inference.
14
+
15
+ ```python
16
+ from model import Wav2Vec2BertForCantonese
17
+ from transformers import Wav2Vec2BertProcessor, SeamlessM4TFeatureExtractor, Wav2Vec2CTCTokenizer
18
+ import librosa
19
+
20
+ model_id = "hon9kon9ize/wav2vec2bert-jyutping"
21
+
22
+ tokenizer = Wav2Vec2CTCTokenizer(
23
+ "vocab.json", unk_token="[UNK]", pad_token="[PAD]", word_delimiter_token="|"
24
+ )
25
+ tone_tokenizer = Wav2Vec2CTCTokenizer(
26
+ "tone_vocab.json",
27
+ unk_token="[UNK]",
28
+ pad_token="[PAD]",
29
+ word_delimiter_token="|",
30
+ )
31
+
32
+ # load processor
33
+ feature_extractor = SeamlessM4TFeatureExtractor.from_pretrained(model_id)
34
+ processor = Wav2Vec2BertProcessor(
35
+ feature_extractor=feature_extractor, tokenizer=tokenizer
36
+ )
37
+
38
+ model = Wav2Vec2BertForCantonese.from_pretrained(
39
+ model_id,
40
+ attention_dropout=0.2,
41
+ hidden_dropout=0.2,
42
+ feat_proj_dropout=0.0,
43
+ mask_time_prob=0.0,
44
+ layerdrop=0.0,
45
+ add_adapter=True,
46
+ ctc_loss_reduction="mean",
47
+ pad_token_id=processor.tokenizer.pad_token_id,
48
+ vocab_size=len(processor.tokenizer),
49
+ ).eval().cuda()
50
+
51
+ test_audio = "test.wav"
52
+
53
+ audio_input, _ = librosa.load(test_audio, sr=16_000)
54
+ input_features = processor(audio_input, return_tensors="pt", sampling_rate=16_000).input_features[0]
55
+
56
+ output = model.inference(input_features=input_features.unsqueeze(0).cuda(), processor=processor, tone_tokenizer=tone_tokenizer)
57
+
58
+ print(output) # maa4 maa1 go3 jiu4 jiu2 jiu4 jiu4 juk6 zeoi3
59
+ ```