ftshijt committed on
Commit
c1ed71e
·
1 Parent(s): 00c66b2

update versa setup with build in profile

Browse files
Files changed (3) hide show
  1. .profile +6 -0
  2. app.py +627 -89
  3. build.sh +58 -0
.profile ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # This file is executed during setup phase of the Hugging Face Space
3
+
4
+ # Execute build.sh to install VERSA and its dependencies
5
+ chmod +x build.sh
6
+ ./build.sh
app.py CHANGED
@@ -11,49 +11,263 @@ import matplotlib.pyplot as plt
11
  import time
12
  from pathlib import Path
13
 
14
- # Check if VERSA is installed, if not, clone and install it
15
  VERSA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "versa")
 
 
16
 
17
- def setup_versa():
18
- """Set up VERSA and its dependencies if not already installed"""
 
19
  if not os.path.exists(VERSA_ROOT):
20
- print("VERSA not found. Installing...")
21
- # Clone VERSA repository
22
- subprocess.run(
23
- ["git", "clone", "https://github.com/shinjiwlab/versa.git", VERSA_ROOT],
24
- check=True
25
- )
26
-
27
- # Install VERSA
28
- subprocess.run(
29
- ["pip", "install", "-e", VERSA_ROOT],
30
- check=True
31
- )
32
-
33
- print("VERSA installed successfully!")
34
- else:
35
- print("VERSA already installed.")
36
-
37
- # Install VERSA if not already installed
38
- setup_versa()
39
 
40
- # VERSA paths
41
- VERSA_BIN = os.path.join(VERSA_ROOT, "versa", "bin", "scorer.py")
42
- VERSA_CONFIG_DIR = os.path.join(VERSA_ROOT, "egs")
 
 
 
 
43
 
44
  # Create data directory if it doesn't exist
45
  DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
46
  UPLOAD_DIR = os.path.join(DATA_DIR, "uploads")
47
  RESULTS_DIR = os.path.join(DATA_DIR, "results")
 
48
 
49
- for directory in [DATA_DIR, UPLOAD_DIR, RESULTS_DIR]:
50
  os.makedirs(directory, exist_ok=True)
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # Find available metric configs
53
  def get_available_metrics():
54
  """Get list of available metrics from VERSA config directory"""
55
  metrics = []
56
 
 
 
 
 
57
  # Get all YAML files from the egs directory
58
  for root, _, files in os.walk(VERSA_CONFIG_DIR):
59
  for file in files:
@@ -63,32 +277,217 @@ def get_available_metrics():
63
  rel_path = os.path.relpath(path, VERSA_CONFIG_DIR)
64
  metrics.append(rel_path)
65
 
 
 
 
 
 
 
 
 
66
  return sorted(metrics)
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  # Get metric description from YAML file
69
  def get_metric_description(metric_path):
70
  """Get description of a metric from its YAML file"""
71
- full_path = os.path.join(VERSA_CONFIG_DIR, metric_path)
 
 
 
 
 
 
 
 
 
72
  try:
73
  with open(full_path, 'r') as f:
74
  config = yaml.safe_load(f)
75
- return config.get('description', 'No description available')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  except Exception as e:
77
  return f"Could not load description: {str(e)}"
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # Process audio files and run VERSA evaluation
80
  def evaluate_audio(gt_file, pred_file, metric_config, include_timestamps=False):
81
  """Evaluate audio files using VERSA"""
 
 
 
82
  if gt_file is None or pred_file is None:
83
- return "Please upload both ground truth and prediction audio files."
 
 
 
 
 
 
 
 
84
 
85
  # Create temp directory for results
86
  with tempfile.TemporaryDirectory() as temp_dir:
87
  output_file = os.path.join(temp_dir, "result.json")
88
 
89
- # Full path to metric config
90
- metric_config_path = os.path.join(VERSA_CONFIG_DIR, metric_config)
91
-
92
  # Build command
93
  cmd = [
94
  sys.executable, VERSA_BIN,
@@ -133,68 +532,207 @@ def create_gradio_demo():
133
  """Create the Gradio demo interface"""
134
  available_metrics = get_available_metrics()
135
  default_metric = "speech.yaml" if "speech.yaml" in available_metrics else available_metrics[0] if available_metrics else None
 
136
 
137
  with gr.Blocks(title="VERSA Speech & Audio Evaluation Demo") as demo:
138
  gr.Markdown("# VERSA: Versatile Evaluation of Speech and Audio")
139
- gr.Markdown("Upload audio files to evaluate them using VERSA metrics.")
140
 
 
141
  with gr.Row():
142
- with gr.Column():
143
- gt_audio = gr.Audio(label="Ground Truth Audio", type="filepath", sources=["upload", "microphone"])
144
- pred_audio = gr.Audio(label="Prediction Audio", type="filepath", sources=["upload", "microphone"])
145
-
146
- metric_dropdown = gr.Dropdown(
147
- choices=available_metrics,
148
- label="Evaluation Metric",
149
- value=default_metric
150
- )
151
-
152
- metric_description = gr.Textbox(
153
- label="Metric Description",
154
- value=get_metric_description(default_metric) if default_metric else "",
155
- interactive=False
156
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
- include_timestamps = gr.Checkbox(
159
- label="Include Timestamps in Results",
160
- value=False
161
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- eval_button = gr.Button("Evaluate")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
- with gr.Column():
166
- results_table = gr.Dataframe(label="Evaluation Results")
167
- raw_json = gr.Code(language="json", label="Raw Results")
168
-
169
- # Event handlers
170
- def update_metric_description(metric_path):
171
- return get_metric_description(metric_path)
172
-
173
- metric_dropdown.change(
174
- fn=update_metric_description,
175
- inputs=[metric_dropdown],
176
- outputs=[metric_description]
177
- )
178
-
179
- eval_button.click(
180
- fn=evaluate_audio,
181
- inputs=[gt_audio, pred_audio, metric_dropdown, include_timestamps],
182
- outputs=[results_table, raw_json]
183
- )
184
-
185
- gr.Markdown("""
186
- ## About VERSA
187
-
188
- VERSA (Versatile Evaluation of Speech and Audio) is a toolkit dedicated to collecting evaluation metrics in speech and audio quality. It provides a comprehensive connection to cutting-edge evaluation techniques and is tightly integrated with ESPnet.
189
-
190
- With full installation, VERSA offers over 60 metrics with 700+ metric variations based on different configurations. These metrics encompass evaluations utilizing diverse external resources, including matching and non-matching reference audio, text transcriptions, and text captions.
191
-
192
- Learn more at [VERSA GitHub Repository](https://github.com/shinjiwlab/versa)
193
- """)
194
-
195
- return demo
196
-
197
- # Launch the app
198
- if __name__ == "__main__":
199
- demo = create_gradio_demo()
200
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  import time
12
  from pathlib import Path
13
 
14
+ # VERSA paths - these should be set up during the build phase
15
  VERSA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "versa")
16
+ VERSA_BIN = os.path.join(VERSA_ROOT, "versa", "bin", "scorer.py")
17
+ VERSA_CONFIG_DIR = os.path.join(VERSA_ROOT, "egs")
18
 
19
+ # Check if VERSA is installed
20
def check_versa_installation():
    """Verify that the VERSA checkout expected by this app is present.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is ``False`` as soon as one of the
        required paths is missing, and ``message`` describes that first
        failure; otherwise ``ok`` is ``True`` with a confirmation message.
    """
    # Ordered (path, failure message) pairs; the first missing path wins.
    required_paths = (
        (
            VERSA_ROOT,
            "VERSA directory not found. The build process may have failed.",
        ),
        (
            VERSA_BIN,
            "VERSA binary not found. The installation may be incomplete.",
        ),
        (
            VERSA_CONFIG_DIR,
            "VERSA configuration directory not found. The installation may be incomplete.",
        ),
        (
            # Marker file created by build.sh at the end of installation.
            os.path.join(VERSA_ROOT, ".installation_complete"),
            "VERSA installation indicator file not found. The build process may have failed.",
        ),
    )

    for required_path, failure_message in required_paths:
        if not os.path.exists(required_path):
            return False, failure_message

    return True, "VERSA is properly installed."
 
 
 
 
 
 
36
 
37
+ # Check VERSA installation at startup
38
+ versa_installed, versa_status = check_versa_installation()
39
+ if not versa_installed:
40
+ print(f"WARNING: {versa_status}")
41
+ print("The application may not function correctly without VERSA.")
42
+ else:
43
+ print("VERSA installation verified successfully.")
44
 
45
  # Create data directory if it doesn't exist
46
  DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
47
  UPLOAD_DIR = os.path.join(DATA_DIR, "uploads")
48
  RESULTS_DIR = os.path.join(DATA_DIR, "results")
49
+ CONFIG_DIR = os.path.join(DATA_DIR, "configs")
50
 
51
+ for directory in [DATA_DIR, UPLOAD_DIR, RESULTS_DIR, CONFIG_DIR]:
52
  os.makedirs(directory, exist_ok=True)
53
 
54
+ # Save the default universal metrics YAML file
55
+ UNIVERSAL_METRICS_YAML = os.path.join(CONFIG_DIR, "universal_metrics.yaml")
56
+ if not os.path.exists(UNIVERSAL_METRICS_YAML):
57
+ with open(UNIVERSAL_METRICS_YAML, 'w') as f:
58
+ f.write("""# Universal Metrics Configuration for Versa
59
+ # This file contains the configuration for various universal metrics used in speech quality assessment.
60
+
61
+ # visqol metric
62
+ # -- visqol: visual quality of speech
63
+ - name: visqol
64
+ model: default
65
+
66
+ # Word error rate with ESPnet-OWSM model
67
+ # More model_tag can be from the ESPnet huggingface https://huggingface.co/espnet .
68
+ # The default model is `espnet/owsm_v3.1_ebf`.
69
+ # --lid: the nbest language tag
70
+ - name: lid
71
+ model_tag: default
72
+ nbest: 1
73
+
74
+ # nomad (reference-based) metric
75
+ # -- nomad: nomad reference-based model
76
+ - name: nomad
77
+ model_cache: versa_cache/nomad_pt-models
78
+
79
+ # srmr related metrics
80
+ # -- srmr: speech-to-reverberation modulation energy ratio
81
+ - name: srmr
82
+ n_cochlear_filters: 23
83
+ low_freq: 125
84
+ min_cf: 4
85
+ max_cf: 128
86
+ fast: True
87
+ norm: False
88
+
89
+ # Emotion similarity calculated based on emo2vec
90
+ # --emo2vec_similarity: the emotion similarity with emo2vec
91
+ - name: emo2vec_similarity
92
+
93
+ # noresqa related metrics
94
+ # -- noresqa: non-matching reference based speech quality assessment
95
+ - name: noresqa
96
+ metric_type: 1 #0: NORESQA-score, 1: NORESQA-MOS
97
+
98
+ # pysepm related metrics
99
+ # -- pysepm_fwsegsnr: frequency-weighted segmental SNR
100
+ # -- pysepm_llr: Log likelihood ratio
101
+ # -- pysepm_wss: weighted spectral slope
102
+ # -- pysepm_cd: cepstral distance objective speech quality measure
103
+ # -- pysepm_Csig, pysepm_Cbak, pysepm_Covl: composite objective speech quality
104
+ # -- pysepm_csii_high, pysepm_csii_mid, pysepm_csii_low: coherence and speech intelligibility index
105
+ # -- pysepm_ncm: normalized-covariance measure
106
+ - name: pysepm
107
+
108
+ # nisqa score for speech quality assessment
109
+ # -- nisqa_mos_pred: NISQA MOS prediction
110
+ # -- nisqa_noi_pred: NISQA noise prediction
111
+ # -- nisqa_dis_pred: NISQA distortion prediction
112
+ # -- nisqa_col_pred: NISQA color prediction
113
+ # --nisqa_loud_pred: NISQA loudness prediction
114
+ # NOTE(jiatong): pretrain model can be downloaded with `./tools/setup_nisqa.sh`
115
+ - name: nisqa
116
+ nisqa_model_path: ./tools/NISQA/weights/nisqa.tar
117
+
118
+ # discrete speech metrics
119
+ # -- speech_bert: speech bert score
120
+ # -- speech_bleu: speech bleu score
121
+ # -- speech_token_distance: speech token distance score
122
+ - name: discrete_speech
123
+
124
+ # mcd f0 related metrics
125
+ # -- mcd: mel cepstral distortion
126
+ # -- f0_corr: f0 correlation
127
+ # -- f0_rmse: f0 root mean square error
128
+ - name: mcd_f0
129
+ f0min: 40
130
+ f0max: 800
131
+ mcep_shift: 5
132
+ mcep_fftl: 1024
133
+ mcep_dim: 39
134
+ mcep_alpha: 0.466
135
+ seq_mismatch_tolerance: 0.1
136
+ power_threshold: -20
137
+ dtw: false
138
+
139
+ # An overall model on MOS-bench from Sheet toolkit
140
+ # --sheet_ssqa: the mos prediction from sheet_ssqa
141
+ - name: sheet_ssqa
142
+
143
+ # pesq related metrics
144
+ # -- pesq: perceptual evaluation of speech quality
145
+ - name: pesq
146
+
147
+ # stoi related metrics
148
+ # -- stoi: short-time objective intelligibility
149
+ - name: stoi
150
+
151
+ # pseudo subjective metrics
152
+ # -- utmos: UT-MOS score
153
+ # -- dnsmos: DNS-MOS score
154
+ # -- plcmos: PLC-MOS score
155
+ # -- aecmos: AEC-MOS score
156
+ - name: pseudo_mos
157
+ predictor_types: ["utmos", "dnsmos", "plcmos", "singmos", "utmosv2"]
158
+ predictor_args:
159
+ utmos:
160
+ fs: 16000
161
+ dnsmos:
162
+ fs: 16000
163
+ plcmos:
164
+ fs: 16000
165
+ singmos:
166
+ fs: 16000
167
+ utmosv2:
168
+ fs: 16000
169
+
170
+ # Word error rate with OpenAI-Whisper model
171
+ # -- whisper_wer: word error rate of openai-whisper
172
+ - name: whisper_wer
173
+ model_tag: default
174
+ beam_size: 1
175
+ text_cleaner: whisper_basic
176
+
177
+ # scoreq (reference-based) metric
178
+ # -- scoreq_ref: scoreq reference-based model
179
+ - name: scoreq_ref
180
+ data_domain: natural
181
+ model_cache: versa_cache/scoreq_pt-models
182
+
183
+ # scoreq (non-reference-based) metric
184
+ # -- scoreq_nr: scoreq non-reference-based model
185
+ - name: scoreq_nr
186
+ data_domain: natural
187
+ model_cache: versa_cache/scoreq_pt-models
188
+
189
+ # Speech Enhancement-based Metrics
190
+ # model tag can be any ESPnet-SE huggingface repo
191
+ # -- se_si_snr: the SI-SNR from a reference speech enhancement model
192
+ - name: se_snr
193
+ model_tag: default
194
+
195
+ # PAM: Prompting Audio-Language Models for Audio Quality Assessment
196
+ # https://github.com/soham97/PAM/tree/main
197
+
198
+ - name: pam
199
+ repro: true
200
+ cache_dir: versa_cache/pam
201
+ io: soundfile
202
+ # TEXT ENCODER CONFIG
203
+ text_model: 'gpt2'
204
+ text_len: 77
205
+ transformer_embed_dim: 768
206
+ freeze_text_encoder_weights: True
207
+ # AUDIO ENCODER CONFIG
208
+ audioenc_name: 'HTSAT'
209
+ out_emb: 768
210
+ sampling_rate: 44100
211
+ duration: 7
212
+ fmin: 50
213
+ fmax: 8000 #14000
214
+ n_fft: 1024 # 1028
215
+ hop_size: 320
216
+ mel_bins: 64
217
+ window_size: 1024
218
+ # PROJECTION SPACE CONFIG
219
+ d_proj: 1024
220
+ temperature: 0.003
221
+ # TRAINING AND EVALUATION CONFIG
222
+ num_classes: 527
223
+ batch_size: 1024
224
+ demo: False
225
+
226
+ # Speaking rate calculating
227
+ # --speaking_rate: correct matching words/character counts
228
+ - name: speaking_rate
229
+ model_tag: default
230
+ beam_size: 1
231
+ text_cleaner: whisper_basic
232
+
233
+ # Audiobox Aesthetics (Unified automatic quality assessment for speech, music, and sound.)
234
+ - name: audiobox_aesthetics
235
+ batch_size: 1
236
+ cache_dir: versa_cache/audiobox
237
+
238
+ # ASR-match calculating
239
+ # --asr_match_error_rate: correct matching words/character counts
240
+ - name: asr_match
241
+ model_tag: default
242
+ beam_size: 1
243
+ text_cleaner: whisper_basic
244
+
245
+ # speaker related metrics
246
+ # -- spk_similarity: speaker cosine similarity
247
+ - name: speaker
248
+ model_tag: default
249
+
250
+ # asvspoof related metrics
251
+ # -- asvspoof_score: evaluate how the generated speech is likely to be classified by a deepfake classifier
252
+ - name: asvspoof_score
253
+
254
+ # signal related metrics
255
+ # -- sir: signal to interference ratio
256
+ # -- sar: signal to artifact ratio
257
+ # -- sdr: signal to distortion ratio
258
+ # -- ci-sdr: scale-invariant signal to distortion ratio
259
+ # -- si-snri: scale-invariant signal to noise ratio improvement
260
+ - name: signal_metric""")
261
+
262
  # Find available metric configs
263
  def get_available_metrics():
264
  """Get list of available metrics from VERSA config directory"""
265
  metrics = []
266
 
267
+ if not versa_installed:
268
+ # If VERSA is not installed, return an empty list
269
+ return metrics
270
+
271
  # Get all YAML files from the egs directory
272
  for root, _, files in os.walk(VERSA_CONFIG_DIR):
273
  for file in files:
 
277
  rel_path = os.path.relpath(path, VERSA_CONFIG_DIR)
278
  metrics.append(rel_path)
279
 
280
+ # Add custom configs
281
+ for root, _, files in os.walk(CONFIG_DIR):
282
+ for file in files:
283
+ if file.endswith('.yaml'):
284
+ path = os.path.join(root, file)
285
+ rel_path = f"custom/{os.path.basename(path)}"
286
+ metrics.append(rel_path)
287
+
288
  return sorted(metrics)
289
 
290
+ # Get all available metric names
291
def get_available_metric_names():
    """Collect the distinct metric names declared in the known YAML configs.

    Reads the universal metrics file (when it exists) and every ``.yaml``
    file found under the VERSA config directory. Only files whose top-level
    YAML value is a list contribute names; files that cannot be read or
    parsed are ignored.

    Returns:
        Sorted list of metric names, or an empty list when VERSA is not
        installed.
    """
    if not versa_installed:
        # Without VERSA there is nothing to enumerate.
        return []

    names = set()

    def collect_names(yaml_path):
        # Best effort: a failure on one file keeps whatever was gathered so
        # far and simply moves on to the next file.
        try:
            with open(yaml_path, 'r') as handle:
                entries = yaml.safe_load(handle)
                if isinstance(entries, list):
                    for entry in entries:
                        if isinstance(entry, dict) and 'name' in entry:
                            names.add(entry['name'])
        except Exception:
            pass

    # The universal metrics file is consulted first.
    if os.path.exists(UNIVERSAL_METRICS_YAML):
        collect_names(UNIVERSAL_METRICS_YAML)

    # Then every YAML file shipped in the VERSA config tree.
    for dirpath, _, filenames in os.walk(VERSA_CONFIG_DIR):
        for filename in filenames:
            if filename.endswith('.yaml'):
                collect_names(os.path.join(dirpath, filename))

    return sorted(names)
327
+
328
  # Get metric description from YAML file
329
def get_metric_description(metric_path):
    """Return a short human-readable description of a metric config file.

    Args:
        metric_path: Either ``custom/<filename>`` (resolved inside the local
            configs directory) or a path relative to the VERSA config
            directory.

    Returns:
        The config's ``description`` field when the YAML is a mapping that
        has one, a ``Contains metrics: ...`` summary when it is a list of
        named entries, a fallback string otherwise, or an error string if
        the file cannot be loaded.
    """
    if not versa_installed:
        return "VERSA is not installed. Metric descriptions are unavailable."

    # Custom configs live in CONFIG_DIR and are addressed by file name only.
    is_custom = metric_path.startswith("custom/")
    base_dir = CONFIG_DIR if is_custom else VERSA_CONFIG_DIR
    relative_name = metric_path.split("/")[1] if is_custom else metric_path
    config_file = os.path.join(base_dir, relative_name)

    try:
        with open(config_file, 'r') as handle:
            parsed = yaml.safe_load(handle)

        # Mapping-style config with an explicit description.
        if isinstance(parsed, dict) and 'description' in parsed:
            return parsed['description']

        # List-style config: summarise the metric names it declares.
        if isinstance(parsed, list):
            listed = [
                entry['name']
                for entry in parsed
                if isinstance(entry, dict) and 'name' in entry
            ]
            if listed:
                return f"Contains metrics: {', '.join(listed)}"

        return "No description available"
    except Exception as e:
        return f"Could not load description: {str(e)}"
362
 
363
+ # Create custom metric config file
364
def create_custom_metric_config(selected_metrics, metric_parameters):
    """Create a custom metric configuration file from selected metrics.

    Each selected name is looked up in the universal metrics YAML and its
    entry is copied into a new ``custom_metrics_<timestamp>.yaml`` file in
    the configs directory.

    Args:
        selected_metrics: Names of the metrics to include.
        metric_parameters: Optional YAML text. When it parses to a mapping,
            ``{metric_name: {param: value}}`` entries are merged into the
            matching copied metric.

    Returns:
        A ``(config_id, status)`` tuple. ``config_id`` is
        ``"custom/<filename>"`` on success and ``None`` on any failure;
        ``status`` is a message suitable for display.
    """
    if not versa_installed:
        return None, "VERSA is not installed. Cannot create custom metric configuration."

    if not selected_metrics:
        return None, "Please select at least one metric"

    # Load universal metrics as reference
    try:
        with open(UNIVERSAL_METRICS_YAML, 'r') as f:
            universal_metrics = yaml.safe_load(f)
    except Exception as e:
        return None, f"Error loading universal metrics: {str(e)}"

    # An empty or malformed file parses to None / a non-list; iterating that
    # would raise instead of producing a readable status.
    if not isinstance(universal_metrics, list):
        return None, "Error loading universal metrics: expected a list of metric entries"

    # Copy the first universal entry matching each selected name, and keep
    # track of names that have no entry so they are not dropped silently.
    custom_metrics = []
    missing = []
    for metric_name in selected_metrics:
        match = next(
            (
                entry
                for entry in universal_metrics
                if isinstance(entry, dict) and entry.get('name') == metric_name
            ),
            None,
        )
        if match is None:
            missing.append(metric_name)
        else:
            custom_metrics.append(match.copy())

    # Writing an empty config and reporting success would only defer the
    # failure to evaluation time.
    if not custom_metrics:
        return None, "None of the selected metrics are defined in the universal metrics configuration"

    # Apply any custom parameters from metric_parameters
    if metric_parameters:
        try:
            params = yaml.safe_load(metric_parameters)
            if isinstance(params, dict):
                for metric in custom_metrics:
                    overrides = params.get(metric.get('name'))
                    if isinstance(overrides, dict):
                        metric.update(overrides)
        except Exception as e:
            return None, f"Error parsing metric parameters: {str(e)}"

    # Create a custom YAML file
    timestamp = int(time.time())
    custom_yaml_path = os.path.join(CONFIG_DIR, f"custom_metrics_{timestamp}.yaml")

    try:
        with open(custom_yaml_path, 'w') as f:
            yaml.dump(custom_metrics, f, default_flow_style=False)
    except Exception as e:
        return None, f"Error creating custom metric configuration: {str(e)}"

    status = f"Custom metric configuration created successfully with {len(custom_metrics)} metrics"
    if missing:
        skipped = ', '.join(str(name) for name in missing)
        status += f" (skipped, not in universal metrics: {skipped})"

    return f"custom/{os.path.basename(custom_yaml_path)}", status
414
+
415
+ # Load metric config file
416
def load_metric_config(config_path):
    """Return the raw text of a metric configuration file.

    Args:
        config_path: Either ``custom/<filename>`` (resolved inside the local
            configs directory) or a path relative to the VERSA config
            directory.

    Returns:
        The file content, or a message string when VERSA is not installed
        (for non-custom paths) or the file cannot be read.
    """
    is_custom = config_path.startswith("custom/")

    # Custom configs stay readable even when VERSA itself is missing.
    if not (versa_installed or is_custom):
        return "VERSA is not installed. Cannot load metric configuration."

    if is_custom:
        target = os.path.join(CONFIG_DIR, config_path.split("/")[1])
    else:
        target = os.path.join(VERSA_CONFIG_DIR, config_path)

    try:
        with open(target, 'r') as handle:
            return handle.read()
    except Exception as e:
        return f"Error loading metric configuration: {str(e)}"
435
+
436
+ # Save uploaded YAML file
437
def save_uploaded_yaml(file_obj):
    """Save an uploaded YAML file to the configs directory.

    Args:
        file_obj: The upload to store: either a filesystem path (what a
            ``gr.File(type="filepath")`` input passes to its handler) or an
            object exposing the path as ``.name``. ``None`` means nothing
            was uploaded.

    Returns:
        A ``(config_id, status)`` tuple. ``config_id`` is
        ``"custom/<stored filename>"`` on success and ``None`` on failure;
        ``status`` is a message suitable for display.
    """
    if file_obj is None:
        return None, "No file uploaded"

    # Bound before the try block so the cleanup in the handlers below never
    # reads an unassigned name when the failure happens early.
    yaml_path = None

    def discard_partial_copy():
        # Remove the stored copy if one was created before the failure.
        if yaml_path is not None and os.path.exists(yaml_path):
            os.remove(yaml_path)

    try:
        # Accept a plain path as well as a file-like wrapper with `.name`.
        if isinstance(file_obj, (str, os.PathLike)):
            source_path = file_obj
        else:
            source_path = file_obj.name

        # Get the file name and create a new path
        filename = os.path.basename(source_path)
        if not filename.endswith('.yaml'):
            filename += '.yaml'

        # Ensure unique filename
        timestamp = int(time.time())
        yaml_path = os.path.join(CONFIG_DIR, f"uploaded_{timestamp}_{filename}")

        # Copy the file
        with open(source_path, 'rb') as src, open(yaml_path, 'wb') as dst:
            dst.write(src.read())

        # Validate YAML format
        with open(yaml_path, 'r') as f:
            yaml.safe_load(f)

        stored_name = os.path.basename(yaml_path)
        return f"custom/{stored_name}", f"YAML file uploaded successfully as {stored_name}"
    except yaml.YAMLError:
        discard_partial_copy()
        return None, "Invalid YAML format. Please check your file."
    except Exception as e:
        discard_partial_copy()
        return None, f"Error uploading YAML file: {str(e)}"
469
+
470
  # Process audio files and run VERSA evaluation
471
  def evaluate_audio(gt_file, pred_file, metric_config, include_timestamps=False):
472
  """Evaluate audio files using VERSA"""
473
+ if not versa_installed:
474
+ return None, "VERSA is not installed. Evaluation cannot be performed."
475
+
476
  if gt_file is None or pred_file is None:
477
+ return None, "Please upload both ground truth and prediction audio files."
478
+
479
+ # Determine the metric config path
480
+ if metric_config.startswith("custom/"):
481
+ # Handle custom metrics
482
+ filename = metric_config.split("/")[1]
483
+ metric_config_path = os.path.join(CONFIG_DIR, filename)
484
+ else:
485
+ metric_config_path = os.path.join(VERSA_CONFIG_DIR, metric_config)
486
 
487
  # Create temp directory for results
488
  with tempfile.TemporaryDirectory() as temp_dir:
489
  output_file = os.path.join(temp_dir, "result.json")
490
 
 
 
 
491
  # Build command
492
  cmd = [
493
  sys.executable, VERSA_BIN,
 
532
  """Create the Gradio demo interface"""
533
  available_metrics = get_available_metrics()
534
  default_metric = "speech.yaml" if "speech.yaml" in available_metrics else available_metrics[0] if available_metrics else None
535
+ metric_names = get_available_metric_names()
536
 
537
  with gr.Blocks(title="VERSA Speech & Audio Evaluation Demo") as demo:
538
  gr.Markdown("# VERSA: Versatile Evaluation of Speech and Audio")
 
539
 
540
+ # Display installation status
541
  with gr.Row():
542
+ installation_status = gr.Textbox(
543
+ value=f"VERSA Installation Status: {'Installed' if versa_installed else 'Not Installed - ' + versa_status}",
544
+ label="Installation Status",
545
+ interactive=False
546
+ )
547
+
548
+ if not versa_installed:
549
+ gr.Markdown(f"""
550
+ ## ⚠️ VERSA Not Installed
551
+
552
+ VERSA does not appear to be properly installed. The build process may have failed.
553
+ Please check the build logs in the Factory tab of your Hugging Face Space.
554
+
555
+ Error: {versa_status}
556
+ """)
557
+ else:
558
+ gr.Markdown("Upload audio files and evaluate them using VERSA metrics.")
559
+
560
+ with gr.Tabs() as tabs:
561
+ # Standard evaluation tab
562
+ with gr.TabItem("Standard Evaluation"):
563
+ with gr.Row():
564
+ with gr.Column():
565
+ gt_audio = gr.Audio(label="Ground Truth Audio", type="filepath", sources=["upload", "microphone"])
566
+ pred_audio = gr.Audio(label="Prediction Audio", type="filepath", sources=["upload", "microphone"])
567
+
568
+ metric_dropdown = gr.Dropdown(
569
+ choices=available_metrics,
570
+ label="Evaluation Metric Configuration",
571
+ value=default_metric,
572
+ info="Select a pre-defined or custom metric configuration"
573
+ )
574
+
575
+ with gr.Accordion("Metric Configuration Details", open=False):
576
+ metric_description = gr.Textbox(
577
+ label="Metric Description",
578
+ value=get_metric_description(default_metric) if default_metric else "",
579
+ interactive=False
580
+ )
581
+
582
+ metric_content = gr.Code(
583
+ label="Configuration Content",
584
+ language="yaml",
585
+ value=load_metric_config(default_metric) if default_metric else "",
586
+ interactive=False
587
+ )
588
+
589
+ include_timestamps = gr.Checkbox(
590
+ label="Include Timestamps in Results",
591
+ value=False
592
+ )
593
+
594
+ eval_button = gr.Button("Evaluate")
595
+
596
+ with gr.Column():
597
+ results_table = gr.Dataframe(label="Evaluation Results")
598
+ raw_json = gr.Code(language="json", label="Raw Results")
599
 
600
+ # Custom metrics creation tab
601
+ with gr.TabItem("Create Custom Metrics"):
602
+ with gr.Row():
603
+ with gr.Column():
604
+ gr.Markdown("### Option 1: Select from Available Metrics")
605
+
606
+ metrics_checklist = gr.CheckboxGroup(
607
+ choices=metric_names,
608
+ label="Available Metrics",
609
+ info="Select the metrics you want to include in your custom configuration"
610
+ )
611
+
612
+ metric_params = gr.Code(
613
+ label="Custom Parameters (Optional, YAML format)",
614
+ language="yaml",
615
+ placeholder="""# Example of custom parameters
616
+ # Replace with your own as needed
617
+ pysepm:
618
+ wss_wgt_vec: [1, 2, 3]
619
+ mcd_f0:
620
+ f0min: 50
621
+ f0max: 600""",
622
+ interactive=True
623
+ )
624
+
625
+ create_custom_button = gr.Button("Create Custom Configuration")
626
+ custom_status = gr.Textbox(label="Status", interactive=False)
627
+
628
+ with gr.Column():
629
+ gr.Markdown("### Option 2: Upload Your Own YAML File")
630
+
631
+ uploaded_yaml = gr.File(
632
+ label="Upload YAML Configuration",
633
+ file_types=[".yaml", ".yml"],
634
+ type="filepath"
635
+ )
636
+
637
+ upload_button = gr.Button("Upload Configuration")
638
+ upload_status = gr.Textbox(label="Upload Status", interactive=False)
639
+
640
+ gr.Markdown("### Generated Configuration")
641
+ custom_config_path = gr.Textbox(
642
+ label="Configuration Path",
643
+ interactive=False,
644
+ visible=False
645
+ )
646
+
647
+ custom_config_content = gr.Code(
648
+ label="Configuration Content",
649
+ language="yaml",
650
+ interactive=False
651
+ )
652
 
653
+ # About tab
654
+ with gr.TabItem("About VERSA"):
655
+ gr.Markdown("""
656
+ ## VERSA: Versatile Evaluation of Speech and Audio
657
+
658
+ VERSA is a toolkit dedicated to collecting evaluation metrics in speech and audio quality.
659
+ It provides a comprehensive connection to cutting-edge evaluation techniques and is tightly integrated with ESPnet.
660
+
661
+ With full installation, VERSA offers over 60 metrics with 700+ metric variations based on different configurations.
662
+ These metrics encompass evaluations utilizing diverse external resources, including matching and non-matching
663
+ reference audio, text transcriptions, and text captions.
664
+
665
+ ### Features
666
+
667
+ - Pythonic interface with flexible configuration
668
+ - Support for various audio formats and evaluation scenarios
669
+ - Integration with ESPnet
670
+ - Batch processing capabilities
671
+ - Customizable evaluation metrics
672
+
673
+ ### Citation
674
+
675
+ ```
676
+ @misc{shi2024versaversatileevaluationtoolkit,
677
+ title={VERSA: A Versatile Evaluation Toolkit for Speech, Audio, and Music},
678
+ author={Jiatong Shi and Hye-jin Shim and Jinchuan Tian and Siddhant Arora and Haibin Wu and Darius Petermann and Jia Qi Yip and You Zhang and Yuxun Tang and Wangyou Zhang and Dareen Safar Alharthi and Yichen Huang and Koichi Saito and Jionghao Han and Yiwen Zhao and Chris Donahue and Shinji Watanabe},
679
+ year={2024},
680
+ eprint={2412.17667},
681
+ archivePrefix={arXiv},
682
+ primaryClass={cs.SD},
683
+ url={https://arxiv.org/abs/2412.17667},
684
+ }
685
+ ```
686
+
687
+ Learn more at [VERSA GitHub Repository](https://github.com/shinjiwlab/versa).
688
+ """)
689
 
690
+ # Event handlers
691
def update_metric_details(metric_path):
    """Return the description and raw YAML text for the selected config."""
    description = get_metric_description(metric_path)
    yaml_text = load_metric_config(metric_path)
    return description, yaml_text
693
+
694
+ metric_dropdown.change(
695
+ fn=update_metric_details,
696
+ inputs=[metric_dropdown],
697
+ outputs=[metric_description, metric_content]
698
+ )
699
+
700
+ eval_button.click(
701
+ fn=evaluate_audio,
702
+ inputs=[gt_audio, pred_audio, metric_dropdown, include_timestamps],
703
+ outputs=[results_table, raw_json]
704
+ )
705
+
706
+ # Create custom metric configuration
707
def create_and_update_custom_config(selected_metrics, metric_parameters):
    """Create a custom config and refresh the widgets that depend on it.

    Returns:
        A 4-tuple of (status message, config id or ``None``, config text,
        dropdown update). The dropdown update always carries the refreshed
        list of configurations and, on success, selects the new one.
    """
    config_path, status = create_custom_metric_config(selected_metrics, metric_parameters)

    # gr.update() is the component-agnostic update helper; the per-component
    # `gr.Dropdown.update` classmethod is not available in Gradio 4, which
    # this file targets (it uses `sources=[...]` on gr.Audio).
    if not config_path:
        return status, None, "", gr.update(choices=get_available_metrics())

    content = load_metric_config(config_path)
    # Refresh the available metrics list
    metrics_list = get_available_metrics()
    return status, config_path, content, gr.update(choices=metrics_list, value=config_path)
716
+
717
+ create_custom_button.click(
718
+ fn=create_and_update_custom_config,
719
+ inputs=[metrics_checklist, metric_params],
720
+ outputs=[custom_status, custom_config_path, custom_config_content, metric_dropdown]
721
+ )
722
+
723
+ # Upload YAML file
724
def upload_and_update_yaml(file_obj):
    """Store an uploaded YAML config and refresh the widgets that depend on it.

    Returns:
        A 4-tuple of (status message, config id or ``None``, config text,
        dropdown update). The dropdown update always carries the refreshed
        list of configurations and, on success, selects the uploaded one.
    """
    config_path, status = save_uploaded_yaml(file_obj)

    # gr.update() is the component-agnostic update helper; the per-component
    # `gr.Dropdown.update` classmethod is not available in Gradio 4, which
    # this file targets (it uses `sources=[...]` on gr.Audio).
    if not config_path:
        return status, None, "", gr.update(choices=get_available_metrics())

    content = load_metric_config(config_path)
    # Refresh the available metrics list
    metrics_list = get_available_metrics()
    return status, config_path, content, gr.update(choices=metrics_list, value=config_path)
733
+
734
+ upload_button.click(
735
+ fn=upload_and_update_yaml,
736
+ inputs=[uploaded_yaml],
737
+ outputs=[upload_status, custom_config_path, custom_config_content, metric_dropdown]
738
+ )
build.sh ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Build script for Hugging Face Space to install VERSA during the build phase.
#
# Clones (or updates) the VERSA repository next to this script, installs it in
# editable mode, and creates a marker file once everything has succeeded.

set -e # Exit immediately if a command fails

echo "Starting VERSA installation for Hugging Face Space build..."

# Install system dependencies (already handled by packages.txt, but double-check)
echo "Checking system dependencies..."
if ! command -v git &> /dev/null || ! command -v ffmpeg &> /dev/null; then
    echo "Some system dependencies are missing. Please check packages.txt includes: git build-essential libsndfile1 ffmpeg"
    exit 1
fi

# Set up directory structure.
# Paths are anchored on this script's own location instead of the caller's
# working directory, because app.py resolves versa/ and data/ relative to its
# own file.
echo "Setting up directory structure..."
APP_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VERSA_ROOT="${APP_ROOT}/versa"
DATA_DIR="${APP_ROOT}/data"
CONFIG_DIR="${DATA_DIR}/configs"
UPLOAD_DIR="${DATA_DIR}/uploads"
RESULTS_DIR="${DATA_DIR}/results"
INSTALL_MARKER="${VERSA_ROOT}/.installation_complete"

mkdir -p "${DATA_DIR}" "${CONFIG_DIR}" "${UPLOAD_DIR}" "${RESULTS_DIR}"

# A marker left behind by an earlier build must not vouch for this one; it is
# recreated below only after the install step has succeeded.
rm -f "${INSTALL_MARKER}"

# Clone VERSA repository
echo "Cloning VERSA repository..."
if [ -d "${VERSA_ROOT}" ]; then
    echo "VERSA directory already exists, updating..."
    git -C "${VERSA_ROOT}" pull
else
    echo "Cloning fresh VERSA repository..."
    git clone https://github.com/wavlab-speech/versa.git "${VERSA_ROOT}"
fi

# Install VERSA
echo "Installing VERSA and dependencies..."
pip install -e "${VERSA_ROOT}"

# Install basic metric dependencies
echo "Installing basic metric dependencies..."
# You can add specific metric installers here if needed
# For example:
# (cd "${VERSA_ROOT}/tools/nisqa" && bash install.sh)

echo "VERSA installation completed successfully!"

# Create a file to indicate successful installation
touch "${INSTALL_MARKER}"

echo "Build process completed. VERSA is ready for use in the Gradio application."