kemuriririn committed on
Commit
19ae156
·
1 Parent(s): 946a55b

(wip)update html

Browse files
Files changed (3) hide show
  1. models.py +0 -8
  2. templates/arena.html +8 -25
  3. tts.py +26 -4
models.py CHANGED
@@ -433,14 +433,6 @@ def insert_initial_models():
433
  # is_active=False, # ran out of credits
434
  # model_url="https://cartesia.ai/",
435
  # ),
436
- # Model(
437
- # id="spark-tts",
438
- # name="Spark TTS",
439
- # model_type=ModelType.TTS,
440
- # is_open=False,
441
- # is_active=False, # API stopped working
442
- # model_url="https://github.com/SparkAudio/Spark-TTS",
443
- # ),
444
  Model(
445
  id="spark-tts",
446
  name="Spark TTS",
 
433
  # is_active=False, # ran out of credits
434
  # model_url="https://cartesia.ai/",
435
  # ),
 
 
 
 
 
 
 
 
436
  Model(
437
  id="spark-tts",
438
  name="Spark TTS",
templates/arena.html CHANGED
@@ -13,7 +13,7 @@
13
  <div id="tts-tab" class="tab-content active">
14
  <form class="input-container">
15
  <div class="input-group">
16
- <label for="voice-file">上传参考音色:</label>
17
  <input type="file" id="voice-file" accept="audio/*">
18
  <audio id="voice-preview" controls style="display:none;"></audio>
19
  </div>
@@ -992,7 +992,7 @@
992
  <script src="{{ url_for('static', filename='js/waveplayer.js') }}"></script>
993
  <script>
994
  document.addEventListener('DOMContentLoaded', function() {
995
- // 参考音色试听功能
996
  const voiceFileInput = document.getElementById('voice-file');
997
  const voicePreview = document.getElementById('voice-preview');
998
  if (voiceFileInput && voicePreview) {
@@ -1036,7 +1036,7 @@
1036
  let modelNames = { a: '', b: '' };
1037
  let wavePlayers = { a: null, b: null };
1038
  let cachedSentences = []; // To store sentences available in cache
1039
- let hasVoted = false; // 防止重复投票
1040
 
1041
  // Initialize WavePlayers with mobile settings
1042
  wavePlayerContainers.forEach(container => {
@@ -2039,22 +2039,6 @@
2039
  {% block scripts %}
2040
  {{ super() }}
2041
  <script>
2042
- // 1. 参考音色试听功能
2043
- const voiceFileInput = document.getElementById('voice-file');
2044
- const voicePreview = document.getElementById('voice-preview');
2045
- voiceFileInput.addEventListener('change', function() {
2046
- const file = this.files[0];
2047
- if (file) {
2048
- const url = URL.createObjectURL(file);
2049
- voicePreview.src = url;
2050
- voicePreview.style.display = 'inline-block';
2051
- voicePreview.load();
2052
- } else {
2053
- voicePreview.src = '';
2054
- voicePreview.style.display = 'none';
2055
- }
2056
- });
2057
-
2058
  // 2. 阻止输入框Enter触发合成,只允许点击按钮合成
2059
  const ttsForm = document.querySelector('#tts-tab form.input-container');
2060
  const textInput = ttsForm.querySelector('.text-input');
@@ -2062,16 +2046,15 @@ const synthBtn = ttsForm.querySelector('.synth-btn');
2062
 
2063
  textInput.addEventListener('keydown', function(e) {
2064
  if (e.key === 'Enter') {
2065
- e.preventDefault(); // 阻止回车提交
2066
  }
2067
  });
2068
- // 可选:防止form回车自动提交
2069
  ttsForm.addEventListener('submit', function(e) {
2070
- e.preventDefault(); // 阻止默认提交
2071
- // 只有点击合成按钮时才触发合成
2072
  if (document.activeElement === synthBtn || e.submitter === synthBtn) {
2073
- // 这里调用原有的合成逻辑(如有)
2074
- // 例如:triggerSynthesize();
2075
  if (typeof window.triggerSynthesize === 'function') {
2076
  window.triggerSynthesize();
2077
  }
 
13
  <div id="tts-tab" class="tab-content active">
14
  <form class="input-container">
15
  <div class="input-group">
16
+ <label for="voice-file">Upload reference voice:</label>
17
  <input type="file" id="voice-file" accept="audio/*">
18
  <audio id="voice-preview" controls style="display:none;"></audio>
19
  </div>
 
992
  <script src="{{ url_for('static', filename='js/waveplayer.js') }}"></script>
993
  <script>
994
  document.addEventListener('DOMContentLoaded', function() {
995
+ // Reference voice preview function
996
  const voiceFileInput = document.getElementById('voice-file');
997
  const voicePreview = document.getElementById('voice-preview');
998
  if (voiceFileInput && voicePreview) {
 
1036
  let modelNames = { a: '', b: '' };
1037
  let wavePlayers = { a: null, b: null };
1038
  let cachedSentences = []; // To store sentences available in cache
1039
+ let hasVoted = false; // Prevent duplicate voting
1040
 
1041
  // Initialize WavePlayers with mobile settings
1042
  wavePlayerContainers.forEach(container => {
 
2039
  {% block scripts %}
2040
  {{ super() }}
2041
  <script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2042
  // 2. 阻止输入框Enter触发合成,只允许点击按钮合成
2043
  const ttsForm = document.querySelector('#tts-tab form.input-container');
2044
  const textInput = ttsForm.querySelector('.text-input');
 
2046
 
2047
  textInput.addEventListener('keydown', function(e) {
2048
  if (e.key === 'Enter') {
2049
+ e.preventDefault(); // Prevent Enter submit
2050
  }
2051
  });
2052
+ // Optional: prevent form Enter auto submit
2053
  ttsForm.addEventListener('submit', function(e) {
2054
+ e.preventDefault(); // Prevent default submit
2055
+ // Only trigger synth when clicking the synth button
2056
  if (document.activeElement === synthBtn || e.submitter === synthBtn) {
2057
+ // Call original synth logic if exists
 
2058
  if (typeof window.triggerSynthesize === 'function') {
2059
  window.triggerSynthesize();
2060
  }
tts.py CHANGED
@@ -51,10 +51,10 @@ model_mapping = {
51
  # "provider": "styletts",
52
  # "model": "styletts2",
53
  # },
54
- # "cosyvoice-2.0": {
55
- # "provider": "cosyvoice",
56
- # "model": "cosyvoice_2_0",
57
- # },
58
  # "papla-p1": {
59
  # "provider": "papla",
60
  # "model": "papla_p1",
@@ -225,6 +225,26 @@ def predict_spark_tts(text, reference_audio_path=None):
225
  return result
226
 
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  def predict_tts(text, model, reference_audio_path=None):
229
  global client
230
  print(f"Predicting TTS for {model}")
@@ -239,6 +259,8 @@ def predict_tts(text, model, reference_audio_path=None):
239
  return predict_index_tts(text, reference_audio_path)
240
  elif model == "spark-tts":
241
  return predict_spark_tts(text, reference_audio_path)
 
 
242
 
243
  if not model in model_mapping:
244
  raise ValueError(f"Model {model} not found")
 
51
  # "provider": "styletts",
52
  # "model": "styletts2",
53
  # },
54
+ "cosyvoice-2.0": {
55
+ "provider": "cosyvoice",
56
+ "model": "cosyvoice_2_0",
57
+ },
58
  # "papla-p1": {
59
  # "provider": "papla",
60
  # "model": "papla_p1",
 
225
  return result
226
 
227
 
228
+ def predict_cosyvoice_tts(text, reference_audio_path=None):
229
+ from gradio_client import Client, file
230
+ client = Client("https://iic-cosyvoice2-0-5b.ms.show/")
231
+ if not reference_audio_path:
232
+ raise ValueError("cosyvoice-2.0 需要 reference_audio_path")
233
+ prompt_wav = file(reference_audio_path)
234
+ result = client.predict(
235
+ tts_text=text,
236
+ mode_checkbox_group="3s极速复刻",
237
+ prompt_text="",
238
+ prompt_wav_upload=prompt_wav,
239
+ prompt_wav_record=prompt_wav,
240
+ instruct_text="",
241
+ seed=0,
242
+ stream="false",
243
+ api_name="/generate_audio"
244
+ )
245
+ return result
246
+
247
+
248
  def predict_tts(text, model, reference_audio_path=None):
249
  global client
250
  print(f"Predicting TTS for {model}")
 
259
  return predict_index_tts(text, reference_audio_path)
260
  elif model == "spark-tts":
261
  return predict_spark_tts(text, reference_audio_path)
262
+ elif model == "cosyvoice-2.0":
263
+ return predict_cosyvoice_tts(text, reference_audio_path)
264
 
265
  if not model in model_mapping:
266
  raise ValueError(f"Model {model} not found")