Voice-Clone-Router

Paused

App Files Community

kemuriririn committed 15 days ago

Commit

19ae156

1 Parent(s): 946a55b

(wip)update html

Browse files

Files changed (3) hide show

models.py +0 -8
templates/arena.html +8 -25
tts.py +26 -4

models.py CHANGED Viewed

@@ -433,14 +433,6 @@ def insert_initial_models():
         #     is_active=False, # ran out of credits
         #     model_url="https://cartesia.ai/",
         # ),
-        # Model(
-        #     id="spark-tts",
-        #     name="Spark TTS",
-        #     model_type=ModelType.TTS,
-        #     is_open=False,
-        #     is_active=False, # API stopped working
-        #     model_url="https://github.com/SparkAudio/Spark-TTS",
-        # ),
         Model(
             id="spark-tts",
             name="Spark TTS",

         #     is_active=False, # ran out of credits
         #     model_url="https://cartesia.ai/",
         # ),
         Model(
             id="spark-tts",
             name="Spark TTS",

templates/arena.html CHANGED Viewed

@@ -13,7 +13,7 @@
 <div id="tts-tab" class="tab-content active">
     <form class="input-container">
         <div class="input-group">
-            <label for="voice-file">上传参考音色：</label>
             <input type="file" id="voice-file" accept="audio/*">
             <audio id="voice-preview" controls style="display:none;"></audio>
         </div>
@@ -992,7 +992,7 @@
 <script src="{{ url_for('static', filename='js/waveplayer.js') }}"></script>
 <script>
     document.addEventListener('DOMContentLoaded', function() {
-        // 参考音色试听功能
         const voiceFileInput = document.getElementById('voice-file');
         const voicePreview = document.getElementById('voice-preview');
         if (voiceFileInput && voicePreview) {
@@ -1036,7 +1036,7 @@
         let modelNames = { a: '', b: '' };
         let wavePlayers = { a: null, b: null };
         let cachedSentences = []; // To store sentences available in cache
-        let hasVoted = false; // 防止重复投票
         // Initialize WavePlayers with mobile settings
         wavePlayerContainers.forEach(container => {
@@ -2039,22 +2039,6 @@
 {% block scripts %}
 {{ super() }}
 <script>
-// 1. 参考音色试听功能
-const voiceFileInput = document.getElementById('voice-file');
-const voicePreview = document.getElementById('voice-preview');
-voiceFileInput.addEventListener('change', function() {
-    const file = this.files[0];
-    if (file) {
-        const url = URL.createObjectURL(file);
-        voicePreview.src = url;
-        voicePreview.style.display = 'inline-block';
-        voicePreview.load();
-    } else {
-        voicePreview.src = '';
-        voicePreview.style.display = 'none';
-    }
-});
 // 2. 阻止输入框Enter触发合成，只允许点击按钮合成
 const ttsForm = document.querySelector('#tts-tab form.input-container');
 const textInput = ttsForm.querySelector('.text-input');
@@ -2062,16 +2046,15 @@ const synthBtn = ttsForm.querySelector('.synth-btn');
 textInput.addEventListener('keydown', function(e) {
     if (e.key === 'Enter') {
-        e.preventDefault(); // 阻止回车提交
     }
 });
-// 可选：防止form回车自动提交
 ttsForm.addEventListener('submit', function(e) {
-    e.preventDefault(); // 阻止默认提交
-    // 只有点击合成按钮时才触发合成
     if (document.activeElement === synthBtn || e.submitter === synthBtn) {
-        // 这里调用原有的合成逻辑（如有）
-        // 例如：triggerSynthesize();
         if (typeof window.triggerSynthesize === 'function') {
             window.triggerSynthesize();
         }

 <div id="tts-tab" class="tab-content active">
     <form class="input-container">
         <div class="input-group">
+            <label for="voice-file">Upload reference voice:</label>
             <input type="file" id="voice-file" accept="audio/*">
             <audio id="voice-preview" controls style="display:none;"></audio>
         </div>
 <script src="{{ url_for('static', filename='js/waveplayer.js') }}"></script>
 <script>
     document.addEventListener('DOMContentLoaded', function() {
+        // Reference voice preview function
         const voiceFileInput = document.getElementById('voice-file');
         const voicePreview = document.getElementById('voice-preview');
         if (voiceFileInput && voicePreview) {
         let modelNames = { a: '', b: '' };
         let wavePlayers = { a: null, b: null };
         let cachedSentences = []; // To store sentences available in cache
+        let hasVoted = false; // Prevent duplicate voting
         // Initialize WavePlayers with mobile settings
         wavePlayerContainers.forEach(container => {
 {% block scripts %}
 {{ super() }}
 <script>
 // 2. 阻止输入框Enter触发合成，只允许点击按钮合成
 const ttsForm = document.querySelector('#tts-tab form.input-container');
 const textInput = ttsForm.querySelector('.text-input');
 textInput.addEventListener('keydown', function(e) {
     if (e.key === 'Enter') {
+        e.preventDefault(); // Prevent Enter submit
     }
 });
+// Optional: prevent form Enter auto submit
 ttsForm.addEventListener('submit', function(e) {
+    e.preventDefault(); // Prevent default submit
+    // Only trigger synth when clicking the synth button
     if (document.activeElement === synthBtn || e.submitter === synthBtn) {
+        // Call original synth logic if exists
         if (typeof window.triggerSynthesize === 'function') {
             window.triggerSynthesize();
         }

tts.py CHANGED Viewed

@@ -51,10 +51,10 @@ model_mapping = {
     #     "provider": "styletts",
     #     "model": "styletts2",
     # },
-    # "cosyvoice-2.0": {
-    #     "provider": "cosyvoice",
-    #     "model": "cosyvoice_2_0",
-    # },
     # "papla-p1": {
     #     "provider": "papla",
     #     "model": "papla_p1",
@@ -225,6 +225,26 @@ def predict_spark_tts(text, reference_audio_path=None):
     return result
 def predict_tts(text, model, reference_audio_path=None):
     global client
     print(f"Predicting TTS for {model}")
@@ -239,6 +259,8 @@ def predict_tts(text, model, reference_audio_path=None):
         return predict_index_tts(text, reference_audio_path)
     elif model == "spark-tts":
         return predict_spark_tts(text, reference_audio_path)
     if not model in model_mapping:
         raise ValueError(f"Model {model} not found")

     #     "provider": "styletts",
     #     "model": "styletts2",
     # },
+    "cosyvoice-2.0": {
+        "provider": "cosyvoice",
+        "model": "cosyvoice_2_0",
+    },
     # "papla-p1": {
     #     "provider": "papla",
     #     "model": "papla_p1",
     return result
+def predict_cosyvoice_tts(text, reference_audio_path=None):
+    from gradio_client import Client, file
+    client = Client("https://iic-cosyvoice2-0-5b.ms.show/")
+    if not reference_audio_path:
+        raise ValueError("cosyvoice-2.0 需要 reference_audio_path")
+    prompt_wav = file(reference_audio_path)
+    result = client.predict(
+        tts_text=text,
+        mode_checkbox_group="3s极速复刻",
+        prompt_text="",
+        prompt_wav_upload=prompt_wav,
+        prompt_wav_record=prompt_wav,
+        instruct_text="",
+        seed=0,
+        stream="false",
+        api_name="/generate_audio"
+    )
+    return result
 def predict_tts(text, model, reference_audio_path=None):
     global client
     print(f"Predicting TTS for {model}")
         return predict_index_tts(text, reference_audio_path)
     elif model == "spark-tts":
         return predict_spark_tts(text, reference_audio_path)
+    elif model == "cosyvoice-2.0":
+        return predict_cosyvoice_tts(text, reference_audio_path)
     if not model in model_mapping:
         raise ValueError(f"Model {model} not found")