Commit
·
19ae156
1
Parent(s):
946a55b
(wip)update html
Browse files- models.py +0 -8
- templates/arena.html +8 -25
- tts.py +26 -4
models.py
CHANGED
@@ -433,14 +433,6 @@ def insert_initial_models():
|
|
433 |
# is_active=False, # ran out of credits
|
434 |
# model_url="https://cartesia.ai/",
|
435 |
# ),
|
436 |
-
# Model(
|
437 |
-
# id="spark-tts",
|
438 |
-
# name="Spark TTS",
|
439 |
-
# model_type=ModelType.TTS,
|
440 |
-
# is_open=False,
|
441 |
-
# is_active=False, # API stopped working
|
442 |
-
# model_url="https://github.com/SparkAudio/Spark-TTS",
|
443 |
-
# ),
|
444 |
Model(
|
445 |
id="spark-tts",
|
446 |
name="Spark TTS",
|
|
|
433 |
# is_active=False, # ran out of credits
|
434 |
# model_url="https://cartesia.ai/",
|
435 |
# ),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
436 |
Model(
|
437 |
id="spark-tts",
|
438 |
name="Spark TTS",
|
templates/arena.html
CHANGED
@@ -13,7 +13,7 @@
|
|
13 |
<div id="tts-tab" class="tab-content active">
|
14 |
<form class="input-container">
|
15 |
<div class="input-group">
|
16 |
-
<label for="voice-file"
|
17 |
<input type="file" id="voice-file" accept="audio/*">
|
18 |
<audio id="voice-preview" controls style="display:none;"></audio>
|
19 |
</div>
|
@@ -992,7 +992,7 @@
|
|
992 |
<script src="{{ url_for('static', filename='js/waveplayer.js') }}"></script>
|
993 |
<script>
|
994 |
document.addEventListener('DOMContentLoaded', function() {
|
995 |
-
//
|
996 |
const voiceFileInput = document.getElementById('voice-file');
|
997 |
const voicePreview = document.getElementById('voice-preview');
|
998 |
if (voiceFileInput && voicePreview) {
|
@@ -1036,7 +1036,7 @@
|
|
1036 |
let modelNames = { a: '', b: '' };
|
1037 |
let wavePlayers = { a: null, b: null };
|
1038 |
let cachedSentences = []; // To store sentences available in cache
|
1039 |
-
let hasVoted = false; //
|
1040 |
|
1041 |
// Initialize WavePlayers with mobile settings
|
1042 |
wavePlayerContainers.forEach(container => {
|
@@ -2039,22 +2039,6 @@
|
|
2039 |
{% block scripts %}
|
2040 |
{{ super() }}
|
2041 |
<script>
|
2042 |
-
// 1. 参考音色试听功能
|
2043 |
-
const voiceFileInput = document.getElementById('voice-file');
|
2044 |
-
const voicePreview = document.getElementById('voice-preview');
|
2045 |
-
voiceFileInput.addEventListener('change', function() {
|
2046 |
-
const file = this.files[0];
|
2047 |
-
if (file) {
|
2048 |
-
const url = URL.createObjectURL(file);
|
2049 |
-
voicePreview.src = url;
|
2050 |
-
voicePreview.style.display = 'inline-block';
|
2051 |
-
voicePreview.load();
|
2052 |
-
} else {
|
2053 |
-
voicePreview.src = '';
|
2054 |
-
voicePreview.style.display = 'none';
|
2055 |
-
}
|
2056 |
-
});
|
2057 |
-
|
2058 |
// 2. Prevent Enter in the text input from triggering synthesis; only clicking the button may synthesize
|
2059 |
const ttsForm = document.querySelector('#tts-tab form.input-container');
|
2060 |
const textInput = ttsForm.querySelector('.text-input');
|
@@ -2062,16 +2046,15 @@ const synthBtn = ttsForm.querySelector('.synth-btn');
|
|
2062 |
|
2063 |
textInput.addEventListener('keydown', function(e) {
|
2064 |
if (e.key === 'Enter') {
|
2065 |
-
e.preventDefault(); //
|
2066 |
}
|
2067 |
});
|
2068 |
-
//
|
2069 |
ttsForm.addEventListener('submit', function(e) {
|
2070 |
-
e.preventDefault(); //
|
2071 |
-
//
|
2072 |
if (document.activeElement === synthBtn || e.submitter === synthBtn) {
|
2073 |
-
//
|
2074 |
-
// 例如:triggerSynthesize();
|
2075 |
if (typeof window.triggerSynthesize === 'function') {
|
2076 |
window.triggerSynthesize();
|
2077 |
}
|
|
|
13 |
<div id="tts-tab" class="tab-content active">
|
14 |
<form class="input-container">
|
15 |
<div class="input-group">
|
16 |
+
<label for="voice-file">Upload reference voice:</label>
|
17 |
<input type="file" id="voice-file" accept="audio/*">
|
18 |
<audio id="voice-preview" controls style="display:none;"></audio>
|
19 |
</div>
|
|
|
992 |
<script src="{{ url_for('static', filename='js/waveplayer.js') }}"></script>
|
993 |
<script>
|
994 |
document.addEventListener('DOMContentLoaded', function() {
|
995 |
+
// Reference voice preview function
|
996 |
const voiceFileInput = document.getElementById('voice-file');
|
997 |
const voicePreview = document.getElementById('voice-preview');
|
998 |
if (voiceFileInput && voicePreview) {
|
|
|
1036 |
let modelNames = { a: '', b: '' };
|
1037 |
let wavePlayers = { a: null, b: null };
|
1038 |
let cachedSentences = []; // To store sentences available in cache
|
1039 |
+
let hasVoted = false; // Prevent duplicate voting
|
1040 |
|
1041 |
// Initialize WavePlayers with mobile settings
|
1042 |
wavePlayerContainers.forEach(container => {
|
|
|
2039 |
{% block scripts %}
|
2040 |
{{ super() }}
|
2041 |
<script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2042 |
// 2. Prevent Enter in the text input from triggering synthesis; only clicking the button may synthesize
|
2043 |
const ttsForm = document.querySelector('#tts-tab form.input-container');
|
2044 |
const textInput = ttsForm.querySelector('.text-input');
|
|
|
2046 |
|
2047 |
textInput.addEventListener('keydown', function(e) {
|
2048 |
if (e.key === 'Enter') {
|
2049 |
+
e.preventDefault(); // Prevent Enter submit
|
2050 |
}
|
2051 |
});
|
2052 |
+
// Optional: prevent form Enter auto submit
|
2053 |
ttsForm.addEventListener('submit', function(e) {
|
2054 |
+
e.preventDefault(); // Prevent default submit
|
2055 |
+
// Only trigger synth when clicking the synth button
|
2056 |
if (document.activeElement === synthBtn || e.submitter === synthBtn) {
|
2057 |
+
// Call original synth logic if exists
|
|
|
2058 |
if (typeof window.triggerSynthesize === 'function') {
|
2059 |
window.triggerSynthesize();
|
2060 |
}
|
tts.py
CHANGED
@@ -51,10 +51,10 @@ model_mapping = {
|
|
51 |
# "provider": "styletts",
|
52 |
# "model": "styletts2",
|
53 |
# },
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
# "papla-p1": {
|
59 |
# "provider": "papla",
|
60 |
# "model": "papla_p1",
|
@@ -225,6 +225,26 @@ def predict_spark_tts(text, reference_audio_path=None):
|
|
225 |
return result
|
226 |
|
227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
def predict_tts(text, model, reference_audio_path=None):
|
229 |
global client
|
230 |
print(f"Predicting TTS for {model}")
|
@@ -239,6 +259,8 @@ def predict_tts(text, model, reference_audio_path=None):
|
|
239 |
return predict_index_tts(text, reference_audio_path)
|
240 |
elif model == "spark-tts":
|
241 |
return predict_spark_tts(text, reference_audio_path)
|
|
|
|
|
242 |
|
243 |
if not model in model_mapping:
|
244 |
raise ValueError(f"Model {model} not found")
|
|
|
51 |
# "provider": "styletts",
|
52 |
# "model": "styletts2",
|
53 |
# },
|
54 |
+
"cosyvoice-2.0": {
|
55 |
+
"provider": "cosyvoice",
|
56 |
+
"model": "cosyvoice_2_0",
|
57 |
+
},
|
58 |
# "papla-p1": {
|
59 |
# "provider": "papla",
|
60 |
# "model": "papla_p1",
|
|
|
225 |
return result
|
226 |
|
227 |
|
228 |
+
def predict_cosyvoice_tts(text, reference_audio_path=None):
    """Synthesize ``text`` via the hosted CosyVoice 2.0 Gradio app.

    The request is sent to the ``/generate_audio`` endpoint of the Gradio app
    at ``https://iic-cosyvoice2-0-5b.ms.show/`` with the reference clip
    supplied as both the uploaded and the recorded prompt audio.

    Args:
        text: Text to synthesize.
        reference_audio_path: Path to the reference audio clip used as the
            voice prompt. Required; a missing or empty value is rejected.

    Returns:
        Whatever ``Client.predict`` returns for ``/generate_audio``
        (NOTE(review): presumably a path to the generated audio file --
        confirm against the endpoint's API page).

    Raises:
        ValueError: If ``reference_audio_path`` is missing or empty.
    """
    # Validate before importing gradio_client or opening a connection, so an
    # invalid call fails fast without any network round-trip.
    if not reference_audio_path:
        # Message reads: "cosyvoice-2.0 requires reference_audio_path".
        raise ValueError("cosyvoice-2.0 需要 reference_audio_path")

    # Imported lazily so the dependency is only needed when this backend runs.
    from gradio_client import Client, file

    # Deliberately a local: predict_tts declares a module-level ``client`` as
    # global, and this connection must not be confused with it.
    cosyvoice_client = Client("https://iic-cosyvoice2-0-5b.ms.show/")
    prompt_wav = file(reference_audio_path)

    result = cosyvoice_client.predict(
        tts_text=text,
        # Mode label expected by the remote app (presumably its
        # "3s fast voice clone" mode -- verify against the Space UI).
        mode_checkbox_group="3s极速复刻",
        prompt_text="",
        # The same clip is sent for both the upload and the record inputs.
        prompt_wav_upload=prompt_wav,
        prompt_wav_record=prompt_wav,
        instruct_text="",
        seed=0,
        # NOTE(review): passed as the string "false", not a bool -- confirm
        # this matches what the endpoint expects.
        stream="false",
        api_name="/generate_audio",
    )
    return result
|
246 |
+
|
247 |
+
|
248 |
def predict_tts(text, model, reference_audio_path=None):
|
249 |
global client
|
250 |
print(f"Predicting TTS for {model}")
|
|
|
259 |
return predict_index_tts(text, reference_audio_path)
|
260 |
elif model == "spark-tts":
|
261 |
return predict_spark_tts(text, reference_audio_path)
|
262 |
+
elif model == "cosyvoice-2.0":
|
263 |
+
return predict_cosyvoice_tts(text, reference_audio_path)
|
264 |
|
265 |
if not model in model_mapping:
|
266 |
raise ValueError(f"Model {model} not found")
|