Spaces:
Running
Running
New TTS: Spark
Browse files- app/models.py +38 -18
app/models.py
CHANGED
|
@@ -31,11 +31,11 @@ AVAILABLE_MODELS = {
|
|
| 31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 33 |
# 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
| 34 |
-
'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 35 |
# 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 36 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 37 |
-
'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
| 38 |
-
|
| 39 |
|
| 40 |
# E2 & F5 TTS
|
| 41 |
# F5 model
|
|
@@ -45,9 +45,9 @@ AVAILABLE_MODELS = {
|
|
| 45 |
|
| 46 |
# # Parler
|
| 47 |
# Parler Large model
|
| 48 |
-
'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 49 |
# Parler Mini model
|
| 50 |
-
|
| 51 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
| 52 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
| 53 |
|
|
@@ -95,9 +95,12 @@ AVAILABLE_MODELS = {
|
|
| 95 |
'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
|
| 96 |
|
| 97 |
# Zonos
|
| 98 |
-
'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
|
| 99 |
'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
|
| 100 |
|
|
|
|
|
|
|
|
|
|
| 101 |
# HF TTS with issues
|
| 102 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # unresponsive to exclamation marks # 4.29
|
| 103 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
|
@@ -255,7 +258,7 @@ HF_SPACES = {
|
|
| 255 |
'fishaudio/fish-speech-1': {
|
| 256 |
'name': 'Fish Speech',
|
| 257 |
'function': '/inference_wrapper',
|
| 258 |
-
'text_param_index':
|
| 259 |
'return_audio_index': 0,
|
| 260 |
'series': 'Fish Speech',
|
| 261 |
'emoji': '😷', # broken space
|
|
@@ -468,6 +471,16 @@ HF_SPACES = {
|
|
| 468 |
'is_zero_gpu_space': True,
|
| 469 |
'series': 'Zonos',
|
| 470 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
}
|
| 472 |
|
| 473 |
# for zero-shot TTS - voice sample used by XTTS (11 seconds)
|
|
@@ -568,16 +581,16 @@ OVERRIDE_INPUTS = {
|
|
| 568 |
},
|
| 569 |
|
| 570 |
'fishaudio/fish-speech-1': {
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
},
|
| 582 |
|
| 583 |
# F5
|
|
@@ -746,6 +759,13 @@ OVERRIDE_INPUTS = {
|
|
| 746 |
# 'Steveeeeeeen/Zonos/hybrid': {
|
| 747 |
# 'model_choice': 'Zyphra/Zonos-v0.1-hybrid',
|
| 748 |
# },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 749 |
}
|
| 750 |
|
| 751 |
# minor mods to model from the same space
|
|
@@ -810,7 +830,7 @@ closed_source = [
|
|
| 810 |
]
|
| 811 |
|
| 812 |
# top five models in order to always have one of them picked and scrutinized
|
| 813 |
-
top_five = ['
|
| 814 |
|
| 815 |
# prioritize low vote models
|
| 816 |
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|
|
|
|
| 31 |
#'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 32 |
#'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # extra_headers error appears for 5.13+
|
| 33 |
# 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
| 34 |
+
# 'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 35 |
# 'Pendrokar/xVASynth-TTS/NoDeepMoji': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
| 36 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 37 |
+
# 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
| 38 |
+
'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Queue ERROR
|
| 39 |
|
| 40 |
# E2 & F5 TTS
|
| 41 |
# F5 model
|
|
|
|
| 45 |
|
| 46 |
# # Parler
|
| 47 |
# Parler Large model
|
| 48 |
+
# 'parler-tts/parler_tts/large': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 49 |
# Parler Mini model
|
| 50 |
+
'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
| 51 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
| 52 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
| 53 |
|
|
|
|
| 95 |
'CAMB-AI/mars6-turbo-demo': 'CAMB-AI/mars6-turbo-demo',
|
| 96 |
|
| 97 |
# Zonos
|
| 98 |
+
# 'Steveeeeeeen/Zonos': 'Steveeeeeeen/Zonos',
|
| 99 |
'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
|
| 100 |
|
| 101 |
+
# Spark
|
| 102 |
+
'thunnai/SparkTTS': 'thunnai/SparkTTS',
|
| 103 |
+
|
| 104 |
# HF TTS with issues
|
| 105 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # unresponsive to exclamation marks # 4.29
|
| 106 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
|
|
|
| 258 |
'fishaudio/fish-speech-1': {
|
| 259 |
'name': 'Fish Speech',
|
| 260 |
'function': '/inference_wrapper',
|
| 261 |
+
'text_param_index': 'text',
|
| 262 |
'return_audio_index': 0,
|
| 263 |
'series': 'Fish Speech',
|
| 264 |
'emoji': '😷', # broken space
|
|
|
|
| 471 |
'is_zero_gpu_space': True,
|
| 472 |
'series': 'Zonos',
|
| 473 |
},
|
| 474 |
+
|
| 475 |
+
# Spark-TTS
|
| 476 |
+
'thunnai/SparkTTS': {
|
| 477 |
+
'name': 'Spark-TTS',
|
| 478 |
+
'function': '/voice_clone',
|
| 479 |
+
'text_param_index': 'text',
|
| 480 |
+
'return_audio_index': 0,
|
| 481 |
+
'is_zero_gpu_space': True,
|
| 482 |
+
'series': 'Spark-TTS',
|
| 483 |
+
},
|
| 484 |
}
|
| 485 |
|
| 486 |
# for zero-shot TTS - voice sample used by XTTS (11 seconds)
|
|
|
|
| 581 |
},
|
| 582 |
|
| 583 |
'fishaudio/fish-speech-1': {
|
| 584 |
+
'normalize': False,
|
| 585 |
+
'reference_audio': handle_file('https://huggingface.co/spaces/fishaudio/fish-speech-1/resolve/main/examples/English.wav'),
|
| 586 |
+
'reference_text': 'In the ancient land of Eldoria, where the skies were painted with shades of mystic hues and the forests whispered secrets of old, there existed a dragon named Zephyros. Unlike the fearsome tales of dragons that plagued human hearts with terror, Zephyros was a creature of wonder and wisdom, revered by all who knew of his existence.', # reference_text
|
| 587 |
+
'max_new_tokens': 1024,
|
| 588 |
+
'chunk_length': 200,
|
| 589 |
+
'top_p': 0.7,
|
| 590 |
+
'repetition_penalty': 1.2,
|
| 591 |
+
'temperature': 0.7,
|
| 592 |
+
'seed': 0,
|
| 593 |
+
'use_memory_cache': "never",
|
| 594 |
},
|
| 595 |
|
| 596 |
# F5
|
|
|
|
| 759 |
# 'Steveeeeeeen/Zonos/hybrid': {
|
| 760 |
# 'model_choice': 'Zyphra/Zonos-v0.1-hybrid',
|
| 761 |
# },
|
| 762 |
+
|
| 763 |
+
# Spark-TTS
|
| 764 |
+
'thunnai/SparkTTS' : {
|
| 765 |
+
'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
|
| 766 |
+
'prompt_wav_upload': DEFAULT_VOICE_SAMPLE,
|
| 767 |
+
'prompt_wav_record': None,
|
| 768 |
+
}
|
| 769 |
}
|
| 770 |
|
| 771 |
# minor mods to model from the same space
|
|
|
|
| 830 |
]
|
| 831 |
|
| 832 |
# top five models in order to always have one of them picked and scrutinized
|
| 833 |
+
top_five = ['Spark-TTS']
|
| 834 |
|
| 835 |
# prioritize low vote models
|
| 836 |
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
|