milwright committed on
Commit
aaa7d4e
·
verified ·
1 Parent(s): 54ce8c0

Upload 4 files

Browse files
Files changed (2) hide show
  1. app.py +8 -26
  2. config.json +4 -2
app.py CHANGED
@@ -27,13 +27,13 @@ DEFAULT_CONFIG = {
27
  'model': 'google/gemma-3-27b-it',
28
  'api_key_var': 'API_KEY',
29
  'theme': 'Default',
30
- 'grounding_urls': [],
31
  'enable_dynamic_urls': True,
32
  'enable_file_upload': True,
33
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
34
  'language': 'Italian',
35
  'enable_tts': True,
36
- 'tts_model': 'microsoft/speecht5_tts',
37
  'tts_voice': 'default',
38
  'locked': False
39
  }
@@ -544,32 +544,14 @@ def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, n
544
  # Limit text length for TTS
545
  text = text[:500]
546
 
547
- # Select speaker embedding based on voice preference
548
- speaker_embeddings = None
549
- if TTS_MODEL == "microsoft/speecht5_tts":
550
- # For SpeechT5, we need speaker embeddings
551
- # Using a predefined speaker ID (7306 is a clear female voice)
552
- speaker_id = {
553
- "default": 7306,
554
- "female": 7306,
555
- "male": 5105,
556
- "neutral": 6678
557
- }.get(TTS_VOICE, 7306)
558
-
559
- # Note: In production, you'd load actual embeddings from the dataset
560
- # For now, we'll let the API handle default voice
561
- speaker_embeddings = {"speaker_id": speaker_id}
562
 
563
  for attempt in range(max_retries):
564
  try:
565
  headers = {"Authorization": f"Bearer {hf_token}"}
566
  api_url = f"https://api-inference.huggingface.co/models/{TTS_MODEL}"
567
 
568
- # Prepare payload
569
- payload = {"inputs": text}
570
- if speaker_embeddings and TTS_MODEL == "microsoft/speecht5_tts":
571
- # For models that support speaker embeddings
572
- payload["parameters"] = speaker_embeddings
573
 
574
  response = requests.post(
575
  api_url,
@@ -1012,12 +994,12 @@ def create_interface():
1012
  edit_tts_model = gr.Dropdown(
1013
  label="TTS Model",
1014
  choices=[
1015
- "microsoft/speecht5_tts",
1016
  "facebook/mms-tts-eng",
1017
- "suno/bark",
1018
- "parler-tts/parler-tts-mini-v1"
1019
  ],
1020
- value=config.get('tts_model', 'microsoft/speecht5_tts'),
1021
  allow_custom_value=True
1022
  )
1023
  edit_tts_voice = gr.Dropdown(
 
27
  'model': 'google/gemma-3-27b-it',
28
  'api_key_var': 'API_KEY',
29
  'theme': 'Default',
30
+ 'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
31
  'enable_dynamic_urls': True,
32
  'enable_file_upload': True,
33
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
34
  'language': 'Italian',
35
  'enable_tts': True,
36
+ 'tts_model': 'facebook/fastspeech2-en-ljspeech',
37
  'tts_voice': 'default',
38
  'locked': False
39
  }
 
544
  # Limit text length for TTS
545
  text = text[:500]
546
 
547
+ # Prepare payload - most models just need the text
548
+ payload = {"inputs": text}
 
 
 
 
 
 
 
 
 
 
 
 
 
549
 
550
  for attempt in range(max_retries):
551
  try:
552
  headers = {"Authorization": f"Bearer {hf_token}"}
553
  api_url = f"https://api-inference.huggingface.co/models/{TTS_MODEL}"
554
 
 
 
 
 
 
555
 
556
  response = requests.post(
557
  api_url,
 
994
  edit_tts_model = gr.Dropdown(
995
  label="TTS Model",
996
  choices=[
997
+ "facebook/fastspeech2-en-ljspeech",
998
  "facebook/mms-tts-eng",
999
+ "espnet/kan-bayashi_ljspeech_vits",
1000
+ "microsoft/speecht5_tts"
1001
  ],
1002
+ value=config.get('tts_model', 'facebook/fastspeech2-en-ljspeech'),
1003
  allow_custom_value=True
1004
  )
1005
  edit_tts_voice = gr.Dropdown(
config.json CHANGED
@@ -15,11 +15,13 @@
15
  "A che ora ti svegli la mattina?",
16
  "Qual \u00e8 il tuo sport preferito?"
17
  ],
18
- "grounding_urls": [],
 
 
19
  "enable_dynamic_urls": true,
20
  "enable_file_upload": true,
21
  "enable_tts": true,
22
- "tts_model": "microsoft/speecht5_tts",
23
  "tts_voice": "default",
24
  "theme": "Default"
25
  }
 
15
  "A che ora ti svegli la mattina?",
16
  "Qual \u00e8 il tuo sport preferito?"
17
  ],
18
+ "grounding_urls": [
19
+ "https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"
20
+ ],
21
  "enable_dynamic_urls": true,
22
  "enable_file_upload": true,
23
  "enable_tts": true,
24
+ "tts_model": "facebook/fastspeech2-en-ljspeech",
25
  "tts_voice": "default",
26
  "theme": "Default"
27
  }