Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- app.py +54 -40
- config.json +3 -3
app.py
CHANGED
@@ -26,15 +26,15 @@ DEFAULT_CONFIG = {
|
|
26 |
'max_tokens': 250,
|
27 |
'model': 'google/gemma-3-27b-it',
|
28 |
'api_key_var': 'API_KEY',
|
29 |
-
'theme': '
|
30 |
'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
|
31 |
'enable_dynamic_urls': True,
|
32 |
'enable_file_upload': True,
|
33 |
'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
|
34 |
'language': 'Italian',
|
35 |
'enable_tts': True,
|
36 |
-
'tts_model': '
|
37 |
-
'tts_voice': '
|
38 |
'locked': False
|
39 |
}
|
40 |
|
@@ -533,51 +533,66 @@ def verify_hf_token_access() -> Tuple[bool, str]:
|
|
533 |
|
534 |
|
535 |
def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
|
536 |
-
"""Generate TTS audio using
|
537 |
if not ENABLE_TTS or not text:
|
538 |
return None, "TTS disabled or no text provided"
|
539 |
|
540 |
-
|
541 |
-
if not
|
542 |
-
return None, "⚠️
|
543 |
|
544 |
# Limit text length for TTS
|
545 |
-
text = text[:
|
546 |
|
547 |
-
#
|
548 |
-
|
|
|
549 |
|
550 |
for attempt in range(max_retries):
|
551 |
try:
|
552 |
-
headers = {
|
553 |
-
|
|
|
|
|
|
|
|
|
554 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
555 |
|
556 |
response = requests.post(
|
557 |
api_url,
|
558 |
headers=headers,
|
559 |
json=payload,
|
560 |
-
timeout=
|
561 |
)
|
562 |
|
563 |
if response.status_code == 200:
|
564 |
-
#
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
return None, "
|
577 |
|
578 |
else:
|
579 |
try:
|
580 |
-
error_msg = response.json().get('error', 'Unknown error')
|
581 |
except:
|
582 |
error_msg = response.text if response.text else 'Unknown error'
|
583 |
return None, f"❌ API Error ({response.status_code}): {error_msg}"
|
@@ -712,11 +727,11 @@ def create_interface():
|
|
712 |
if not last_message:
|
713 |
return None, gr.update(visible=False), gr.update(value="⚠️ No message to read", visible=True)
|
714 |
|
715 |
-
|
716 |
|
717 |
-
if
|
718 |
return (
|
719 |
-
|
720 |
gr.update(visible=True),
|
721 |
gr.update(value=status_msg, visible=True)
|
722 |
)
|
@@ -992,20 +1007,19 @@ def create_interface():
|
|
992 |
info="Enable text-to-speech for assistant responses"
|
993 |
)
|
994 |
edit_tts_model = gr.Dropdown(
|
995 |
-
label="TTS Model",
|
996 |
choices=[
|
997 |
-
"
|
998 |
-
"
|
999 |
-
"espnet/kan-bayashi_ljspeech_vits",
|
1000 |
-
"microsoft/speecht5_tts"
|
1001 |
],
|
1002 |
-
value=config.get('tts_model', '
|
1003 |
allow_custom_value=True
|
1004 |
)
|
1005 |
edit_tts_voice = gr.Dropdown(
|
1006 |
label="Voice",
|
1007 |
-
choices=["
|
1008 |
-
value=config.get('tts_voice', '
|
|
|
1009 |
)
|
1010 |
|
1011 |
# Configuration actions
|
@@ -1095,8 +1109,8 @@ def create_interface():
|
|
1095 |
DEFAULT_CONFIG['enable_dynamic_urls'],
|
1096 |
DEFAULT_CONFIG['enable_file_upload'],
|
1097 |
DEFAULT_CONFIG.get('enable_tts', False),
|
1098 |
-
DEFAULT_CONFIG.get('tts_model', '
|
1099 |
-
DEFAULT_CONFIG.get('tts_voice', '
|
1100 |
"✅ Reset to default configuration"
|
1101 |
)
|
1102 |
else:
|
|
|
26 |
'max_tokens': 250,
|
27 |
'model': 'google/gemma-3-27b-it',
|
28 |
'api_key_var': 'API_KEY',
|
29 |
+
'theme': 'Base',
|
30 |
'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
|
31 |
'enable_dynamic_urls': True,
|
32 |
'enable_file_upload': True,
|
33 |
'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
|
34 |
'language': 'Italian',
|
35 |
'enable_tts': True,
|
36 |
+
'tts_model': 'openai/tts-1-hd',
|
37 |
+
'tts_voice': 'onyx',
|
38 |
'locked': False
|
39 |
}
|
40 |
|
|
|
533 |
|
534 |
|
535 |
def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
|
536 |
+
"""Generate TTS audio using OpenAI's TTS API through OpenRouter"""
|
537 |
if not ENABLE_TTS or not text:
|
538 |
return None, "TTS disabled or no text provided"
|
539 |
|
540 |
+
api_key = os.getenv(API_KEY_VAR)
|
541 |
+
if not api_key:
|
542 |
+
return None, f"⚠️ {API_KEY_VAR} not configured for TTS"
|
543 |
|
544 |
# Limit text length for TTS
|
545 |
+
text = text[:1000] # OpenAI supports up to 4096 chars but let's be reasonable
|
546 |
|
547 |
+
# OpenAI TTS models and voices
|
548 |
+
model = TTS_MODEL if TTS_MODEL.startswith("openai/") else "openai/tts-1"
|
549 |
+
voice = TTS_VOICE if TTS_VOICE in ["alloy", "echo", "fable", "onyx", "nova", "shimmer"] else "alloy"
|
550 |
|
551 |
for attempt in range(max_retries):
|
552 |
try:
|
553 |
+
headers = {
|
554 |
+
"Authorization": f"Bearer {api_key}",
|
555 |
+
"HTTP-Referer": "https://huggingface.co",
|
556 |
+
"X-Title": SPACE_NAME,
|
557 |
+
"Content-Type": "application/json"
|
558 |
+
}
|
559 |
|
560 |
+
# OpenRouter endpoint for OpenAI TTS
|
561 |
+
api_url = "https://openrouter.ai/api/v1/audio/speech"
|
562 |
+
|
563 |
+
payload = {
|
564 |
+
"model": model,
|
565 |
+
"input": text,
|
566 |
+
"voice": voice,
|
567 |
+
"response_format": "mp3", # Can be mp3, opus, aac, flac
|
568 |
+
"speed": 1.0 # 0.25 to 4.0
|
569 |
+
}
|
570 |
|
571 |
response = requests.post(
|
572 |
api_url,
|
573 |
headers=headers,
|
574 |
json=payload,
|
575 |
+
timeout=30
|
576 |
)
|
577 |
|
578 |
if response.status_code == 200:
|
579 |
+
# OpenAI returns MP3 audio data
|
580 |
+
# Convert to format Gradio expects
|
581 |
+
try:
|
582 |
+
# Save temporarily and load with a library that can read MP3
|
583 |
+
import tempfile
|
584 |
+
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp_file:
|
585 |
+
tmp_file.write(response.content)
|
586 |
+
tmp_path = tmp_file.name
|
587 |
+
|
588 |
+
# For now, return the file path - Gradio can handle MP3 files
|
589 |
+
return tmp_path, "✅ Audio generated successfully"
|
590 |
+
except Exception as e:
|
591 |
+
return None, f"❌ Error processing audio: {str(e)}"
|
592 |
|
593 |
else:
|
594 |
try:
|
595 |
+
error_msg = response.json().get('error', {}).get('message', 'Unknown error')
|
596 |
except:
|
597 |
error_msg = response.text if response.text else 'Unknown error'
|
598 |
return None, f"❌ API Error ({response.status_code}): {error_msg}"
|
|
|
727 |
if not last_message:
|
728 |
return None, gr.update(visible=False), gr.update(value="⚠️ No message to read", visible=True)
|
729 |
|
730 |
+
audio_file, status_msg = generate_tts(last_message)
|
731 |
|
732 |
+
if audio_file:
|
733 |
return (
|
734 |
+
audio_file, # File path for Gradio to play
|
735 |
gr.update(visible=True),
|
736 |
gr.update(value=status_msg, visible=True)
|
737 |
)
|
|
|
1007 |
info="Enable text-to-speech for assistant responses"
|
1008 |
)
|
1009 |
edit_tts_model = gr.Dropdown(
|
1010 |
+
label="TTS Model",
|
1011 |
choices=[
|
1012 |
+
"openai/tts-1",
|
1013 |
+
"openai/tts-1-hd"
|
|
|
|
|
1014 |
],
|
1015 |
+
value=config.get('tts_model', 'openai/tts-1'),
|
1016 |
allow_custom_value=True
|
1017 |
)
|
1018 |
edit_tts_voice = gr.Dropdown(
|
1019 |
label="Voice",
|
1020 |
+
choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
|
1021 |
+
value=config.get('tts_voice', 'alloy'),
|
1022 |
+
info="alloy: neutral, echo: male, fable: british male, onyx: deep male, nova: female, shimmer: female"
|
1023 |
)
|
1024 |
|
1025 |
# Configuration actions
|
|
|
1109 |
DEFAULT_CONFIG['enable_dynamic_urls'],
|
1110 |
DEFAULT_CONFIG['enable_file_upload'],
|
1111 |
DEFAULT_CONFIG.get('enable_tts', False),
|
1112 |
+
DEFAULT_CONFIG.get('tts_model', 'openai/tts-1'),
|
1113 |
+
DEFAULT_CONFIG.get('tts_voice', 'alloy'),
|
1114 |
"✅ Reset to default configuration"
|
1115 |
)
|
1116 |
else:
|
config.json
CHANGED
@@ -21,7 +21,7 @@
|
|
21 |
"enable_dynamic_urls": true,
|
22 |
"enable_file_upload": true,
|
23 |
"enable_tts": true,
|
24 |
-
"tts_model": "
|
25 |
-
"tts_voice": "
|
26 |
-
"theme": "
|
27 |
}
|
|
|
21 |
"enable_dynamic_urls": true,
|
22 |
"enable_file_upload": true,
|
23 |
"enable_tts": true,
|
24 |
+
"tts_model": "openai/tts-1-hd",
|
25 |
+
"tts_voice": "onyx",
|
26 |
+
"theme": "Base"
|
27 |
}
|