Spaces:

milwright
/

speech-buddy

Sleeping

App Files Files Community

milwright committed 5 days ago

Commit

685ec96

verified ·

1 Parent(s): 2850f05

Upload 4 files

Browse files

Files changed (4) hide show

README.md +1 -1
app.py +174 -6
config.json +4 -1
requirements.txt +2 -1

README.md CHANGED Viewed

@@ -42,7 +42,7 @@ AI Italian conversation partner
 Your Space should now be running! Try the example prompts or ask your own questions.
 ## Configuration
-- **Model**: openai/gpt-oss-120b
 - **API Key Variable**: API_KEY
 - **HF Token Variable**: HF_TOKEN (for auto-updates)
 - **Access Control**: Enabled (ACCESS_CODE)

 Your Space should now be running! Try the example prompts or ask your own questions.
 ## Configuration
+- **Model**: google/gemma-3-27b-it
 - **API Key Variable**: API_KEY
 - **HF Token Variable**: HF_TOKEN (for auto-updates)
 - **Access Control**: Enabled (ACCESS_CODE)

app.py CHANGED Viewed

@@ -9,6 +9,8 @@ from datetime import datetime
 import urllib.parse
 from pathlib import Path
 from typing import List, Dict, Optional, Any, Tuple
 # Configuration
@@ -22,7 +24,7 @@ DEFAULT_CONFIG = {
     'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
     'temperature': 0.5,
     'max_tokens': 250,
-    'model': 'openai/gpt-oss-120b',
     'api_key_var': 'API_KEY',
     'theme': 'Default',
     'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
@@ -30,6 +32,9 @@ DEFAULT_CONFIG = {
     'enable_file_upload': True,
     'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
     'language': 'Italian',
     'locked': False
 }
@@ -138,6 +143,9 @@ GROUNDING_URLS = config.get('grounding_urls', DEFAULT_CONFIG['grounding_urls'])
 ENABLE_DYNAMIC_URLS = config.get('enable_dynamic_urls', DEFAULT_CONFIG['enable_dynamic_urls'])
 ENABLE_FILE_UPLOAD = config.get('enable_file_upload', DEFAULT_CONFIG.get('enable_file_upload', True))
 LANGUAGE = config.get('language', DEFAULT_CONFIG.get('language', 'English'))
 # Environment variables
 ACCESS_CODE = os.environ.get("ACCESS_CODE")
@@ -518,6 +526,81 @@ def verify_hf_token_access() -> Tuple[bool, str]:
         return False, f"Error verifying HF token: {str(e)}"
 # Create main interface with clean tab structure
 def create_interface():
     """Create the Gradio interface with clean tab structure"""
@@ -613,6 +696,60 @@ def create_interface():
                         outputs=[export_btn]
                     )
                     # Examples section
                     if examples:
                         gr.Examples(examples=examples, inputs=msg)
@@ -855,6 +992,31 @@ def create_interface():
                                 info="Allow users to upload files for context"
                             )
                         # Configuration actions
                         with gr.Row():
                             save_btn = gr.Button("💾 Save Configuration", variant="primary")
@@ -862,7 +1024,7 @@ def create_interface():
                         config_status = gr.Markdown()
-                        def save_configuration(name, description, system_prompt, model, language, temp, tokens, examples, grounding_urls, enable_dynamic_urls, enable_file_upload):
                             """Save updated configuration"""
                             try:
                                 updated_config = config.copy()
@@ -878,6 +1040,9 @@ def create_interface():
                                     'grounding_urls': [url.strip() for url in grounding_urls.split('\n') if url.strip()],
                                     'enable_dynamic_urls': enable_dynamic_urls,
                                     'enable_file_upload': enable_file_upload,
                                     'locked': config.get('locked', False)
                                 })
@@ -918,7 +1083,7 @@ def create_interface():
                             save_configuration,
                             inputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
                                    edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
-                                   edit_enable_dynamic_urls, edit_enable_file_upload],
                             outputs=[config_status]
                         )
@@ -938,18 +1103,21 @@ def create_interface():
                                         '\n'.join(DEFAULT_CONFIG['grounding_urls']),
                                         DEFAULT_CONFIG['enable_dynamic_urls'],
                                         DEFAULT_CONFIG['enable_file_upload'],
                                         "✅ Reset to default configuration"
                                     )
                                 else:
-                                    return (*[gr.update() for _ in range(11)], "❌ Failed to reset")
                             except Exception as e:
-                                return (*[gr.update() for _ in range(11)], f"❌ Error: {str(e)}")
                         reset_btn.click(
                             reset_configuration,
                             outputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
                                     edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
-                                    edit_enable_dynamic_urls, edit_enable_file_upload, config_status]
                         )
                         # Configuration tab authentication handler

 import urllib.parse
 from pathlib import Path
 from typing import List, Dict, Optional, Any, Tuple
+import numpy as np
+import time
 # Configuration
     'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
     'temperature': 0.5,
     'max_tokens': 250,
+    'model': 'google/gemma-3-27b-it',
     'api_key_var': 'API_KEY',
     'theme': 'Default',
     'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
     'enable_file_upload': True,
     'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
     'language': 'Italian',
+    'enable_tts': True,
+    'tts_model': 'microsoft/speecht5_tts',
+    'tts_voice': 'default',
     'locked': False
 }
 ENABLE_DYNAMIC_URLS = config.get('enable_dynamic_urls', DEFAULT_CONFIG['enable_dynamic_urls'])
 ENABLE_FILE_UPLOAD = config.get('enable_file_upload', DEFAULT_CONFIG.get('enable_file_upload', True))
 LANGUAGE = config.get('language', DEFAULT_CONFIG.get('language', 'English'))
+# Text-to-speech settings: read from the loaded config, falling back to
+# DEFAULT_CONFIG and then to the hard-coded value given here.
+ENABLE_TTS = config.get('enable_tts', DEFAULT_CONFIG.get('enable_tts', False))
+TTS_MODEL = config.get('tts_model', DEFAULT_CONFIG.get('tts_model', 'microsoft/speecht5_tts'))
+TTS_VOICE = config.get('tts_voice', DEFAULT_CONFIG.get('tts_voice', 'default'))
 # Environment variables
 ACCESS_CODE = os.environ.get("ACCESS_CODE")
         return False, f"Error verifying HF token: {str(e)}"
+def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
+    """Generate TTS audio using the HuggingFace Inference API.
+    Args:
+        text: Text to synthesise; only the first 500 characters are sent.
+        max_retries: Maximum number of request attempts.
+    Returns:
+        ``(audio, status)`` where ``audio`` is ``(sample_rate, samples)`` on
+        success and ``None`` on any failure, and ``status`` is a user-facing
+        message describing the outcome.
+    """
+    if not ENABLE_TTS or not text:
+        return None, "TTS disabled or no text provided"
+    hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        return None, "⚠️ HF_TOKEN not configured for TTS"
+    # Limit text length for TTS
+    text = text[:500]
+    # The request is identical on every attempt, so build it once.
+    headers = {
+        "Authorization": f"Bearer {hf_token}",
+        # Ask for WAV because it is the only container decodable with the
+        # standard library. NOTE(review): confirm the endpoint honours Accept.
+        "Accept": "audio/wav",
+    }
+    api_url = f"https://api-inference.huggingface.co/models/{TTS_MODEL}"
+    payload = {"inputs": text}
+    if TTS_MODEL == "microsoft/speecht5_tts":
+        # Map the configured voice name to a speaker id and send it as a
+        # request parameter; unknown voice names fall back to 7306.
+        # NOTE(review): whether the hosted model uses ``speaker_id`` is not
+        # verified here.
+        speaker_id = {
+            "default": 7306,
+            "female": 7306,
+            "male": 5105,
+            "neutral": 6678
+        }.get(TTS_VOICE, 7306)
+        payload["parameters"] = {"speaker_id": speaker_id}
+    for attempt in range(max_retries):
+        try:
+            response = requests.post(
+                api_url,
+                headers=headers,
+                json=payload,
+                timeout=20
+            )
+            if response.status_code == 200:
+                # The body is an encoded audio file, not raw PCM samples, so it
+                # must be decoded; the sample rate comes from the file header.
+                audio = _decode_wav_audio(response.content)
+                if audio is None:
+                    content_type = response.headers.get("content-type", "unknown")
+                    return None, f"❌ Unsupported audio format from TTS API ({content_type})"
+                return audio, "✅ Audio generated successfully"
+            elif response.status_code == 503:
+                # Model is loading
+                if attempt < max_retries - 1:
+                    time.sleep(20)  # Wait for model to load
+                    continue
+                else:
+                    return None, "⏳ Model is loading, please try again in a moment"
+            else:
+                # Error bodies are not guaranteed to be a JSON object.
+                try:
+                    error_msg = response.json().get('error', 'Unknown error')
+                except (ValueError, AttributeError):
+                    error_msg = response.text[:200] or 'Unknown error'
+                return None, f"❌ API Error ({response.status_code}): {error_msg}"
+        except requests.exceptions.Timeout:
+            return None, "⏰ TTS request timeout"
+        except Exception as e:
+            # Best effort: retry after a short pause, report on the last attempt.
+            if attempt == max_retries - 1:
+                return None, f"❌ TTS Error: {str(e)}"
+            time.sleep(2)
+    return None, "❌ Max retries exceeded"
+def _decode_wav_audio(content: bytes) -> Optional[Tuple[int, np.ndarray]]:
+    """Decode PCM WAV bytes into ``(sample_rate, samples)``, or ``None`` if not possible."""
+    import io
+    import wave
+    # 8-bit WAV samples are unsigned; wider PCM samples are signed little-endian.
+    dtype_by_width = {1: "u1", 2: "<i2", 4: "<i4"}
+    try:
+        with wave.open(io.BytesIO(content), "rb") as wav_file:
+            sample_rate = wav_file.getframerate()
+            channels = wav_file.getnchannels()
+            sample_width = wav_file.getsampwidth()
+            frames = wav_file.readframes(wav_file.getnframes())
+    except (wave.Error, EOFError):
+        # Not a WAV file (e.g. FLAC/MP3) or a truncated header.
+        return None
+    dtype = dtype_by_width.get(sample_width)
+    frame_size = sample_width * channels
+    if dtype is None or frame_size == 0:
+        return None
+    # Drop a trailing partial frame so the buffer length matches the dtype.
+    usable = len(frames) - len(frames) % frame_size
+    if usable == 0:
+        return None
+    samples = np.frombuffer(frames[:usable], dtype=dtype)
+    if channels > 1:
+        samples = samples.reshape(-1, channels)
+    return sample_rate, samples
 # Create main interface with clean tab structure
 def create_interface():
     """Create the Gradio interface with clean tab structure"""
                         outputs=[export_btn]
                     )
+                    # TTS functionality: a button that reads the most recent
+                    # assistant reply aloud through generate_tts().
+                    if ENABLE_TTS:
+                        with gr.Row():
+                            tts_btn = gr.Button("🔊 Read Last Response", variant="secondary", size="sm")
+                            # Hidden until audio has been generated.
+                            audio_output = gr.Audio(label="TTS Output", visible=False, autoplay=True)
+                        tts_status = gr.Markdown("", visible=False)
+                        # Text of the latest assistant reply, refreshed on every chat change.
+                        last_assistant_message = gr.State("")
+                        def update_last_message(chat_history):
+                            """Return the text of the last assistant message, or "" if there is none."""
+                            if not chat_history:
+                                return ""
+                            # Find the last assistant message
+                            for message in reversed(chat_history):
+                                if isinstance(message, dict) and message.get('role') == 'assistant':
+                                    content = message.get('content', '')
+                                    # Only plain text can be spoken; anything else
+                                    # would fail when generate_tts slices it.
+                                    return content if isinstance(content, str) else ""
+                            return ""
+                        def handle_tts_click(last_message):
+                            """Synthesise *last_message* and return updates for the audio player and status line."""
+                            if not last_message:
+                                return (
+                                    gr.update(value=None, visible=False),
+                                    gr.update(value="⚠️ No message to read", visible=True)
+                                )
+                            audio_data, status_msg = generate_tts(last_message)
+                            # One update carries both the value and the visibility,
+                            # so the component appears only once in the outputs.
+                            return (
+                                gr.update(value=audio_data, visible=audio_data is not None),
+                                gr.update(value=status_msg, visible=True)
+                            )
+                        # Update last message whenever chat updates
+                        chatbot.change(
+                            update_last_message,
+                            inputs=[chatbot],
+                            outputs=[last_assistant_message]
+                        )
+                        # Handle TTS button click
+                        tts_btn.click(
+                            handle_tts_click,
+                            inputs=[last_assistant_message],
+                            outputs=[audio_output, tts_status]
+                        )
                     # Examples section
                     if examples:
                         gr.Examples(examples=examples, inputs=msg)
                                 info="Allow users to upload files for context"
                             )
+                        # TTS Configuration: widgets for the enable_tts, tts_model and
+                        # tts_voice config keys, pre-filled from the loaded config.
+                        gr.Markdown("### 🔊 Text-to-Speech")
+                        with gr.Row():
+                            # Falls back to False when the key is absent, although
+                            # DEFAULT_CONFIG sets enable_tts to True.
+                            edit_enable_tts = gr.Checkbox(
+                                label="Enable TTS",
+                                value=config.get('enable_tts', False),
+                                info="Enable text-to-speech for assistant responses"
+                            )
+                            # allow_custom_value presumably permits model ids outside
+                            # this list -- confirm against the Gradio Dropdown docs.
+                            edit_tts_model = gr.Dropdown(
+                                label="TTS Model",
+                                choices=[
+                                    "microsoft/speecht5_tts",
+                                    "facebook/mms-tts-eng",
+                                    "suno/bark",
+                                    "parler-tts/parler-tts-mini-v1"
+                                ],
+                                value=config.get('tts_model', 'microsoft/speecht5_tts'),
+                                allow_custom_value=True
+                            )
+                            # generate_tts only reads the voice setting when the model
+                            # is microsoft/speecht5_tts.
+                            edit_tts_voice = gr.Dropdown(
+                                label="Voice",
+                                choices=["default", "female", "male", "neutral"],
+                                value=config.get('tts_voice', 'default')
+                            )
                         # Configuration actions
                         with gr.Row():
                             save_btn = gr.Button("💾 Save Configuration", variant="primary")
                         config_status = gr.Markdown()
+                        def save_configuration(name, description, system_prompt, model, language, temp, tokens, examples, grounding_urls, enable_dynamic_urls, enable_file_upload, enable_tts, tts_model, tts_voice):
                             """Save updated configuration"""
                             try:
                                 updated_config = config.copy()
                                     'grounding_urls': [url.strip() for url in grounding_urls.split('\n') if url.strip()],
                                     'enable_dynamic_urls': enable_dynamic_urls,
                                     'enable_file_upload': enable_file_upload,
+                                    'enable_tts': enable_tts,
+                                    'tts_model': tts_model,
+                                    'tts_voice': tts_voice,
                                     'locked': config.get('locked', False)
                                 })
                             save_configuration,
                             inputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
                                    edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
+                                   edit_enable_dynamic_urls, edit_enable_file_upload, edit_enable_tts, edit_tts_model, edit_tts_voice],
                             outputs=[config_status]
                         )
                                         '\n'.join(DEFAULT_CONFIG['grounding_urls']),
                                         DEFAULT_CONFIG['enable_dynamic_urls'],
                                         DEFAULT_CONFIG['enable_file_upload'],
+                                        DEFAULT_CONFIG.get('enable_tts', False),
+                                        DEFAULT_CONFIG.get('tts_model', 'microsoft/speecht5_tts'),
+                                        DEFAULT_CONFIG.get('tts_voice', 'default'),
                                         "✅ Reset to default configuration"
                                     )
                                 else:
+                                    return (*[gr.update() for _ in range(14)], "❌ Failed to reset")
                             except Exception as e:
+                                return (*[gr.update() for _ in range(14)], f"❌ Error: {str(e)}")
                         reset_btn.click(
                             reset_configuration,
                             outputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
                                     edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
+                                    edit_enable_dynamic_urls, edit_enable_file_upload, edit_enable_tts, edit_tts_model, edit_tts_voice, config_status]
                         )
                         # Configuration tab authentication handler

config.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "tagline": "AI Italian conversation partner",
   "description": "AI Italian conversation partner",
   "system_prompt": "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
-  "model": "openai/gpt-oss-120b",
   "language": "Italian",
   "api_key_var": "API_KEY",
   "temperature": 0.5,
@@ -20,5 +20,8 @@
   ],
   "enable_dynamic_urls": true,
   "enable_file_upload": true,
   "theme": "Default"
 }

   "tagline": "AI Italian conversation partner",
   "description": "AI Italian conversation partner",
   "system_prompt": "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
+  "model": "google/gemma-3-27b-it",
   "language": "Italian",
   "api_key_var": "API_KEY",
   "temperature": 0.5,
   ],
   "enable_dynamic_urls": true,
   "enable_file_upload": true,
+  "enable_tts": true,
+  "tts_model": "microsoft/speecht5_tts",
+  "tts_voice": "default",
   "theme": "Default"
 }

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ gradio>=5.39.0
 requests>=2.32.3
 beautifulsoup4>=4.12.3
 python-dotenv>=1.0.0
-huggingface-hub>=0.20.0

 requests>=2.32.3
 beautifulsoup4>=4.12.3
 python-dotenv>=1.0.0
+huggingface-hub>=0.20.0
+numpy>=1.24.0