milwright committed on
Commit
faebaa0
·
verified ·
1 Parent(s): 71a2826

Upload 4 files

Browse files
Files changed (3) hide show
  1. app.py +188 -11
  2. config.json +4 -8
  3. requirements.txt +2 -2
app.py CHANGED
@@ -9,6 +9,8 @@ from datetime import datetime
9
  import urllib.parse
10
  from pathlib import Path
11
  from typing import List, Dict, Optional, Any, Tuple
 
 
12
 
13
 
14
  # Configuration
@@ -22,14 +24,17 @@ DEFAULT_CONFIG = {
22
  'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
23
  'temperature': 0.5,
24
  'max_tokens': 250,
25
- 'model': 'openai/gpt-oss-120b',
26
  'api_key_var': 'API_KEY',
27
  'theme': 'Default',
28
- 'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
29
  'enable_dynamic_urls': True,
30
  'enable_file_upload': True,
31
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
32
  'language': 'Italian',
 
 
 
33
  'locked': False
34
  }
35
 
@@ -138,6 +143,9 @@ GROUNDING_URLS = config.get('grounding_urls', DEFAULT_CONFIG['grounding_urls'])
138
  ENABLE_DYNAMIC_URLS = config.get('enable_dynamic_urls', DEFAULT_CONFIG['enable_dynamic_urls'])
139
  ENABLE_FILE_UPLOAD = config.get('enable_file_upload', DEFAULT_CONFIG.get('enable_file_upload', True))
140
  LANGUAGE = config.get('language', DEFAULT_CONFIG.get('language', 'English'))
 
 
 
141
 
142
  # Environment variables
143
  ACCESS_CODE = os.environ.get("ACCESS_CODE")
@@ -470,8 +478,11 @@ Get your API key at: https://openrouter.ai/keys"""
470
  )
471
 
472
  if response.status_code == 200:
473
- result = response.json()
474
- ai_response = result['choices'][0]['message']['content']
 
 
 
475
 
476
  # Add file notification if files were uploaded
477
  if file_notification:
@@ -479,8 +490,11 @@ Get your API key at: https://openrouter.ai/keys"""
479
 
480
  return ai_response
481
  else:
482
- error_data = response.json()
483
- error_message = error_data.get('error', {}).get('message', 'Unknown error')
 
 
 
484
  return f"❌ API Error ({response.status_code}): {error_message}"
485
 
486
  except requests.exceptions.Timeout:
@@ -518,6 +532,84 @@ def verify_hf_token_access() -> Tuple[bool, str]:
518
  return False, f"Error verifying HF token: {str(e)}"
519
 
520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  # Create main interface with clean tab structure
522
  def create_interface():
523
  """Create the Gradio interface with clean tab structure"""
@@ -613,6 +705,60 @@ def create_interface():
613
  outputs=[export_btn]
614
  )
615
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
  # Examples section
617
  if examples:
618
  gr.Examples(examples=examples, inputs=msg)
@@ -855,6 +1001,31 @@ def create_interface():
855
  info="Allow users to upload files for context"
856
  )
857
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  # Configuration actions
859
  with gr.Row():
860
  save_btn = gr.Button("💾 Save Configuration", variant="primary")
@@ -862,7 +1033,7 @@ def create_interface():
862
 
863
  config_status = gr.Markdown()
864
 
865
- def save_configuration(name, description, system_prompt, model, language, temp, tokens, examples, grounding_urls, enable_dynamic_urls, enable_file_upload):
866
  """Save updated configuration"""
867
  try:
868
  updated_config = config.copy()
@@ -878,6 +1049,9 @@ def create_interface():
878
  'grounding_urls': [url.strip() for url in grounding_urls.split('\n') if url.strip()],
879
  'enable_dynamic_urls': enable_dynamic_urls,
880
  'enable_file_upload': enable_file_upload,
 
 
 
881
  'locked': config.get('locked', False)
882
  })
883
 
@@ -918,7 +1092,7 @@ def create_interface():
918
  save_configuration,
919
  inputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
920
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
921
- edit_enable_dynamic_urls, edit_enable_file_upload],
922
  outputs=[config_status]
923
  )
924
 
@@ -938,18 +1112,21 @@ def create_interface():
938
  '\n'.join(DEFAULT_CONFIG['grounding_urls']),
939
  DEFAULT_CONFIG['enable_dynamic_urls'],
940
  DEFAULT_CONFIG['enable_file_upload'],
 
 
 
941
  "✅ Reset to default configuration"
942
  )
943
  else:
944
- return (*[gr.update() for _ in range(11)], "❌ Failed to reset")
945
  except Exception as e:
946
- return (*[gr.update() for _ in range(11)], f"❌ Error: {str(e)}")
947
 
948
  reset_btn.click(
949
  reset_configuration,
950
  outputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
951
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
952
- edit_enable_dynamic_urls, edit_enable_file_upload, config_status]
953
  )
954
 
955
  # Configuration tab authentication handler
 
9
  import urllib.parse
10
  from pathlib import Path
11
  from typing import List, Dict, Optional, Any, Tuple
12
+ import numpy as np
13
+ import time
14
 
15
 
16
  # Configuration
 
24
  'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
25
  'temperature': 0.5,
26
  'max_tokens': 250,
27
+ 'model': 'google/gemma-3-27b-it',
28
  'api_key_var': 'API_KEY',
29
  'theme': 'Default',
30
+ 'grounding_urls': [],
31
  'enable_dynamic_urls': True,
32
  'enable_file_upload': True,
33
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
34
  'language': 'Italian',
35
+ 'enable_tts': True,
36
+ 'tts_model': 'microsoft/speecht5_tts',
37
+ 'tts_voice': 'default',
38
  'locked': False
39
  }
40
 
 
143
  ENABLE_DYNAMIC_URLS = config.get('enable_dynamic_urls', DEFAULT_CONFIG['enable_dynamic_urls'])
144
  ENABLE_FILE_UPLOAD = config.get('enable_file_upload', DEFAULT_CONFIG.get('enable_file_upload', True))
145
  LANGUAGE = config.get('language', DEFAULT_CONFIG.get('language', 'English'))
146
+ ENABLE_TTS = config.get('enable_tts', DEFAULT_CONFIG.get('enable_tts', False))
147
+ TTS_MODEL = config.get('tts_model', DEFAULT_CONFIG.get('tts_model', 'microsoft/speecht5_tts'))
148
+ TTS_VOICE = config.get('tts_voice', DEFAULT_CONFIG.get('tts_voice', 'default'))
149
 
150
  # Environment variables
151
  ACCESS_CODE = os.environ.get("ACCESS_CODE")
 
478
  )
479
 
480
  if response.status_code == 200:
481
+ try:
482
+ result = response.json()
483
+ ai_response = result['choices'][0]['message']['content']
484
+ except (json.JSONDecodeError, KeyError) as e:
485
+ return f"❌ Error parsing API response: {str(e)}"
486
 
487
  # Add file notification if files were uploaded
488
  if file_notification:
 
490
 
491
  return ai_response
492
  else:
493
+ try:
494
+ error_data = response.json()
495
+ error_message = error_data.get('error', {}).get('message', 'Unknown error')
496
+ except:
497
+ error_message = response.text if response.text else 'Unknown error'
498
  return f"❌ API Error ({response.status_code}): {error_message}"
499
 
500
  except requests.exceptions.Timeout:
 
532
  return False, f"Error verifying HF token: {str(e)}"
533
 
534
 
535
def _decode_tts_audio(content: bytes) -> Tuple[int, np.ndarray]:
    """Turn the raw bytes of a TTS response into ``(sample_rate, samples)``.

    A RIFF/WAVE payload is parsed with the standard-library ``wave`` module so
    the header is not played back as samples and the real sample rate is used.
    Payloads that start with a FLAC, Ogg or ID3 (MP3) signature are rejected,
    because reinterpreting compressed bytes as PCM only produces noise.
    Anything else is treated as headerless 16-bit PCM at 16 kHz, which is what
    the previous implementation assumed for every response.

    Args:
        content: Body of the HTTP response.

    Returns:
        A ``(sample_rate, samples)`` tuple. Multi-channel WAV data is returned
        with shape ``(frames, channels)``.

    Raises:
        ValueError: If the payload is empty, is a compressed container, or is
            a WAV file that the ``wave`` module cannot read.
    """
    # Local imports: these modules are only needed by this helper.
    import io
    import wave

    if not content:
        raise ValueError("empty audio payload")

    if content[:4] == b"RIFF" and content[8:12] == b"WAVE":
        try:
            with wave.open(io.BytesIO(content), "rb") as wav_file:
                sample_rate = wav_file.getframerate()
                channels = wav_file.getnchannels()
                sample_width = wav_file.getsampwidth()
                frames = wav_file.readframes(wav_file.getnframes())
        except (wave.Error, EOFError) as exc:
            raise ValueError(f"unsupported WAV payload: {exc}") from exc

        # WAV sample data is little-endian; 8-bit samples are unsigned.
        dtype = {1: "u1", 2: "<i2", 4: "<i4"}.get(sample_width)
        if dtype is None:
            raise ValueError(f"unsupported WAV sample width: {sample_width} bytes")

        # Drop a trailing partial frame (truncated download) so that
        # np.frombuffer and the reshape below cannot fail.
        frame_size = sample_width * channels
        frames = frames[: len(frames) - len(frames) % frame_size]
        samples = np.frombuffer(frames, dtype=dtype)
        if channels > 1:
            samples = samples.reshape(-1, channels)
        return sample_rate, samples

    for magic, label in ((b"fLaC", "FLAC"), (b"OggS", "Ogg"), (b"ID3", "MP3")):
        if content.startswith(magic):
            raise ValueError(
                f"received {label} audio, which cannot be decoded without an audio library"
            )

    # Headerless data: keep the original interpretation (16-bit PCM, 16 kHz),
    # but ignore an odd trailing byte instead of letting np.frombuffer raise.
    usable = len(content) - len(content) % 2
    return 16000, np.frombuffer(content[:usable], dtype=np.int16)


def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
    """Generate TTS audio using the HuggingFace Inference API.

    Args:
        text: Text to synthesise. Only the first 500 characters are sent.
        max_retries: Maximum number of POST attempts. A 503 response or an
            unexpected exception triggers another attempt while any remain.

    Returns:
        ``(audio, status)`` where ``audio`` is ``(sample_rate, samples)`` on
        success and ``None`` on failure, and ``status`` is a user-facing
        message. This function reports failures through ``status`` instead of
        raising.
    """
    if not ENABLE_TTS or not text:
        return None, "TTS disabled or no text provided"

    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        return None, "⚠️ HF_TOKEN not configured for TTS"

    # The request is identical on every attempt, so build it once.
    headers = {"Authorization": f"Bearer {hf_token}"}
    api_url = f"https://api-inference.huggingface.co/models/{TTS_MODEL}"
    payload = {"inputs": text[:500]}  # Limit text length for TTS
    if TTS_MODEL == "microsoft/speecht5_tts":
        # The voice preference is mapped to a numeric speaker id and sent as a
        # request parameter.
        # NOTE(review): whether the endpoint honours "speaker_id" is not
        # visible here — confirm against the API documentation.
        speaker_id = {
            "default": 7306,
            "female": 7306,
            "male": 5105,
            "neutral": 6678,
        }.get(TTS_VOICE, 7306)
        payload["parameters"] = {"speaker_id": speaker_id}

    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1

        try:
            response = requests.post(api_url, headers=headers, json=payload, timeout=20)
        except requests.exceptions.Timeout:
            return None, "⏰ TTS request timeout"
        except Exception as e:
            # Best-effort boundary: any other transport failure is retried
            # after a short pause and reported once attempts run out.
            if is_last_attempt:
                return None, f"❌ TTS Error: {str(e)}"
            time.sleep(2)
            continue

        if response.status_code == 200:
            try:
                audio = _decode_tts_audio(response.content)
            except ValueError as exc:
                # Retrying cannot change the payload format, so report it now.
                return None, f"❌ TTS Error: {exc}"
            return audio, "✅ Audio generated successfully"

        if response.status_code == 503:
            # Model is loading
            if is_last_attempt:
                return None, "⏳ Model is loading, please try again in a moment"
            time.sleep(20)  # Wait for model to load
            continue

        # Any other status: surface the API's own error text when available.
        try:
            error_body = response.json()
        except ValueError:
            # Body is not JSON (e.g. an HTML error page); fall back to raw text.
            error_body = None
        if isinstance(error_body, dict):
            error_msg = error_body.get('error', 'Unknown error')
        else:
            error_msg = response.text if response.text else 'Unknown error'
        return None, f"❌ API Error ({response.status_code}): {error_msg}"

    return None, "❌ Max retries exceeded"
611
+
612
+
613
  # Create main interface with clean tab structure
614
  def create_interface():
615
  """Create the Gradio interface with clean tab structure"""
 
705
  outputs=[export_btn]
706
  )
707
 
708
+ # TTS functionality
709
+ if ENABLE_TTS:
710
+ with gr.Row():
711
+ tts_btn = gr.Button("🔊 Read Last Response", variant="secondary", size="sm")
712
+ audio_output = gr.Audio(label="TTS Output", visible=False, autoplay=True)
713
+
714
+ tts_status = gr.Markdown("", visible=False)
715
+ last_assistant_message = gr.State("")
716
+
717
def update_last_message(chat_history):
    """Return the text of the most recent assistant message in the chat.

    Args:
        chat_history: Chat history as a list of message dicts with ``role``
            and ``content`` keys. Entries that are not dicts are ignored.

    Returns:
        The content of the latest assistant message whose content is a
        non-empty string, or ``""`` when there is none. Assistant entries
        whose content is not text (``None``, a file or component payload) are
        skipped, because the caller slices and speaks the value as a string.
    """
    if not chat_history:
        return ""

    # Walk backwards so the first match is the most recent one.
    for message in reversed(chat_history):
        if not isinstance(message, dict) or message.get('role') != 'assistant':
            continue
        content = message.get('content', '')
        if isinstance(content, str) and content:
            return content
    return ""
727
+
728
def handle_tts_click(last_message):
    """Synthesise speech for the stored assistant message on button click.

    Returns a 3-tuple: the audio value (or ``None``), a visibility update for
    the audio player, and an update that shows the status text.
    """
    # Nothing stored yet: hide the player and tell the user why.
    if not last_message:
        return None, gr.update(visible=False), gr.update(value="⚠️ No message to read", visible=True)

    audio_data, status_msg = generate_tts(last_message)

    # The status line is shown in both outcomes; only the player differs.
    status_update = gr.update(value=status_msg, visible=True)
    if audio_data:
        return audio_data, gr.update(visible=True), status_update
    return None, gr.update(visible=False), status_update
747
+
748
+ # Update last message whenever chat updates
749
+ chatbot.change(
750
+ update_last_message,
751
+ inputs=[chatbot],
752
+ outputs=[last_assistant_message]
753
+ )
754
+
755
+ # Handle TTS button click
756
+ tts_btn.click(
757
+ handle_tts_click,
758
+ inputs=[last_assistant_message],
759
+ outputs=[audio_output, audio_output, tts_status]
760
+ )
761
+
762
  # Examples section
763
  if examples:
764
  gr.Examples(examples=examples, inputs=msg)
 
1001
  info="Allow users to upload files for context"
1002
  )
1003
 
1004
+ # TTS Configuration
1005
+ gr.Markdown("### 🔊 Text-to-Speech")
1006
+ with gr.Row():
1007
+ edit_enable_tts = gr.Checkbox(
1008
+ label="Enable TTS",
1009
+ value=config.get('enable_tts', False),
1010
+ info="Enable text-to-speech for assistant responses"
1011
+ )
1012
+ edit_tts_model = gr.Dropdown(
1013
+ label="TTS Model",
1014
+ choices=[
1015
+ "microsoft/speecht5_tts",
1016
+ "facebook/mms-tts-eng",
1017
+ "suno/bark",
1018
+ "parler-tts/parler-tts-mini-v1"
1019
+ ],
1020
+ value=config.get('tts_model', 'microsoft/speecht5_tts'),
1021
+ allow_custom_value=True
1022
+ )
1023
+ edit_tts_voice = gr.Dropdown(
1024
+ label="Voice",
1025
+ choices=["default", "female", "male", "neutral"],
1026
+ value=config.get('tts_voice', 'default')
1027
+ )
1028
+
1029
  # Configuration actions
1030
  with gr.Row():
1031
  save_btn = gr.Button("💾 Save Configuration", variant="primary")
 
1033
 
1034
  config_status = gr.Markdown()
1035
 
1036
+ def save_configuration(name, description, system_prompt, model, language, temp, tokens, examples, grounding_urls, enable_dynamic_urls, enable_file_upload, enable_tts, tts_model, tts_voice):
1037
  """Save updated configuration"""
1038
  try:
1039
  updated_config = config.copy()
 
1049
  'grounding_urls': [url.strip() for url in grounding_urls.split('\n') if url.strip()],
1050
  'enable_dynamic_urls': enable_dynamic_urls,
1051
  'enable_file_upload': enable_file_upload,
1052
+ 'enable_tts': enable_tts,
1053
+ 'tts_model': tts_model,
1054
+ 'tts_voice': tts_voice,
1055
  'locked': config.get('locked', False)
1056
  })
1057
 
 
1092
  save_configuration,
1093
  inputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
1094
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
1095
+ edit_enable_dynamic_urls, edit_enable_file_upload, edit_enable_tts, edit_tts_model, edit_tts_voice],
1096
  outputs=[config_status]
1097
  )
1098
 
 
1112
  '\n'.join(DEFAULT_CONFIG['grounding_urls']),
1113
  DEFAULT_CONFIG['enable_dynamic_urls'],
1114
  DEFAULT_CONFIG['enable_file_upload'],
1115
+ DEFAULT_CONFIG.get('enable_tts', False),
1116
+ DEFAULT_CONFIG.get('tts_model', 'microsoft/speecht5_tts'),
1117
+ DEFAULT_CONFIG.get('tts_voice', 'default'),
1118
  "✅ Reset to default configuration"
1119
  )
1120
  else:
1121
+ return (*[gr.update() for _ in range(14)], "❌ Failed to reset")
1122
  except Exception as e:
1123
+ return (*[gr.update() for _ in range(14)], f"❌ Error: {str(e)}")
1124
 
1125
  reset_btn.click(
1126
  reset_configuration,
1127
  outputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
1128
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
1129
+ edit_enable_dynamic_urls, edit_enable_file_upload, edit_enable_tts, edit_tts_model, edit_tts_voice, config_status]
1130
  )
1131
 
1132
  # Configuration tab authentication handler
config.json CHANGED
@@ -15,15 +15,11 @@
15
  "A che ora ti svegli la mattina?",
16
  "Qual \u00e8 il tuo sport preferito?"
17
  ],
18
- "grounding_urls": [
19
- "https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"
20
- ],
21
  "enable_dynamic_urls": true,
22
  "enable_file_upload": true,
23
- "enable_tts": false,
24
- "tts_spaces": [
25
- "facebook/mms-tts-eng",
26
- "microsoft/speecht5-tts-demo"
27
- ],
28
  "theme": "Default"
29
  }
 
15
  "A che ora ti svegli la mattina?",
16
  "Qual \u00e8 il tuo sport preferito?"
17
  ],
18
+ "grounding_urls": [],
 
 
19
  "enable_dynamic_urls": true,
20
  "enable_file_upload": true,
21
+ "enable_tts": true,
22
+ "tts_model": "microsoft/speecht5_tts",
23
+ "tts_voice": "default",
 
 
24
  "theme": "Default"
25
  }
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  gradio>=5.39.0
2
- gradio_client>=1.0.0
3
  requests>=2.32.3
4
  beautifulsoup4>=4.12.3
5
  python-dotenv>=1.0.0
6
- huggingface-hub>=0.20.0
 
 
1
  gradio>=5.39.0
 
2
  requests>=2.32.3
3
  beautifulsoup4>=4.12.3
4
  python-dotenv>=1.0.0
5
+ huggingface-hub>=0.20.0
6
+ numpy>=1.24.0