milwright committed on
Commit
685ec96
·
verified ·
1 Parent(s): 2850f05

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. app.py +174 -6
  3. config.json +4 -1
  4. requirements.txt +2 -1
README.md CHANGED
@@ -42,7 +42,7 @@ AI Italian conversation partner
42
  Your Space should now be running! Try the example prompts or ask your own questions.
43
 
44
  ## Configuration
45
- - **Model**: openai/gpt-oss-120b
46
  - **API Key Variable**: API_KEY
47
  - **HF Token Variable**: HF_TOKEN (for auto-updates)
48
  - **Access Control**: Enabled (ACCESS_CODE)
 
42
  Your Space should now be running! Try the example prompts or ask your own questions.
43
 
44
  ## Configuration
45
+ - **Model**: google/gemma-3-27b-it
46
  - **API Key Variable**: API_KEY
47
  - **HF Token Variable**: HF_TOKEN (for auto-updates)
48
  - **Access Control**: Enabled (ACCESS_CODE)
app.py CHANGED
@@ -9,6 +9,8 @@ from datetime import datetime
9
  import urllib.parse
10
  from pathlib import Path
11
  from typing import List, Dict, Optional, Any, Tuple
 
 
12
 
13
 
14
  # Configuration
@@ -22,7 +24,7 @@ DEFAULT_CONFIG = {
22
  'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
23
  'temperature': 0.5,
24
  'max_tokens': 250,
25
- 'model': 'openai/gpt-oss-120b',
26
  'api_key_var': 'API_KEY',
27
  'theme': 'Default',
28
  'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
@@ -30,6 +32,9 @@ DEFAULT_CONFIG = {
30
  'enable_file_upload': True,
31
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
32
  'language': 'Italian',
 
 
 
33
  'locked': False
34
  }
35
 
@@ -138,6 +143,9 @@ GROUNDING_URLS = config.get('grounding_urls', DEFAULT_CONFIG['grounding_urls'])
138
  ENABLE_DYNAMIC_URLS = config.get('enable_dynamic_urls', DEFAULT_CONFIG['enable_dynamic_urls'])
139
  ENABLE_FILE_UPLOAD = config.get('enable_file_upload', DEFAULT_CONFIG.get('enable_file_upload', True))
140
  LANGUAGE = config.get('language', DEFAULT_CONFIG.get('language', 'English'))
 
 
 
141
 
142
  # Environment variables
143
  ACCESS_CODE = os.environ.get("ACCESS_CODE")
@@ -518,6 +526,81 @@ def verify_hf_token_access() -> Tuple[bool, str]:
518
  return False, f"Error verifying HF token: {str(e)}"
519
 
520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  # Create main interface with clean tab structure
522
  def create_interface():
523
  """Create the Gradio interface with clean tab structure"""
@@ -613,6 +696,60 @@ def create_interface():
613
  outputs=[export_btn]
614
  )
615
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
  # Examples section
617
  if examples:
618
  gr.Examples(examples=examples, inputs=msg)
@@ -855,6 +992,31 @@ def create_interface():
855
  info="Allow users to upload files for context"
856
  )
857
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  # Configuration actions
859
  with gr.Row():
860
  save_btn = gr.Button("💾 Save Configuration", variant="primary")
@@ -862,7 +1024,7 @@ def create_interface():
862
 
863
  config_status = gr.Markdown()
864
 
865
- def save_configuration(name, description, system_prompt, model, language, temp, tokens, examples, grounding_urls, enable_dynamic_urls, enable_file_upload):
866
  """Save updated configuration"""
867
  try:
868
  updated_config = config.copy()
@@ -878,6 +1040,9 @@ def create_interface():
878
  'grounding_urls': [url.strip() for url in grounding_urls.split('\n') if url.strip()],
879
  'enable_dynamic_urls': enable_dynamic_urls,
880
  'enable_file_upload': enable_file_upload,
 
 
 
881
  'locked': config.get('locked', False)
882
  })
883
 
@@ -918,7 +1083,7 @@ def create_interface():
918
  save_configuration,
919
  inputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
920
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
921
- edit_enable_dynamic_urls, edit_enable_file_upload],
922
  outputs=[config_status]
923
  )
924
 
@@ -938,18 +1103,21 @@ def create_interface():
938
  '\n'.join(DEFAULT_CONFIG['grounding_urls']),
939
  DEFAULT_CONFIG['enable_dynamic_urls'],
940
  DEFAULT_CONFIG['enable_file_upload'],
 
 
 
941
  "✅ Reset to default configuration"
942
  )
943
  else:
944
- return (*[gr.update() for _ in range(11)], "❌ Failed to reset")
945
  except Exception as e:
946
- return (*[gr.update() for _ in range(11)], f"❌ Error: {str(e)}")
947
 
948
  reset_btn.click(
949
  reset_configuration,
950
  outputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
951
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
952
- edit_enable_dynamic_urls, edit_enable_file_upload, config_status]
953
  )
954
 
955
  # Configuration tab authentication handler
 
9
  import urllib.parse
10
  from pathlib import Path
11
  from typing import List, Dict, Optional, Any, Tuple
12
+ import numpy as np
13
+ import time
14
 
15
 
16
  # Configuration
 
24
  'system_prompt': "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
25
  'temperature': 0.5,
26
  'max_tokens': 250,
27
+ 'model': 'google/gemma-3-27b-it',
28
  'api_key_var': 'API_KEY',
29
  'theme': 'Default',
30
  'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
 
32
  'enable_file_upload': True,
33
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
34
  'language': 'Italian',
35
+ 'enable_tts': True,
36
+ 'tts_model': 'microsoft/speecht5_tts',
37
+ 'tts_voice': 'default',
38
  'locked': False
39
  }
40
 
 
143
  ENABLE_DYNAMIC_URLS = config.get('enable_dynamic_urls', DEFAULT_CONFIG['enable_dynamic_urls'])
144
  ENABLE_FILE_UPLOAD = config.get('enable_file_upload', DEFAULT_CONFIG.get('enable_file_upload', True))
145
  LANGUAGE = config.get('language', DEFAULT_CONFIG.get('language', 'English'))
146
+ ENABLE_TTS = config.get('enable_tts', DEFAULT_CONFIG.get('enable_tts', False))
147
+ TTS_MODEL = config.get('tts_model', DEFAULT_CONFIG.get('tts_model', 'microsoft/speecht5_tts'))
148
+ TTS_VOICE = config.get('tts_voice', DEFAULT_CONFIG.get('tts_voice', 'default'))
149
 
150
  # Environment variables
151
  ACCESS_CODE = os.environ.get("ACCESS_CODE")
 
526
  return False, f"Error verifying HF token: {str(e)}"
527
 
528
 
529
+ def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
530
+ """Generate TTS audio using HuggingFace Inference API"""
531
+ if not ENABLE_TTS or not text:
532
+ return None, "TTS disabled or no text provided"
533
+
534
+ hf_token = os.getenv("HF_TOKEN")
535
+ if not hf_token:
536
+ return None, "⚠️ HF_TOKEN not configured for TTS"
537
+
538
+ # Limit text length for TTS
539
+ text = text[:500]
540
+
541
+ # Select speaker embedding based on voice preference
542
+ speaker_embeddings = None
543
+ if TTS_MODEL == "microsoft/speecht5_tts":
544
+ # For SpeechT5, we need speaker embeddings
545
+ # Using a predefined speaker ID (7306 is a clear female voice)
546
+ speaker_id = {
547
+ "default": 7306,
548
+ "female": 7306,
549
+ "male": 5105,
550
+ "neutral": 6678
551
+ }.get(TTS_VOICE, 7306)
552
+
553
+ # Note: In production, you'd load actual embeddings from the dataset
554
+ # For now, we'll let the API handle default voice
555
+ speaker_embeddings = {"speaker_id": speaker_id}
556
+
557
+ for attempt in range(max_retries):
558
+ try:
559
+ headers = {"Authorization": f"Bearer {hf_token}"}
560
+ api_url = f"https://api-inference.huggingface.co/models/{TTS_MODEL}"
561
+
562
+ # Prepare payload
563
+ payload = {"inputs": text}
564
+ if speaker_embeddings and TTS_MODEL == "microsoft/speecht5_tts":
565
+ # For models that support speaker embeddings
566
+ payload["parameters"] = speaker_embeddings
567
+
568
+ response = requests.post(
569
+ api_url,
570
+ headers=headers,
571
+ json=payload,
572
+ timeout=20
573
+ )
574
+
575
+ if response.status_code == 200:
576
+ # Convert audio bytes to numpy array
577
+ audio_array = np.frombuffer(response.content, dtype=np.int16)
578
+ # Most TTS models output at 16kHz
579
+ sample_rate = 16000
580
+ return (sample_rate, audio_array), "✅ Audio generated successfully"
581
+
582
+ elif response.status_code == 503:
583
+ # Model is loading
584
+ if attempt < max_retries - 1:
585
+ time.sleep(20) # Wait for model to load
586
+ continue
587
+ else:
588
+ return None, "⏳ Model is loading, please try again in a moment"
589
+
590
+ else:
591
+ error_msg = response.json().get('error', 'Unknown error')
592
+ return None, f"❌ API Error ({response.status_code}): {error_msg}"
593
+
594
+ except requests.exceptions.Timeout:
595
+ return None, "⏰ TTS request timeout"
596
+ except Exception as e:
597
+ if attempt == max_retries - 1:
598
+ return None, f"❌ TTS Error: {str(e)}"
599
+ time.sleep(2)
600
+
601
+ return None, "❌ Max retries exceeded"
602
+
603
+
604
  # Create main interface with clean tab structure
605
  def create_interface():
606
  """Create the Gradio interface with clean tab structure"""
 
696
  outputs=[export_btn]
697
  )
698
 
699
+ # TTS functionality
700
+ if ENABLE_TTS:
701
+ with gr.Row():
702
+ tts_btn = gr.Button("🔊 Read Last Response", variant="secondary", size="sm")
703
+ audio_output = gr.Audio(label="TTS Output", visible=False, autoplay=True)
704
+
705
+ tts_status = gr.Markdown("", visible=False)
706
+ last_assistant_message = gr.State("")
707
+
708
+ def update_last_message(chat_history):
709
+ """Extract the last assistant message from chat history"""
710
+ if not chat_history:
711
+ return ""
712
+
713
+ # Find the last assistant message
714
+ for message in reversed(chat_history):
715
+ if isinstance(message, dict) and message.get('role') == 'assistant':
716
+ return message.get('content', '')
717
+ return ""
718
+
719
+ def handle_tts_click(last_message):
720
+ """Handle TTS button click"""
721
+ if not last_message:
722
+ return None, gr.update(visible=False), gr.update(value="⚠️ No message to read", visible=True)
723
+
724
+ audio_data, status_msg = generate_tts(last_message)
725
+
726
+ if audio_data:
727
+ return (
728
+ audio_data,
729
+ gr.update(visible=True),
730
+ gr.update(value=status_msg, visible=True)
731
+ )
732
+ else:
733
+ return (
734
+ None,
735
+ gr.update(visible=False),
736
+ gr.update(value=status_msg, visible=True)
737
+ )
738
+
739
+ # Update last message whenever chat updates
740
+ chatbot.change(
741
+ update_last_message,
742
+ inputs=[chatbot],
743
+ outputs=[last_assistant_message]
744
+ )
745
+
746
+ # Handle TTS button click
747
+ tts_btn.click(
748
+ handle_tts_click,
749
+ inputs=[last_assistant_message],
750
+ outputs=[audio_output, audio_output, tts_status]
751
+ )
752
+
753
  # Examples section
754
  if examples:
755
  gr.Examples(examples=examples, inputs=msg)
 
992
  info="Allow users to upload files for context"
993
  )
994
 
995
+ # TTS Configuration
996
+ gr.Markdown("### 🔊 Text-to-Speech")
997
+ with gr.Row():
998
+ edit_enable_tts = gr.Checkbox(
999
+ label="Enable TTS",
1000
+ value=config.get('enable_tts', False),
1001
+ info="Enable text-to-speech for assistant responses"
1002
+ )
1003
+ edit_tts_model = gr.Dropdown(
1004
+ label="TTS Model",
1005
+ choices=[
1006
+ "microsoft/speecht5_tts",
1007
+ "facebook/mms-tts-eng",
1008
+ "suno/bark",
1009
+ "parler-tts/parler-tts-mini-v1"
1010
+ ],
1011
+ value=config.get('tts_model', 'microsoft/speecht5_tts'),
1012
+ allow_custom_value=True
1013
+ )
1014
+ edit_tts_voice = gr.Dropdown(
1015
+ label="Voice",
1016
+ choices=["default", "female", "male", "neutral"],
1017
+ value=config.get('tts_voice', 'default')
1018
+ )
1019
+
1020
  # Configuration actions
1021
  with gr.Row():
1022
  save_btn = gr.Button("💾 Save Configuration", variant="primary")
 
1024
 
1025
  config_status = gr.Markdown()
1026
 
1027
+ def save_configuration(name, description, system_prompt, model, language, temp, tokens, examples, grounding_urls, enable_dynamic_urls, enable_file_upload, enable_tts, tts_model, tts_voice):
1028
  """Save updated configuration"""
1029
  try:
1030
  updated_config = config.copy()
 
1040
  'grounding_urls': [url.strip() for url in grounding_urls.split('\n') if url.strip()],
1041
  'enable_dynamic_urls': enable_dynamic_urls,
1042
  'enable_file_upload': enable_file_upload,
1043
+ 'enable_tts': enable_tts,
1044
+ 'tts_model': tts_model,
1045
+ 'tts_voice': tts_voice,
1046
  'locked': config.get('locked', False)
1047
  })
1048
 
 
1083
  save_configuration,
1084
  inputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
1085
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
1086
+ edit_enable_dynamic_urls, edit_enable_file_upload, edit_enable_tts, edit_tts_model, edit_tts_voice],
1087
  outputs=[config_status]
1088
  )
1089
 
 
1103
  '\n'.join(DEFAULT_CONFIG['grounding_urls']),
1104
  DEFAULT_CONFIG['enable_dynamic_urls'],
1105
  DEFAULT_CONFIG['enable_file_upload'],
1106
+ DEFAULT_CONFIG.get('enable_tts', False),
1107
+ DEFAULT_CONFIG.get('tts_model', 'microsoft/speecht5_tts'),
1108
+ DEFAULT_CONFIG.get('tts_voice', 'default'),
1109
  "✅ Reset to default configuration"
1110
  )
1111
  else:
1112
+ return (*[gr.update() for _ in range(14)], "❌ Failed to reset")
1113
  except Exception as e:
1114
+ return (*[gr.update() for _ in range(14)], f"❌ Error: {str(e)}")
1115
 
1116
  reset_btn.click(
1117
  reset_configuration,
1118
  outputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
1119
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
1120
+ edit_enable_dynamic_urls, edit_enable_file_upload, edit_enable_tts, edit_tts_model, edit_tts_voice, config_status]
1121
  )
1122
 
1123
  # Configuration tab authentication handler
config.json CHANGED
@@ -3,7 +3,7 @@
3
  "tagline": "AI Italian conversation partner",
4
  "description": "AI Italian conversation partner",
5
  "system_prompt": "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
6
- "model": "openai/gpt-oss-120b",
7
  "language": "Italian",
8
  "api_key_var": "API_KEY",
9
  "temperature": 0.5,
@@ -20,5 +20,8 @@
20
  ],
21
  "enable_dynamic_urls": true,
22
  "enable_file_upload": true,
 
 
 
23
  "theme": "Default"
24
  }
 
3
  "tagline": "AI Italian conversation partner",
4
  "description": "AI Italian conversation partner",
5
  "system_prompt": "You are Domenico from Sicily, a Juventus football fan, native Italian speaker serving as a conversational partner for university students in an Italian 101 class. Students will interact and converse with you in Italian, and you must respond EXCLUSIVELY IN ITALIAN without providing English translations, using vocabulary appropriate for beginner-level Italian 101 students. Focus your responses on topics suitable for beginners such as sports, daily life, routines, food, numbers, and hobbies. When students make errors, model the correct forms naturally in your response without explicitly pointing out mistakes, allowing them to learn through exposure to proper usage. Recognize when students demonstrate more advanced abilities and adjust your language complexity accordingly, while ensuring your Italian remains error-free. Keep all responses between 5-50 words, making sure sentences are grammatically complete. Limit all verb conjugations to the present tense only, avoiding all other verb forms and tenses. Address students using the informal second-person singular 'tu' form.",
6
+ "model": "google/gemma-3-27b-it",
7
  "language": "Italian",
8
  "api_key_var": "API_KEY",
9
  "temperature": 0.5,
 
20
  ],
21
  "enable_dynamic_urls": true,
22
  "enable_file_upload": true,
23
+ "enable_tts": true,
24
+ "tts_model": "microsoft/speecht5_tts",
25
+ "tts_voice": "default",
26
  "theme": "Default"
27
  }
requirements.txt CHANGED
@@ -2,4 +2,5 @@ gradio>=5.39.0
2
  requests>=2.32.3
3
  beautifulsoup4>=4.12.3
4
  python-dotenv>=1.0.0
5
- huggingface-hub>=0.20.0
 
 
2
  requests>=2.32.3
3
  beautifulsoup4>=4.12.3
4
  python-dotenv>=1.0.0
5
+ huggingface-hub>=0.20.0
6
+ numpy>=1.24.0