milwright committed on
Commit
65c037b
·
verified ·
1 Parent(s): af54f4b

Upload 4 files

Browse files
Files changed (3) hide show
  1. app.py +131 -158
  2. config.json +6 -4
  3. requirements.txt +2 -2
app.py CHANGED
@@ -9,8 +9,16 @@ from datetime import datetime
9
  import urllib.parse
10
  from pathlib import Path
11
  from typing import List, Dict, Optional, Any, Tuple
12
- import numpy as np
13
- import time
 
 
 
 
 
 
 
 
14
 
15
 
16
  # Configuration
@@ -26,15 +34,14 @@ DEFAULT_CONFIG = {
26
  'max_tokens': 250,
27
  'model': 'google/gemma-3-27b-it',
28
  'api_key_var': 'API_KEY',
29
- 'theme': 'Base',
30
  'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
31
  'enable_dynamic_urls': True,
32
  'enable_file_upload': True,
33
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
34
  'language': 'Italian',
35
- 'enable_tts': True,
36
- 'tts_model': 'openai/tts-1-hd',
37
- 'tts_voice': 'onyx',
38
  'locked': False
39
  }
40
 
@@ -143,9 +150,6 @@ GROUNDING_URLS = config.get('grounding_urls', DEFAULT_CONFIG['grounding_urls'])
143
  ENABLE_DYNAMIC_URLS = config.get('enable_dynamic_urls', DEFAULT_CONFIG['enable_dynamic_urls'])
144
  ENABLE_FILE_UPLOAD = config.get('enable_file_upload', DEFAULT_CONFIG.get('enable_file_upload', True))
145
  LANGUAGE = config.get('language', DEFAULT_CONFIG.get('language', 'English'))
146
- ENABLE_TTS = config.get('enable_tts', DEFAULT_CONFIG.get('enable_tts', False))
147
- TTS_MODEL = config.get('tts_model', DEFAULT_CONFIG.get('tts_model', 'microsoft/speecht5_tts'))
148
- TTS_VOICE = config.get('tts_voice', DEFAULT_CONFIG.get('tts_voice', 'default'))
149
 
150
  # Environment variables
151
  ACCESS_CODE = os.environ.get("ACCESS_CODE")
@@ -290,6 +294,80 @@ def process_file_upload(file_path: str) -> str:
290
  _url_content_cache = {}
291
 
292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  def get_grounding_context() -> str:
294
  """Get grounding context from configured URLs with caching"""
295
  urls = GROUNDING_URLS
@@ -532,80 +610,6 @@ def verify_hf_token_access() -> Tuple[bool, str]:
532
  return False, f"Error verifying HF token: {str(e)}"
533
 
534
 
535
- def generate_tts(text: str, max_retries: int = 2) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
536
- """Generate TTS audio using OpenAI's TTS API through OpenRouter"""
537
- if not ENABLE_TTS or not text:
538
- return None, "TTS disabled or no text provided"
539
-
540
- api_key = os.getenv(API_KEY_VAR)
541
- if not api_key:
542
- return None, f"⚠️ {API_KEY_VAR} not configured for TTS"
543
-
544
- # Limit text length for TTS
545
- text = text[:1000] # OpenAI supports up to 4096 chars but let's be reasonable
546
-
547
- # OpenAI TTS models and voices
548
- model = TTS_MODEL if TTS_MODEL.startswith("openai/") else "openai/tts-1"
549
- voice = TTS_VOICE if TTS_VOICE in ["alloy", "echo", "fable", "onyx", "nova", "shimmer"] else "alloy"
550
-
551
- for attempt in range(max_retries):
552
- try:
553
- headers = {
554
- "Authorization": f"Bearer {api_key}",
555
- "HTTP-Referer": "https://huggingface.co",
556
- "X-Title": SPACE_NAME,
557
- "Content-Type": "application/json"
558
- }
559
-
560
- # OpenRouter endpoint for OpenAI TTS
561
- api_url = "https://openrouter.ai/api/v1/audio/speech"
562
-
563
- payload = {
564
- "model": model,
565
- "input": text,
566
- "voice": voice,
567
- "response_format": "mp3", # Can be mp3, opus, aac, flac
568
- "speed": 1.0 # 0.25 to 4.0
569
- }
570
-
571
- response = requests.post(
572
- api_url,
573
- headers=headers,
574
- json=payload,
575
- timeout=30
576
- )
577
-
578
- if response.status_code == 200:
579
- # OpenAI returns MP3 audio data
580
- # Convert to format Gradio expects
581
- try:
582
- # Save temporarily and load with a library that can read MP3
583
- import tempfile
584
- with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp_file:
585
- tmp_file.write(response.content)
586
- tmp_path = tmp_file.name
587
-
588
- # For now, return the file path - Gradio can handle MP3 files
589
- return tmp_path, "✅ Audio generated successfully"
590
- except Exception as e:
591
- return None, f"❌ Error processing audio: {str(e)}"
592
-
593
- else:
594
- try:
595
- error_msg = response.json().get('error', {}).get('message', 'Unknown error')
596
- except:
597
- error_msg = response.text if response.text else 'Unknown error'
598
- return None, f"❌ API Error ({response.status_code}): {error_msg}"
599
-
600
- except requests.exceptions.Timeout:
601
- return None, "⏰ TTS request timeout"
602
- except Exception as e:
603
- if attempt == max_retries - 1:
604
- return None, f"❌ TTS Error: {str(e)}"
605
- time.sleep(2)
606
-
607
- return None, "❌ Max retries exceeded"
608
-
609
 
610
  # Create main interface with clean tab structure
611
  def create_interface():
@@ -679,6 +683,19 @@ def create_interface():
679
  size="sm"
680
  )
681
 
 
 
 
 
 
 
 
 
 
 
 
 
 
682
  # Export handler
683
  def prepare_export():
684
  if not chat_history_store:
@@ -702,58 +719,44 @@ def create_interface():
702
  outputs=[export_btn]
703
  )
704
 
705
- # TTS functionality
706
- if ENABLE_TTS:
707
- with gr.Row():
708
- tts_btn = gr.Button("🔊 Read Last Response", variant="secondary", size="sm")
709
- audio_output = gr.Audio(label="TTS Output", visible=False, autoplay=True)
710
-
711
- tts_status = gr.Markdown("", visible=False)
712
- last_assistant_message = gr.State("")
713
-
714
- def update_last_message(chat_history):
715
- """Extract the last assistant message from chat history"""
716
  if not chat_history:
717
- return ""
718
 
719
- # Find the last assistant message
720
- for message in reversed(chat_history):
721
- if isinstance(message, dict) and message.get('role') == 'assistant':
722
- return message.get('content', '')
723
- return ""
724
-
725
- def handle_tts_click(last_message):
726
- """Handle TTS button click"""
727
- if not last_message:
728
- return None, gr.update(visible=False), gr.update(value="⚠️ No message to read", visible=True)
729
 
730
- audio_file, status_msg = generate_tts(last_message)
 
731
 
732
- if audio_file:
 
 
 
733
  return (
734
- audio_file, # File path for Gradio to play
735
- gr.update(visible=True),
736
- gr.update(value=status_msg, visible=True)
737
  )
738
  else:
739
  return (
740
  None,
741
- gr.update(visible=False),
742
- gr.update(value=status_msg, visible=True)
743
  )
744
 
745
- # Update last message whenever chat updates
746
- chatbot.change(
747
- update_last_message,
748
- inputs=[chatbot],
749
- outputs=[last_assistant_message]
750
- )
751
-
752
- # Handle TTS button click
753
  tts_btn.click(
754
- handle_tts_click,
755
- inputs=[last_assistant_message],
756
- outputs=[audio_output, audio_output, tts_status]
757
  )
758
 
759
  # Examples section
@@ -998,30 +1001,6 @@ def create_interface():
998
  info="Allow users to upload files for context"
999
  )
1000
 
1001
- # TTS Configuration
1002
- gr.Markdown("### 🔊 Text-to-Speech")
1003
- with gr.Row():
1004
- edit_enable_tts = gr.Checkbox(
1005
- label="Enable TTS",
1006
- value=config.get('enable_tts', False),
1007
- info="Enable text-to-speech for assistant responses"
1008
- )
1009
- edit_tts_model = gr.Dropdown(
1010
- label="TTS Model",
1011
- choices=[
1012
- "openai/tts-1",
1013
- "openai/tts-1-hd"
1014
- ],
1015
- value=config.get('tts_model', 'openai/tts-1'),
1016
- allow_custom_value=True
1017
- )
1018
- edit_tts_voice = gr.Dropdown(
1019
- label="Voice",
1020
- choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
1021
- value=config.get('tts_voice', 'alloy'),
1022
- info="alloy: neutral, echo: male, fable: british male, onyx: deep male, nova: female, shimmer: female"
1023
- )
1024
-
1025
  # Configuration actions
1026
  with gr.Row():
1027
  save_btn = gr.Button("💾 Save Configuration", variant="primary")
@@ -1029,7 +1008,7 @@ def create_interface():
1029
 
1030
  config_status = gr.Markdown()
1031
 
1032
- def save_configuration(name, description, system_prompt, model, language, temp, tokens, examples, grounding_urls, enable_dynamic_urls, enable_file_upload, enable_tts, tts_model, tts_voice):
1033
  """Save updated configuration"""
1034
  try:
1035
  updated_config = config.copy()
@@ -1045,9 +1024,6 @@ def create_interface():
1045
  'grounding_urls': [url.strip() for url in grounding_urls.split('\n') if url.strip()],
1046
  'enable_dynamic_urls': enable_dynamic_urls,
1047
  'enable_file_upload': enable_file_upload,
1048
- 'enable_tts': enable_tts,
1049
- 'tts_model': tts_model,
1050
- 'tts_voice': tts_voice,
1051
  'locked': config.get('locked', False)
1052
  })
1053
 
@@ -1088,7 +1064,7 @@ def create_interface():
1088
  save_configuration,
1089
  inputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
1090
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
1091
- edit_enable_dynamic_urls, edit_enable_file_upload, edit_enable_tts, edit_tts_model, edit_tts_voice],
1092
  outputs=[config_status]
1093
  )
1094
 
@@ -1108,21 +1084,18 @@ def create_interface():
1108
  '\n'.join(DEFAULT_CONFIG['grounding_urls']),
1109
  DEFAULT_CONFIG['enable_dynamic_urls'],
1110
  DEFAULT_CONFIG['enable_file_upload'],
1111
- DEFAULT_CONFIG.get('enable_tts', False),
1112
- DEFAULT_CONFIG.get('tts_model', 'openai/tts-1'),
1113
- DEFAULT_CONFIG.get('tts_voice', 'alloy'),
1114
  "✅ Reset to default configuration"
1115
  )
1116
  else:
1117
- return (*[gr.update() for _ in range(14)], "❌ Failed to reset")
1118
  except Exception as e:
1119
- return (*[gr.update() for _ in range(14)], f"❌ Error: {str(e)}")
1120
 
1121
  reset_btn.click(
1122
  reset_configuration,
1123
  outputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
1124
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
1125
- edit_enable_dynamic_urls, edit_enable_file_upload, edit_enable_tts, edit_tts_model, edit_tts_voice, config_status]
1126
  )
1127
 
1128
  # Configuration tab authentication handler
 
9
  import urllib.parse
10
  from pathlib import Path
11
  from typing import List, Dict, Optional, Any, Tuple
12
+ import base64
13
+ import io
14
+
15
+ # Try to import gradio_client for TTS support
16
+ try:
17
+ from gradio_client import Client
18
+ GRADIO_CLIENT_AVAILABLE = True
19
+ except ImportError:
20
+ GRADIO_CLIENT_AVAILABLE = False
21
+ print("Warning: gradio_client not available. TTS features will be disabled.")
22
 
23
 
24
  # Configuration
 
34
  'max_tokens': 250,
35
  'model': 'google/gemma-3-27b-it',
36
  'api_key_var': 'API_KEY',
37
+ 'theme': 'Default',
38
  'grounding_urls': ["https://www.pnac.org/wp-content/uploads/Italian-Study-Guide.pdf"],
39
  'enable_dynamic_urls': True,
40
  'enable_file_upload': True,
41
  'examples': ['Ciao! Come stai oggi?', 'Mi piace giocare a calcio. E tu?', 'Cosa mangi di solito a colazione?', 'A che ora ti svegli la mattina?', 'Qual è il tuo sport preferito?'],
42
  'language': 'Italian',
43
+ 'enable_tts': False,
44
+ 'tts_spaces': ['facebook/mms-tts-eng', 'microsoft/speecht5-tts-demo'],
 
45
  'locked': False
46
  }
47
 
 
150
  ENABLE_DYNAMIC_URLS = config.get('enable_dynamic_urls', DEFAULT_CONFIG['enable_dynamic_urls'])
151
  ENABLE_FILE_UPLOAD = config.get('enable_file_upload', DEFAULT_CONFIG.get('enable_file_upload', True))
152
  LANGUAGE = config.get('language', DEFAULT_CONFIG.get('language', 'English'))
 
 
 
153
 
154
  # Environment variables
155
  ACCESS_CODE = os.environ.get("ACCESS_CODE")
 
294
  _url_content_cache = {}
295
 
296
 
297
def generate_tts(text: str, hf_token: Optional[str] = None) -> Optional[Tuple[int, Any]]:
    """
    Generate text-to-speech audio using HuggingFace Spaces via gradio_client.

    Each configured Space is tried in order; the first one that yields usable
    audio wins. Failures of an individual Space are logged and skipped so a
    single broken Space does not disable TTS altogether.

    Args:
        text: Text to synthesize. Input longer than 500 characters is
            truncated and suffixed with "...".
        hf_token: Optional Hugging Face token. When omitted, the ``HF_TOKEN``
            environment variable is used (which may itself be unset).

    Returns:
        Tuple of (sample_rate, audio_array), or None when gradio_client is
        unavailable, the text is empty/blank, or every Space failed.
    """
    if not GRADIO_CLIENT_AVAILABLE:
        return None

    if not text or not text.strip():
        return None

    # Get HF token from environment if not provided
    if not hf_token:
        hf_token = os.getenv("HF_TOKEN")

    # Prefer the loaded configuration (same precedence as the other settings
    # in this module), then the built-in defaults, then a hard-coded list.
    tts_spaces = (
        config.get('tts_spaces')
        or DEFAULT_CONFIG.get('tts_spaces')
        or [
            "facebook/mms-tts-eng",
            "microsoft/speecht5-tts-demo",
            "coqui/XTTS",
            "myshell-ai/OpenVoice",
        ]
    )

    # Limit text length for TTS
    max_text_length = 500
    if len(text) > max_text_length:
        text = text[:max_text_length] + "..."

    # Different spaces have different APIs; try common argument patterns:
    # text only, text + language, text + voice/speaker.
    call_patterns = ((text,), (text, "en"), (text, "default"))

    for space_name in tts_spaces:
        try:
            print(f"Trying TTS space: {space_name}")
            client = Client(space_name, hf_token=hf_token)

            result = None
            last_error = None
            for call_args in call_patterns:
                try:
                    result = client.predict(*call_args, api_name="/predict")
                    break
                except Exception as call_error:
                    # Catch Exception (not a bare except) so Ctrl-C and
                    # interpreter shutdown still propagate.
                    last_error = call_error
            else:
                print(f"TTS failed with {space_name}: no call pattern accepted ({last_error})")
                continue

            # Unwrap dict-style payloads, then validate like any other result.
            if isinstance(result, dict) and 'audio' in result:
                result = result['audio']

            # NOTE(review): multi-output endpoints presumably return a
            # tuple/list whose first item is the audio file path - confirm
            # against the configured Spaces.
            if isinstance(result, (tuple, list)) and result and isinstance(result[0], str):
                result = result[0]

            if isinstance(result, str) and os.path.exists(result):
                # Result is a file path; decode it into (sample_rate, samples).
                try:
                    import soundfile as sf
                except ImportError:
                    print(f"TTS failed with {space_name}: 'soundfile' is not installed, cannot decode audio file")
                    continue
                audio_data, sample_rate = sf.read(result)
                return (sample_rate, audio_data)

            if isinstance(result, (tuple, list)) and len(result) >= 2:
                # Result is already (sample_rate, audio_array)
                return (result[0], result[1])

            print(f"TTS failed with {space_name}: unrecognized result type {type(result).__name__}")

        except Exception as e:
            print(f"TTS failed with {space_name}: {str(e)}")
            continue

    return None
369
+
370
+
371
  def get_grounding_context() -> str:
372
  """Get grounding context from configured URLs with caching"""
373
  urls = GROUNDING_URLS
 
610
  return False, f"Error verifying HF token: {str(e)}"
611
 
612
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
 
614
  # Create main interface with clean tab structure
615
  def create_interface():
 
683
  size="sm"
684
  )
685
 
686
+ # TTS functionality
687
+ if DEFAULT_CONFIG.get('enable_tts', False) and GRADIO_CLIENT_AVAILABLE:
688
+ with gr.Row():
689
+ tts_btn = gr.Button("🔊 Read Last Response", variant="secondary", size="sm")
690
+ tts_status = gr.Textbox(label="TTS Status", visible=False, interactive=False)
691
+
692
+ audio_output = gr.Audio(
693
+ label="TTS Output",
694
+ visible=False,
695
+ autoplay=True,
696
+ type="numpy"
697
+ )
698
+
699
  # Export handler
700
  def prepare_export():
701
  if not chat_history_store:
 
719
  outputs=[export_btn]
720
  )
721
 
722
+ # TTS handler
723
+ if DEFAULT_CONFIG.get('enable_tts', False) and GRADIO_CLIENT_AVAILABLE:
724
+ def handle_tts(chat_history):
725
+ """Generate TTS for the last assistant message"""
 
 
 
 
 
 
 
726
  if not chat_history:
727
+ return None, gr.update(value="No messages to read", visible=True)
728
 
729
+ # Find last assistant message
730
+ last_assistant_msg = None
731
+ for msg in reversed(chat_history):
732
+ if msg.get("role") == "assistant":
733
+ last_assistant_msg = msg.get("content", "")
734
+ break
735
+
736
+ if not last_assistant_msg:
737
+ return None, gr.update(value="No assistant message found", visible=True)
 
738
 
739
+ # Update status
740
+ status_msg = "🎯 Generating audio..."
741
 
742
+ # Generate TTS
743
+ audio_result = generate_tts(last_assistant_msg)
744
+
745
+ if audio_result:
746
  return (
747
+ gr.update(value=audio_result, visible=True),
748
+ gr.update(value="✅ Audio generated successfully", visible=True)
 
749
  )
750
  else:
751
  return (
752
  None,
753
+ gr.update(value="❌ TTS generation failed", visible=True)
 
754
  )
755
 
 
 
 
 
 
 
 
 
756
  tts_btn.click(
757
+ handle_tts,
758
+ inputs=[chatbot],
759
+ outputs=[audio_output, tts_status]
760
  )
761
 
762
  # Examples section
 
1001
  info="Allow users to upload files for context"
1002
  )
1003
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1004
  # Configuration actions
1005
  with gr.Row():
1006
  save_btn = gr.Button("💾 Save Configuration", variant="primary")
 
1008
 
1009
  config_status = gr.Markdown()
1010
 
1011
+ def save_configuration(name, description, system_prompt, model, language, temp, tokens, examples, grounding_urls, enable_dynamic_urls, enable_file_upload):
1012
  """Save updated configuration"""
1013
  try:
1014
  updated_config = config.copy()
 
1024
  'grounding_urls': [url.strip() for url in grounding_urls.split('\n') if url.strip()],
1025
  'enable_dynamic_urls': enable_dynamic_urls,
1026
  'enable_file_upload': enable_file_upload,
 
 
 
1027
  'locked': config.get('locked', False)
1028
  })
1029
 
 
1064
  save_configuration,
1065
  inputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
1066
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
1067
+ edit_enable_dynamic_urls, edit_enable_file_upload],
1068
  outputs=[config_status]
1069
  )
1070
 
 
1084
  '\n'.join(DEFAULT_CONFIG['grounding_urls']),
1085
  DEFAULT_CONFIG['enable_dynamic_urls'],
1086
  DEFAULT_CONFIG['enable_file_upload'],
 
 
 
1087
  "✅ Reset to default configuration"
1088
  )
1089
  else:
1090
+ return (*[gr.update() for _ in range(11)], "❌ Failed to reset")
1091
  except Exception as e:
1092
+ return (*[gr.update() for _ in range(11)], f"❌ Error: {str(e)}")
1093
 
1094
  reset_btn.click(
1095
  reset_configuration,
1096
  outputs=[edit_name, edit_description, edit_system_prompt, edit_model, edit_language,
1097
  edit_temperature, edit_max_tokens, edit_examples, edit_grounding_urls,
1098
+ edit_enable_dynamic_urls, edit_enable_file_upload, config_status]
1099
  )
1100
 
1101
  # Configuration tab authentication handler
config.json CHANGED
@@ -20,8 +20,10 @@
20
  ],
21
  "enable_dynamic_urls": true,
22
  "enable_file_upload": true,
23
- "enable_tts": true,
24
- "tts_model": "openai/tts-1-hd",
25
- "tts_voice": "onyx",
26
- "theme": "Base"
 
 
27
  }
 
20
  ],
21
  "enable_dynamic_urls": true,
22
  "enable_file_upload": true,
23
+ "enable_tts": false,
24
+ "tts_spaces": [
25
+ "facebook/mms-tts-eng",
26
+ "microsoft/speecht5-tts-demo"
27
+ ],
28
+ "theme": "Default"
29
  }
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  gradio>=5.39.0
 
2
  requests>=2.32.3
3
  beautifulsoup4>=4.12.3
4
  python-dotenv>=1.0.0
5
- huggingface-hub>=0.20.0
6
- numpy>=1.24.0
 
1
  gradio>=5.39.0
2
+ gradio_client>=1.0.0
3
  requests>=2.32.3
4
  beautifulsoup4>=4.12.3
5
  python-dotenv>=1.0.0
6
+ huggingface-hub>=0.20.0