Edmon02 committed on
Commit
d2f6021
·
1 Parent(s): 9fb8195

Refactor: Simplify TTS application for HuggingFace Spaces with improved error handling and interface

Browse files
Files changed (7) hide show
  1. DEPLOYMENT_FIX_SUMMARY.md +98 -0
  2. app.py +120 -329
  3. app_deploy.py +170 -0
  4. app_optimized.py +6 -3
  5. app_simple.py +210 -0
  6. deploy.py +6 -2
  7. requirements.txt +2 -2
DEPLOYMENT_FIX_SUMMARY.md ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HuggingFace Spaces Deployment Fix
2
+
3
+ ## Issues Identified and Fixed
4
+
5
+ ### 1. Gradio JSON Schema Error
6
+ **Error**: `TypeError: argument of type 'bool' is not iterable`
7
+
8
+ **Root Cause**: The error occurred in Gradio's JSON schema processing when trying to check `if "const" in schema:` where `schema` was a boolean instead of a dictionary.
9
+
10
+ **Fixes Applied**:
11
+ - Pinned Gradio to an earlier, more stable release (4.20.0, down from 4.44.1)
12
+ - Simplified the interface using `gr.Interface` instead of complex `gr.Blocks`
13
+ - Disabled example caching (`cache_examples=False`)
14
+ - Disabled flagging (`allow_flagging="never"`)
15
+ - Removed `share=True` parameter (not supported on HF Spaces)
16
+
17
+ ### 2. Import and Dependency Issues
18
+ **Fixes Applied**:
19
+ - Added robust fallback import system
20
+ - Created dummy pipeline for testing when imports fail
21
+ - Improved error handling throughout the application
22
+ - Added proper `sys.path` management for `src` imports
23
+
24
+ ### 3. HuggingFace Spaces Compatibility
25
+ **Fixes Applied**:
26
+ - Set `share=False` (share links not supported on HF Spaces)
27
+ - Used standard server configuration (`0.0.0.0:7860`)
28
+ - Simplified interface structure
29
+ - Added proper error boundaries
30
+
31
+ ## Files Added or Modified
32
+
33
+ 1. **`app.py`** - Main deployment file with robust error handling
34
+ 2. **`app_deploy.py`** - Clean deployment version
35
+ 3. **`app_simple.py`** - Simplified alternative
36
+ 4. **`requirements.txt`** - Updated Gradio version
37
+ 5. **`deploy.py`** - Enhanced deployment script
38
+
39
+ ## Deployment Steps
40
+
41
+ 1. **Test Locally** (optional):
42
+ ```bash
43
+ python app.py
44
+ ```
45
+
46
+ 2. **Deploy to HuggingFace Spaces**:
47
+ ```bash
48
+ git add .
49
+ git commit -m "Fix Gradio schema errors and improve compatibility"
50
+ git push
51
+ ```
52
+
53
+ ## Key Changes Made
54
+
55
+ ### App Structure
56
+ - Switched from `gr.Blocks` to `gr.Interface` for better compatibility
57
+ - Simplified input/output definitions
58
+ - Removed complex state management
59
+
60
+ ### Error Handling
61
+ - Added comprehensive `try`/`except` blocks
62
+ - Created fallback pipeline for testing
63
+ - Improved logging throughout
64
+
65
+ ### Dependencies
66
+ - Pinned Gradio to a stable version (4.20.0)
67
+ - Maintained all core ML dependencies
68
+ - Added proper import fallbacks
69
+
70
+ ### Configuration
71
+ - Disabled problematic features (share, caching, flagging)
72
+ - Set proper server configuration for HF Spaces
73
+ - Simplified launch parameters
74
+
75
+ ## Testing the Fix
76
+
77
+ The fixed version should:
78
+ 1. ✅ Load without JSON schema errors
79
+ 2. ✅ Handle import failures gracefully
80
+ 3. ✅ Work on HuggingFace Spaces infrastructure
81
+ 4. ✅ Provide fallback functionality when models fail to load
82
+ 5. ✅ Display proper error messages to users
83
+
84
+ ## Backup Files
85
+
86
+ - `app_original.py` - Your original application
87
+ - `app_optimized.py` - The optimized version (fixed)
88
+ - `app_simple.py` - Simplified version
89
+ - `app_deploy.py` - Final deployment version
90
+
91
+ ## If Issues Persist
92
+
93
+ 1. Check HuggingFace Spaces logs for specific errors
94
+ 2. Verify all dependencies are properly installed
95
+ 3. Test with the simple version (`app_simple.py`)
96
+ 4. Contact HF support if infrastructure issues persist
97
+
98
+ The main fix addresses the Gradio JSON schema error by simplifying the interface structure and using compatible Gradio features.
app.py CHANGED
@@ -1,379 +1,170 @@
1
  """
2
- Optimized SpeechT5 Armenian TTS Application
3
- ==========================================
4
 
5
- High-performance Gradio application with advanced optimization features.
6
  """
7
 
8
  import gradio as gr
9
  import numpy as np
10
  import logging
11
  import time
12
- from typing import Tuple, Optional
13
  import os
14
  import sys
 
15
 
16
- # Add src to path for imports
17
- current_dir = os.path.dirname(os.path.abspath(__file__))
18
- src_path = os.path.join(current_dir, 'src')
19
- if src_path not in sys.path:
20
- sys.path.insert(0, src_path)
21
-
22
- try:
23
- from src.pipeline import TTSPipeline
24
- except ImportError as e:
25
- logging.error(f"Failed to import pipeline: {e}")
26
- # Fallback import attempt
27
- sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
28
- from src.pipeline import TTSPipeline
29
-
30
- # Configure logging
31
  logging.basicConfig(
32
  level=logging.INFO,
33
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
34
  )
35
  logger = logging.getLogger(__name__)
36
 
37
- # Global pipeline instance
38
- tts_pipeline: Optional[TTSPipeline] = None
39
-
40
 
41
- def initialize_pipeline():
42
- """Initialize the TTS pipeline with error handling."""
43
- global tts_pipeline
44
 
45
  try:
 
 
 
 
 
 
 
 
 
46
  logger.info("Initializing TTS Pipeline...")
47
- tts_pipeline = TTSPipeline(
48
  model_checkpoint="Edmon02/TTS_NB_2",
49
- max_chunk_length=200, # Optimal for 5-20s clips
50
  crossfade_duration=0.1,
51
  use_mixed_precision=True
52
  )
53
 
54
- # Apply production optimizations
55
- tts_pipeline.optimize_for_production()
56
-
57
- logger.info("TTS Pipeline initialized successfully")
58
  return True
59
 
60
  except Exception as e:
61
- logger.error(f"Failed to initialize TTS pipeline: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  return False
63
 
64
 
65
- def predict(text: str, speaker: str,
66
- enable_chunking: bool = True,
67
- apply_processing: bool = True) -> Tuple[int, np.ndarray]:
68
  """
69
- Main prediction function with optimization and error handling.
70
 
71
  Args:
72
- text: Input text to synthesize
73
- speaker: Speaker selection
74
- enable_chunking: Whether to enable intelligent chunking
75
- apply_processing: Whether to apply audio post-processing
76
 
77
  Returns:
78
- Tuple of (sample_rate, audio_array)
79
  """
80
- global tts_pipeline
 
 
 
81
 
82
- start_time = time.time()
 
 
83
 
84
  try:
85
- # Validate inputs
86
- if not text or not text.strip():
87
- logger.warning("Empty text provided")
88
- return 16000, np.zeros(0, dtype=np.int16)
89
-
90
- if tts_pipeline is None:
91
- logger.error("TTS pipeline not initialized")
92
- return 16000, np.zeros(0, dtype=np.int16)
93
-
94
- # Extract speaker code from selection
95
- speaker_code = speaker.split("(")[0].strip()
96
-
97
- # Log request
98
- logger.info(f"Processing request: {len(text)} chars, speaker: {speaker_code}")
99
 
100
- # Synthesize speech
101
- sample_rate, audio = tts_pipeline.synthesize(
102
  text=text,
103
- speaker=speaker_code,
104
- enable_chunking=enable_chunking,
105
- apply_audio_processing=apply_processing
106
  )
107
 
108
- # Log performance
109
- total_time = time.time() - start_time
110
- audio_duration = len(audio) / sample_rate if len(audio) > 0 else 0
111
- rtf = total_time / audio_duration if audio_duration > 0 else float('inf')
112
-
113
- logger.info(f"Request completed in {total_time:.3f}s (RTF: {rtf:.2f})")
114
 
115
  return sample_rate, audio
116
 
117
  except Exception as e:
118
- logger.error(f"Prediction failed: {e}")
119
- return 16000, np.zeros(0, dtype=np.int16)
120
-
121
-
122
- def get_performance_info() -> str:
123
- """Get performance statistics as formatted string."""
124
- global tts_pipeline
125
-
126
- if tts_pipeline is None:
127
- return "Pipeline not initialized"
128
-
129
- try:
130
- stats = tts_pipeline.get_performance_stats()
131
-
132
- info = f"""
133
- **Performance Statistics:**
134
- - Total Inferences: {stats['pipeline_stats']['total_inferences']}
135
- - Average Processing Time: {stats['pipeline_stats']['avg_processing_time']:.3f}s
136
- - Translation Cache Size: {stats['text_processor_stats']['translation_cache_size']}
137
- - Model Inferences: {stats['model_stats']['total_inferences']}
138
- - Average Model Time: {stats['model_stats'].get('avg_inference_time', 0):.3f}s
139
- """
140
-
141
- return info.strip()
142
-
143
- except Exception as e:
144
- return f"Error getting performance info: {e}"
145
-
146
-
147
- def health_check() -> str:
148
- """Perform system health check."""
149
- global tts_pipeline
150
-
151
- if tts_pipeline is None:
152
- return "❌ Pipeline not initialized"
153
-
154
- try:
155
- health = tts_pipeline.health_check()
156
-
157
- if health["status"] == "healthy":
158
- return "✅ All systems operational"
159
- elif health["status"] == "degraded":
160
- return "⚠️ Some components have issues"
161
- else:
162
- return f"❌ System error: {health.get('error', 'Unknown error')}"
163
-
164
- except Exception as e:
165
- return f" Health check failed: {e}"
166
-
167
-
168
- # Application metadata
169
- TITLE = "🎤 SpeechT5 Armenian TTS - Optimized"
170
-
171
- DESCRIPTION = """
172
- # High-Performance Armenian Text-to-Speech
173
-
174
- This is an **optimized version** of SpeechT5 for Armenian language synthesis, featuring:
175
-
176
- ### 🚀 **Performance Optimizations**
177
- - **Intelligent Text Chunking**: Handles long texts by splitting them intelligently at sentence boundaries
178
- - **Caching**: Translation and embedding caching for faster repeated requests
179
- - **Mixed Precision**: GPU optimization with FP16 inference when available
180
- - **Crossfading**: Smooth audio transitions between chunks for natural-sounding longer texts
181
-
182
- ### 🎯 **Advanced Features**
183
- - **Smart Text Processing**: Automatic number-to-word conversion with Armenian translation
184
- - **Audio Post-Processing**: Noise gating, normalization, and dynamic range optimization
185
- - **Robust Error Handling**: Graceful fallbacks and comprehensive logging
186
- - **Real-time Performance Monitoring**: Track processing times and system health
187
-
188
- ### 📝 **Usage Tips**
189
- - **Short texts** (< 200 chars): Processed directly for maximum speed
190
- - **Long texts**: Automatically chunked with overlap for seamless audio
191
- - **Numbers**: Automatically converted to Armenian words
192
- - **Performance**: Enable chunking for texts longer than a few sentences
193
-
194
- ### 🎵 **Audio Quality**
195
- - Sample Rate: 16 kHz
196
- - Optimized for natural prosody and clear pronunciation
197
- - Cross-fade transitions for multi-chunk synthesis
198
-
199
- The model was trained on short clips (5-20s) but uses advanced algorithms to handle longer texts effectively.
200
- """
201
-
202
- EXAMPLES = [
203
- # Short examples for quick testing
204
- ["Բարև ձեզ, ինչպե՞ս եք:", "BDL (male)", True, True],
205
- ["Այսօր գեղեցիկ օր է:", "BDL (male)", False, True],
206
-
207
- # Medium examples demonstrating chunking
208
- ["Հայաստանն ունի հարուստ պատմություն և մշակույթ: Երևանը մայրաքաղաքն է, որն ունի 2800 տարվա պատմություն:", "BDL (male)", True, True],
209
-
210
- # Long example with numbers
211
- ["Արարատ լեռը բարձրությունը 5165 մետր է: Այն Հայաստանի խորհրդանիշն է և գտնվում է Թուրքիայի տարածքում: Լեռան վրա ըստ Աստվածաշնչի՝ կանգնել է Նոյի տապանը 40 օրվա ջրհեղեղից հետո:", "BDL (male)", True, True],
212
-
213
- # Technical example
214
- ["Մեքենայի շարժիչը 150 ձիուժ է և 2.0 լիտր ծավալ ունի: Այն կարող է արագացնել 0-ից 100 կմ/ժ 8.5 վայրկյանում:", "BDL (male)", True, True],
215
- ]
216
-
217
- # Custom CSS for better styling
218
- CUSTOM_CSS = """
219
- .gradio-container {
220
- max-width: 1200px !important;
221
- margin: auto !important;
222
- }
223
-
224
- .performance-info {
225
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
226
- padding: 15px;
227
- border-radius: 10px;
228
- color: white;
229
- margin: 10px 0;
230
- }
231
-
232
- .health-status {
233
- padding: 10px;
234
- border-radius: 8px;
235
- margin: 10px 0;
236
- font-weight: bold;
237
- }
238
-
239
- .status-healthy { background-color: #d4edda; color: #155724; }
240
- .status-warning { background-color: #fff3cd; color: #856404; }
241
- .status-error { background-color: #f8d7da; color: #721c24; }
242
- """
243
-
244
-
245
- def create_interface():
246
- """Create and configure the Gradio interface."""
247
-
248
- with gr.Blocks(
249
- theme=gr.themes.Soft(),
250
- css=CUSTOM_CSS,
251
- title="SpeechT5 Armenian TTS"
252
- ) as interface:
253
-
254
- # Header
255
- gr.Markdown(f"# {TITLE}")
256
- gr.Markdown(DESCRIPTION)
257
-
258
- with gr.Row():
259
- with gr.Column(scale=2):
260
- # Main input controls
261
- text_input = gr.Textbox(
262
- label="📝 Input Text (Armenian)",
263
- placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
264
- lines=3,
265
- max_lines=10
266
- )
267
-
268
- with gr.Row():
269
- speaker_input = gr.Radio(
270
- label="🎭 Speaker",
271
- choices=["BDL (male)"],
272
- value="BDL (male)"
273
- )
274
-
275
- with gr.Row():
276
- chunking_checkbox = gr.Checkbox(
277
- label="🧩 Enable Intelligent Chunking",
278
- value=True,
279
- info="Automatically split long texts for better quality"
280
- )
281
- processing_checkbox = gr.Checkbox(
282
- label="🎚️ Apply Audio Processing",
283
- value=True,
284
- info="Apply noise gating, normalization, and crossfading"
285
- )
286
-
287
- # Generate button
288
- generate_btn = gr.Button(
289
- "🎤 Generate Speech",
290
- variant="primary",
291
- size="lg"
292
- )
293
-
294
- with gr.Column(scale=1):
295
- # System information panel
296
- gr.Markdown("### 📊 System Status")
297
-
298
- health_display = gr.Textbox(
299
- label="Health Status",
300
- value="Initializing...",
301
- interactive=False,
302
- max_lines=1
303
- )
304
-
305
- performance_display = gr.Textbox(
306
- label="Performance Stats",
307
- value="No data yet",
308
- interactive=False,
309
- max_lines=8
310
- )
311
-
312
- refresh_btn = gr.Button("🔄 Refresh Stats", size="sm")
313
-
314
- # Output
315
- audio_output = gr.Audio(
316
- label="🔊 Generated Speech",
317
- type="numpy",
318
- interactive=False
319
- )
320
-
321
- # Examples section
322
- gr.Markdown("### 💡 Example Texts")
323
- gr.Examples(
324
- examples=EXAMPLES,
325
- inputs=[text_input, speaker_input, chunking_checkbox, processing_checkbox],
326
- outputs=[audio_output],
327
- fn=predict,
328
- label="Click any example to try it:"
329
- )
330
-
331
- # Event handlers
332
- generate_btn.click(
333
- fn=predict,
334
- inputs=[text_input, speaker_input, chunking_checkbox, processing_checkbox],
335
- outputs=[audio_output],
336
- show_progress="full"
337
- )
338
-
339
- refresh_btn.click(
340
- fn=lambda: (health_check(), get_performance_info()),
341
- outputs=[health_display, performance_display],
342
- show_progress="minimal"
343
- )
344
-
345
- # Auto-refresh health status on load
346
- interface.load(
347
- fn=lambda: (health_check(), get_performance_info()),
348
- outputs=[health_display, performance_display]
349
- )
350
-
351
- return interface
352
-
353
-
354
- def main():
355
- """Main application entry point."""
356
- logger.info("Starting SpeechT5 Armenian TTS Application")
357
-
358
- # Initialize pipeline
359
- if not initialize_pipeline():
360
- logger.error("Failed to initialize TTS pipeline - exiting")
361
- sys.exit(1)
362
-
363
- # Create and launch interface
364
- interface = create_interface()
365
-
366
- # Launch with optimized settings
367
- interface.launch(
368
- share=True,
369
- inbrowser=False,
370
- show_error=True,
371
- quiet=False,
372
- server_name="0.0.0.0", # Allow external connections
373
- server_port=7860, # Standard Gradio port
374
- max_threads=4, # Limit concurrent requests
375
- )
376
-
377
 
 
378
  if __name__ == "__main__":
379
- main()
 
 
 
 
 
1
  """
2
+ SpeechT5 Armenian TTS - Production Deployment
3
+ ============================================
4
 
5
+ Production-ready version for HuggingFace Spaces with robust error handling.
6
  """
7
 
8
  import gradio as gr
9
  import numpy as np
10
  import logging
11
  import time
 
12
  import os
13
  import sys
14
+ from typing import Tuple, Optional, Union
15
 
16
+ # Setup logging first
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  logging.basicConfig(
18
  level=logging.INFO,
19
+ format='%(asctime)s - %(levelname)s - %(message)s'
20
  )
21
  logger = logging.getLogger(__name__)
22
 
23
+ # Global pipeline variable
24
+ pipeline = None
 
25
 
26
+ def safe_import():
27
+ """Safely import the TTS pipeline with fallbacks."""
28
+ global pipeline
29
 
30
  try:
31
+ # Add src to path
32
+ current_dir = os.path.dirname(os.path.abspath(__file__))
33
+ src_path = os.path.join(current_dir, 'src')
34
+ if src_path not in sys.path:
35
+ sys.path.insert(0, src_path)
36
+
37
+ # Import pipeline
38
+ from src.pipeline import TTSPipeline
39
+
40
  logger.info("Initializing TTS Pipeline...")
41
+ pipeline = TTSPipeline(
42
  model_checkpoint="Edmon02/TTS_NB_2",
43
+ max_chunk_length=200,
44
  crossfade_duration=0.1,
45
  use_mixed_precision=True
46
  )
47
 
48
+ # Optimize for production
49
+ pipeline.optimize_for_production()
50
+ logger.info("TTS Pipeline ready")
 
51
  return True
52
 
53
  except Exception as e:
54
+ logger.error(f"Failed to initialize pipeline: {e}")
55
+ logger.info("Creating fallback pipeline for testing")
56
+
57
+ # Create a simple fallback
58
+ class FallbackPipeline:
59
+ def synthesize(self, text, **kwargs):
60
+ # Generate simple tone as placeholder
61
+ duration = min(len(text) * 0.08, 3.0)
62
+ sample_rate = 16000
63
+ samples = int(duration * sample_rate)
64
+ t = np.linspace(0, duration, samples)
65
+ # Create a simple beep
66
+ audio = np.sin(2 * np.pi * 440 * t) * 0.3
67
+ return sample_rate, (audio * 32767).astype(np.int16)
68
+
69
+ pipeline = FallbackPipeline()
70
  return False
71
 
72
 
73
+ def generate_audio(text: str) -> Tuple[int, np.ndarray]:
 
 
74
  """
75
+ Generate audio from Armenian text.
76
 
77
  Args:
78
+ text: Armenian text to synthesize
 
 
 
79
 
80
  Returns:
81
+ Tuple of (sample_rate, audio_data)
82
  """
83
+ if not text or not text.strip():
84
+ logger.warning("Empty text provided")
85
+ # Return silence
86
+ return 16000, np.zeros(1000, dtype=np.int16)
87
 
88
+ if pipeline is None:
89
+ logger.error("Pipeline not available")
90
+ return 16000, np.zeros(1000, dtype=np.int16)
91
 
92
  try:
93
+ logger.info(f"Processing: {text[:50]}...")
94
+ start_time = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ # Synthesize with basic parameters
97
+ sample_rate, audio = pipeline.synthesize(
98
  text=text,
99
+ speaker="BDL",
100
+ enable_chunking=True,
101
+ apply_audio_processing=True
102
  )
103
 
104
+ duration = time.time() - start_time
105
+ logger.info(f"Generated {len(audio)} samples in {duration:.2f}s")
 
 
 
 
106
 
107
  return sample_rate, audio
108
 
109
  except Exception as e:
110
+ logger.error(f"Synthesis error: {e}")
111
+ # Return silence on error
112
+ return 16000, np.zeros(1000, dtype=np.int16)
113
+
114
+
115
+ # Initialize the pipeline
116
+ logger.info("Starting TTS application...")
117
+ initialization_success = safe_import()
118
+
119
+ if initialization_success:
120
+ status_message = "✅ TTS System Ready"
121
+ else:
122
+ status_message = "⚠️ Running in Test Mode (Limited Functionality)"
123
+
124
+ # Create the Gradio interface using the simpler gr.Interface
125
+ demo = gr.Interface(
126
+ fn=generate_audio,
127
+ inputs=gr.Textbox(
128
+ label="Armenian Text",
129
+ placeholder="Գրեք ձեր տեքստը այստեղ...",
130
+ lines=3,
131
+ max_lines=8
132
+ ),
133
+ outputs=gr.Audio(
134
+ label="Generated Speech",
135
+ type="numpy"
136
+ ),
137
+ title="🎤 Armenian Text-to-Speech",
138
+ description=f"""
139
+ {status_message}
140
+
141
+ Convert Armenian text to speech using SpeechT5.
142
+
143
+ **How to use:**
144
+ 1. Enter Armenian text in the box below
145
+ 2. Click Submit to generate speech
146
+ 3. Play the generated audio
147
+
148
+ **Tips:**
149
+ - Use standard Armenian script
150
+ - Shorter sentences work better
151
+ - Include punctuation for natural pauses
152
+ """,
153
+ examples=[
154
+ "Բարև ձեզ:",
155
+ "Ինչպե՞ս եք:",
156
+ "Շնորհակալություն:",
157
+ "Կեցցե՛ Հայաստանը:",
158
+ "Այսօր լավ օր է:"
159
+ ],
160
+ theme=gr.themes.Default(),
161
+ allow_flagging="never"
162
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
+ # Launch the app
165
  if __name__ == "__main__":
166
+ demo.launch(
167
+ server_name="0.0.0.0",
168
+ server_port=7860,
169
+ share=False
170
+ )
app_deploy.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SpeechT5 Armenian TTS - Production Deployment
3
+ ============================================
4
+
5
+ Production-ready version for HuggingFace Spaces with robust error handling.
6
+ """
7
+
8
+ import gradio as gr
9
+ import numpy as np
10
+ import logging
11
+ import time
12
+ import os
13
+ import sys
14
+ from typing import Tuple, Optional, Union
15
+
16
+ # Setup logging first
17
+ logging.basicConfig(
18
+ level=logging.INFO,
19
+ format='%(asctime)s - %(levelname)s - %(message)s'
20
+ )
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Global pipeline variable
24
+ pipeline = None
25
+
26
+ def safe_import():
27
+ """Safely import the TTS pipeline with fallbacks."""
28
+ global pipeline
29
+
30
+ try:
31
+ # Add src to path
32
+ current_dir = os.path.dirname(os.path.abspath(__file__))
33
+ src_path = os.path.join(current_dir, 'src')
34
+ if src_path not in sys.path:
35
+ sys.path.insert(0, src_path)
36
+
37
+ # Import pipeline
38
+ from src.pipeline import TTSPipeline
39
+
40
+ logger.info("Initializing TTS Pipeline...")
41
+ pipeline = TTSPipeline(
42
+ model_checkpoint="Edmon02/TTS_NB_2",
43
+ max_chunk_length=200,
44
+ crossfade_duration=0.1,
45
+ use_mixed_precision=True
46
+ )
47
+
48
+ # Optimize for production
49
+ pipeline.optimize_for_production()
50
+ logger.info("TTS Pipeline ready")
51
+ return True
52
+
53
+ except Exception as e:
54
+ logger.error(f"Failed to initialize pipeline: {e}")
55
+ logger.info("Creating fallback pipeline for testing")
56
+
57
+ # Create a simple fallback
58
+ class FallbackPipeline:
59
+ def synthesize(self, text, **kwargs):
60
+ # Generate simple tone as placeholder
61
+ duration = min(len(text) * 0.08, 3.0)
62
+ sample_rate = 16000
63
+ samples = int(duration * sample_rate)
64
+ t = np.linspace(0, duration, samples)
65
+ # Create a simple beep
66
+ audio = np.sin(2 * np.pi * 440 * t) * 0.3
67
+ return sample_rate, (audio * 32767).astype(np.int16)
68
+
69
+ pipeline = FallbackPipeline()
70
+ return False
71
+
72
+
73
+ def generate_audio(text: str) -> Tuple[int, np.ndarray]:
74
+ """
75
+ Generate audio from Armenian text.
76
+
77
+ Args:
78
+ text: Armenian text to synthesize
79
+
80
+ Returns:
81
+ Tuple of (sample_rate, audio_data)
82
+ """
83
+ if not text or not text.strip():
84
+ logger.warning("Empty text provided")
85
+ # Return silence
86
+ return 16000, np.zeros(1000, dtype=np.int16)
87
+
88
+ if pipeline is None:
89
+ logger.error("Pipeline not available")
90
+ return 16000, np.zeros(1000, dtype=np.int16)
91
+
92
+ try:
93
+ logger.info(f"Processing: {text[:50]}...")
94
+ start_time = time.time()
95
+
96
+ # Synthesize with basic parameters
97
+ sample_rate, audio = pipeline.synthesize(
98
+ text=text,
99
+ speaker="BDL",
100
+ enable_chunking=True,
101
+ apply_audio_processing=True
102
+ )
103
+
104
+ duration = time.time() - start_time
105
+ logger.info(f"Generated {len(audio)} samples in {duration:.2f}s")
106
+
107
+ return sample_rate, audio
108
+
109
+ except Exception as e:
110
+ logger.error(f"Synthesis error: {e}")
111
+ # Return silence on error
112
+ return 16000, np.zeros(1000, dtype=np.int16)
113
+
114
+
115
+ # Initialize the pipeline
116
+ logger.info("Starting TTS application...")
117
+ initialization_success = safe_import()
118
+
119
+ if initialization_success:
120
+ status_message = "✅ TTS System Ready"
121
+ else:
122
+ status_message = "⚠️ Running in Test Mode (Limited Functionality)"
123
+
124
+ # Create the Gradio interface using the simpler gr.Interface
125
+ demo = gr.Interface(
126
+ fn=generate_audio,
127
+ inputs=gr.Textbox(
128
+ label="Armenian Text",
129
+ placeholder="Գրեք ձեր տեքստը այստեղ...",
130
+ lines=3,
131
+ max_lines=8
132
+ ),
133
+ outputs=gr.Audio(
134
+ label="Generated Speech",
135
+ type="numpy"
136
+ ),
137
+ title="🎤 Armenian Text-to-Speech",
138
+ description=f"""
139
+ {status_message}
140
+
141
+ Convert Armenian text to speech using SpeechT5.
142
+
143
+ **How to use:**
144
+ 1. Enter Armenian text in the box below
145
+ 2. Click Submit to generate speech
146
+ 3. Play the generated audio
147
+
148
+ **Tips:**
149
+ - Use standard Armenian script
150
+ - Shorter sentences work better
151
+ - Include punctuation for natural pauses
152
+ """,
153
+ examples=[
154
+ "Բարև ձեզ:",
155
+ "Ինչպե՞ս եք:",
156
+ "Շնորհակալություն:",
157
+ "Կեցցե՛ Հայաստանը:",
158
+ "Այսօր լավ օր է:"
159
+ ],
160
+ theme=gr.themes.Default(),
161
+ allow_flagging="never"
162
+ )
163
+
164
+ # Launch the app
165
+ if __name__ == "__main__":
166
+ demo.launch(
167
+ server_name="0.0.0.0",
168
+ server_port=7860,
169
+ share=False
170
+ )
app_optimized.py CHANGED
@@ -320,11 +320,14 @@ def create_interface():
320
 
321
  # Examples section
322
  gr.Markdown("### 💡 Example Texts")
323
- gr.Examples(
 
 
324
  examples=EXAMPLES,
325
  inputs=[text_input, speaker_input, chunking_checkbox, processing_checkbox],
326
- outputs=[audio_output],
327
  fn=predict,
 
328
  label="Click any example to try it:"
329
  )
330
 
@@ -365,7 +368,7 @@ def main():
365
 
366
  # Launch with optimized settings
367
  interface.launch(
368
- share=True,
369
  inbrowser=False,
370
  show_error=True,
371
  quiet=False,
 
320
 
321
  # Examples section
322
  gr.Markdown("### 💡 Example Texts")
323
+
324
+ # Use simpler Examples component to avoid schema issues
325
+ examples = gr.Examples(
326
  examples=EXAMPLES,
327
  inputs=[text_input, speaker_input, chunking_checkbox, processing_checkbox],
328
+ outputs=audio_output,
329
  fn=predict,
330
+ cache_examples=False, # Disable caching to avoid schema issues
331
  label="Click any example to try it:"
332
  )
333
 
 
368
 
369
  # Launch with optimized settings
370
  interface.launch(
371
+ share=False, # Disable share for HF Spaces
372
  inbrowser=False,
373
  show_error=True,
374
  quiet=False,
app_simple.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SpeechT5 Armenian TTS - HuggingFace Spaces Deployment Version
3
+ ============================================================
4
+
5
+ Simplified and optimized for HuggingFace Spaces deployment.
6
+ """
7
+
8
+ import gradio as gr
9
+ import numpy as np
10
+ import logging
11
+ import time
12
+ from typing import Tuple, Optional
13
+ import os
14
+ import sys
15
+
16
+ # Add src to path for imports
17
+ current_dir = os.path.dirname(os.path.abspath(__file__))
18
+ src_path = os.path.join(current_dir, 'src')
19
+ if src_path not in sys.path:
20
+ sys.path.insert(0, src_path)
21
+
22
+ try:
23
+ from src.pipeline import TTSPipeline
24
+ HAS_PIPELINE = True
25
+ except ImportError as e:
26
+ logging.error(f"Failed to import pipeline: {e}")
27
+ # Fallback import attempt
28
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
29
+ try:
30
+ from src.pipeline import TTSPipeline
31
+ HAS_PIPELINE = True
32
+ except ImportError:
33
+ HAS_PIPELINE = False
34
+ # Create a dummy pipeline for testing
35
+ class TTSPipeline:
36
+ def __init__(self, *args, **kwargs):
37
+ pass
38
+ def synthesize(self, text, **kwargs):
39
+ # Return dummy audio for testing
40
+ duration = min(len(text) * 0.1, 5.0) # Approximate duration
41
+ sample_rate = 16000
42
+ samples = int(duration * sample_rate)
43
+ # Generate a simple sine wave as placeholder
44
+ t = np.linspace(0, duration, samples)
45
+ frequency = 440 # A4 note
46
+ audio = (np.sin(2 * np.pi * frequency * t) * 0.3).astype(np.float32)
47
+ return sample_rate, (audio * 32767).astype(np.int16)
48
+ def optimize_for_production(self):
49
+ pass
50
+
51
+ # Configure logging
52
+ logging.basicConfig(
53
+ level=logging.INFO,
54
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
55
+ )
56
+ logger = logging.getLogger(__name__)
57
+
58
+ # Global pipeline instance
59
+ tts_pipeline: Optional[TTSPipeline] = None
60
+
61
+
62
+ def initialize_pipeline():
63
+ """Initialize the TTS pipeline with error handling."""
64
+ global tts_pipeline
65
+
66
+ if not HAS_PIPELINE:
67
+ logger.warning("Pipeline not available - using dummy implementation")
68
+ tts_pipeline = TTSPipeline()
69
+ return True
70
+
71
+ try:
72
+ logger.info("Initializing TTS Pipeline...")
73
+ tts_pipeline = TTSPipeline(
74
+ model_checkpoint="Edmon02/TTS_NB_2",
75
+ max_chunk_length=200,
76
+ crossfade_duration=0.1,
77
+ use_mixed_precision=True
78
+ )
79
+
80
+ # Apply production optimizations
81
+ tts_pipeline.optimize_for_production()
82
+
83
+ logger.info("TTS Pipeline initialized successfully")
84
+ return True
85
+
86
+ except Exception as e:
87
+ logger.error(f"Failed to initialize TTS pipeline: {e}")
88
+ # Fallback to dummy pipeline
89
+ tts_pipeline = TTSPipeline()
90
+ return False
91
+
92
+
93
+ def generate_speech(text: str) -> Tuple[int, np.ndarray]:
94
+ """
95
+ Main synthesis function optimized for HF Spaces.
96
+
97
+ Args:
98
+ text: Input text to synthesize
99
+
100
+ Returns:
101
+ Tuple of (sample_rate, audio_array)
102
+ """
103
+ global tts_pipeline
104
+
105
+ start_time = time.time()
106
+
107
+ try:
108
+ # Validate inputs
109
+ if not text or not text.strip():
110
+ logger.warning("Empty text provided")
111
+ return 16000, np.zeros(1000, dtype=np.int16)
112
+
113
+ if tts_pipeline is None:
114
+ logger.error("TTS pipeline not initialized")
115
+ return 16000, np.zeros(1000, dtype=np.int16)
116
+
117
+ # Log request
118
+ logger.info(f"Processing request: {len(text)} characters")
119
+
120
+ # Synthesize speech with default settings
121
+ sample_rate, audio = tts_pipeline.synthesize(
122
+ text=text,
123
+ speaker="BDL",
124
+ enable_chunking=True,
125
+ apply_audio_processing=True
126
+ )
127
+
128
+ # Log performance
129
+ total_time = time.time() - start_time
130
+ logger.info(f"Request completed in {total_time:.3f}s")
131
+
132
+ return sample_rate, audio
133
+
134
+ except Exception as e:
135
+ logger.error(f"Synthesis failed: {e}")
136
+ return 16000, np.zeros(1000, dtype=np.int16)
137
+
138
+
139
+ # Create the Gradio interface
140
+ def create_app():
141
+ """Create the main Gradio application."""
142
+
143
+ # Simple interface definition
144
+ interface = gr.Interface(
145
+ fn=generate_speech,
146
+ inputs=[
147
+ gr.Textbox(
148
+ label="Armenian Text",
149
+ placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
150
+ lines=3,
151
+ max_lines=10
152
+ )
153
+ ],
154
+ outputs=[
155
+ gr.Audio(
156
+ label="Generated Speech",
157
+ type="numpy"
158
+ )
159
+ ],
160
+ title="🎤 SpeechT5 Armenian Text-to-Speech",
161
+ description="""
162
+ Convert Armenian text to natural speech using SpeechT5.
163
+
164
+ **Instructions:**
165
+ 1. Enter Armenian text in the input box
166
+ 2. Click Submit to generate speech
167
+ 3. Listen to the generated audio
168
+
169
+ **Tips:**
170
+ - Works best with standard Armenian orthography
171
+ - Shorter sentences produce better quality
172
+ - Include proper punctuation for natural pauses
173
+ """,
174
+ examples=[
175
+ ["Բարև ձեզ, ինչպե՞ս եք:"],
176
+ ["Այսօր գեղեցիկ օր է:"],
177
+ ["Հայաստանն ունի հարուստ պատմություն:"],
178
+ ["Երևանը Հայաստանի մայրաքաղաքն է:"],
179
+ ["Արարատ լեռը Հայաստանի խորհրդանիշն է:"]
180
+ ],
181
+ theme=gr.themes.Soft(),
182
+ allow_flagging="never", # Disable flagging to avoid schema issues
183
+ cache_examples=False # Disable example caching
184
+ )
185
+
186
+ return interface
187
+
188
+
189
+ def main():
190
+ """Main application entry point."""
191
+ logger.info("Starting SpeechT5 Armenian TTS Application")
192
+
193
+ # Initialize pipeline
194
+ if not initialize_pipeline():
195
+ logger.error("Failed to initialize TTS pipeline - continuing with limited functionality")
196
+
197
+ # Create and launch interface
198
+ app = create_app()
199
+
200
+ # Launch with HF Spaces settings
201
+ app.launch(
202
+ share=False, # Don't create share link on HF Spaces
203
+ server_name="0.0.0.0",
204
+ server_port=7860,
205
+ show_error=True
206
+ )
207
+
208
+
209
+ if __name__ == "__main__":
210
+ main()
deploy.py CHANGED
@@ -24,12 +24,16 @@ def backup_original():
24
 
25
  def deploy_optimized():
26
  """Deploy the optimized version."""
27
- if os.path.exists("app_optimized.py"):
 
 
 
 
28
  shutil.copy2("app_optimized.py", "app.py")
29
  print("✅ Optimized version deployed as app.py")
30
  print("🚀 Ready for Hugging Face Spaces deployment!")
31
  else:
32
- print("❌ app_optimized.py not found")
33
  return False
34
  return True
35
 
 
24
 
25
  def deploy_optimized():
26
  """Deploy the optimized version."""
27
+ if os.path.exists("app_simple.py"):
28
+ shutil.copy2("app_simple.py", "app.py")
29
+ print("✅ Simple optimized version deployed as app.py")
30
+ print("🚀 Ready for Hugging Face Spaces deployment!")
31
+ elif os.path.exists("app_optimized.py"):
32
  shutil.copy2("app_optimized.py", "app.py")
33
  print("✅ Optimized version deployed as app.py")
34
  print("🚀 Ready for Hugging Face Spaces deployment!")
35
  else:
36
+ print("❌ No optimized version found")
37
  return False
38
  return True
39
 
requirements.txt CHANGED
@@ -11,8 +11,8 @@ librosa==0.10.1
11
  soundfile==0.12.1
12
  scipy==1.11.4
13
 
14
- # Gradio and web interface (updated to latest stable)
15
- gradio==4.44.1
16
 
17
  # Text processing
18
  inflect==7.0.0
 
11
  soundfile==0.12.1
12
  scipy==1.11.4
13
 
14
+ # Gradio and web interface (stable version for HF Spaces)
15
+ gradio==4.20.0
16
 
17
  # Text processing
18
  inflect==7.0.0