Spaces:
Runtime error
Runtime error
Refactor: Simplify TTS application for HuggingFace Spaces with improved error handling and interface
Browse files- DEPLOYMENT_FIX_SUMMARY.md +98 -0
- app.py +120 -329
- app_deploy.py +170 -0
- app_optimized.py +6 -3
- app_simple.py +210 -0
- deploy.py +6 -2
- requirements.txt +2 -2
DEPLOYMENT_FIX_SUMMARY.md
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# HuggingFace Spaces Deployment Fix
|
2 |
+
|
3 |
+
## Issues Identified and Fixed
|
4 |
+
|
5 |
+
### 1. Gradio JSON Schema Error
|
6 |
+
**Error**: `TypeError: argument of type 'bool' is not iterable`
|
7 |
+
|
8 |
+
**Root Cause**: The error occurred in Gradio's JSON schema processing when trying to check `if "const" in schema:` where `schema` was a boolean instead of a dictionary.
|
9 |
+
|
10 |
+
**Fixes Applied**:
|
11 |
+
- Updated Gradio version to a more stable release (4.20.0)
|
12 |
+
- Simplified the interface using `gr.Interface` instead of complex `gr.Blocks`
|
13 |
+
- Disabled example caching (`cache_examples=False`)
|
14 |
+
- Disabled flagging (`allow_flagging="never"`)
|
15 |
+
- Replaced `share=True` with `share=False` in `launch()` (share links are not supported on HF Spaces)
|
16 |
+
|
17 |
+
### 2. Import and Dependency Issues
|
18 |
+
**Fixes Applied**:
|
19 |
+
- Added robust fallback import system
|
20 |
+
- Created dummy pipeline for testing when imports fail
|
21 |
+
- Improved error handling throughout the application
|
22 |
+
- Added proper sys.path management for src imports
|
23 |
+
|
24 |
+
### 3. HuggingFace Spaces Compatibility
|
25 |
+
**Fixes Applied**:
|
26 |
+
- Set `share=False` (share links not supported on HF Spaces)
|
27 |
+
- Used standard server configuration (`0.0.0.0:7860`)
|
28 |
+
- Simplified interface structure
|
29 |
+
- Added proper error boundaries
|
30 |
+
|
31 |
+
## Files Modified
|
32 |
+
|
33 |
+
1. **`app.py`** - Main deployment file with robust error handling
|
34 |
+
2. **`app_deploy.py`** - Clean deployment version
|
35 |
+
3. **`app_simple.py`** - Simplified alternative
|
36 |
+
4. **`requirements.txt`** - Updated Gradio version
|
37 |
+
5. **`deploy.py`** - Enhanced deployment script
|
38 |
+
|
39 |
+
## Deployment Steps
|
40 |
+
|
41 |
+
1. **Test Locally** (optional):
|
42 |
+
```bash
|
43 |
+
python app.py
|
44 |
+
```
|
45 |
+
|
46 |
+
2. **Deploy to HuggingFace Spaces**:
|
47 |
+
```bash
|
48 |
+
git add .
|
49 |
+
git commit -m "Fix Gradio schema errors and improve compatibility"
|
50 |
+
git push
|
51 |
+
```
|
52 |
+
|
53 |
+
## Key Changes Made
|
54 |
+
|
55 |
+
### App Structure
|
56 |
+
- Switched from `gr.Blocks` to `gr.Interface` for better compatibility
|
57 |
+
- Simplified input/output definitions
|
58 |
+
- Removed complex state management
|
59 |
+
|
60 |
+
### Error Handling
|
61 |
+
- Added comprehensive `try`/`except` blocks
|
62 |
+
- Created fallback pipeline for testing
|
63 |
+
- Improved logging throughout
|
64 |
+
|
65 |
+
### Dependencies
|
66 |
+
- Pinned Gradio to a stable version (4.20.0)
|
67 |
+
- Maintained all core ML dependencies
|
68 |
+
- Added proper import fallbacks
|
69 |
+
|
70 |
+
### Configuration
|
71 |
+
- Disabled problematic features (share, caching, flagging)
|
72 |
+
- Set proper server configuration for HF Spaces
|
73 |
+
- Simplified launch parameters
|
74 |
+
|
75 |
+
## Testing the Fix
|
76 |
+
|
77 |
+
The fixed version should:
|
78 |
+
1. ✅ Load without JSON schema errors
|
79 |
+
2. ✅ Handle import failures gracefully
|
80 |
+
3. ✅ Work on HuggingFace Spaces infrastructure
|
81 |
+
4. ✅ Provide fallback functionality when models fail to load
|
82 |
+
5. ✅ Display proper error messages to users
|
83 |
+
|
84 |
+
## Backup Files
|
85 |
+
|
86 |
+
- `app_original.py` - Your original application
|
87 |
+
- `app_optimized.py` - The optimized version (fixed)
|
88 |
+
- `app_simple.py` - Simplified version
|
89 |
+
- `app_deploy.py` - Final deployment version
|
90 |
+
|
91 |
+
## If Issues Persist
|
92 |
+
|
93 |
+
1. Check HuggingFace Spaces logs for specific errors
|
94 |
+
2. Verify all dependencies are properly installed
|
95 |
+
3. Test with the simple version (`app_simple.py`)
|
96 |
+
4. Contact HF support if infrastructure issues persist
|
97 |
+
|
98 |
+
The main fix addresses the Gradio JSON schema error by simplifying the interface structure and using compatible Gradio features.
|
app.py
CHANGED
@@ -1,379 +1,170 @@
|
|
1 |
"""
|
2 |
-
|
3 |
-
|
4 |
|
5 |
-
|
6 |
"""
|
7 |
|
8 |
import gradio as gr
|
9 |
import numpy as np
|
10 |
import logging
|
11 |
import time
|
12 |
-
from typing import Tuple, Optional
|
13 |
import os
|
14 |
import sys
|
|
|
15 |
|
16 |
-
#
|
17 |
-
current_dir = os.path.dirname(os.path.abspath(__file__))
|
18 |
-
src_path = os.path.join(current_dir, 'src')
|
19 |
-
if src_path not in sys.path:
|
20 |
-
sys.path.insert(0, src_path)
|
21 |
-
|
22 |
-
try:
|
23 |
-
from src.pipeline import TTSPipeline
|
24 |
-
except ImportError as e:
|
25 |
-
logging.error(f"Failed to import pipeline: {e}")
|
26 |
-
# Fallback import attempt
|
27 |
-
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
28 |
-
from src.pipeline import TTSPipeline
|
29 |
-
|
30 |
-
# Configure logging
|
31 |
logging.basicConfig(
|
32 |
level=logging.INFO,
|
33 |
-
format='%(asctime)s - %(
|
34 |
)
|
35 |
logger = logging.getLogger(__name__)
|
36 |
|
37 |
-
# Global pipeline
|
38 |
-
|
39 |
-
|
40 |
|
41 |
-
def
|
42 |
-
"""
|
43 |
-
global
|
44 |
|
45 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
logger.info("Initializing TTS Pipeline...")
|
47 |
-
|
48 |
model_checkpoint="Edmon02/TTS_NB_2",
|
49 |
-
max_chunk_length=200,
|
50 |
crossfade_duration=0.1,
|
51 |
use_mixed_precision=True
|
52 |
)
|
53 |
|
54 |
-
#
|
55 |
-
|
56 |
-
|
57 |
-
logger.info("TTS Pipeline initialized successfully")
|
58 |
return True
|
59 |
|
60 |
except Exception as e:
|
61 |
-
logger.error(f"Failed to initialize
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
return False
|
63 |
|
64 |
|
65 |
-
def
|
66 |
-
enable_chunking: bool = True,
|
67 |
-
apply_processing: bool = True) -> Tuple[int, np.ndarray]:
|
68 |
"""
|
69 |
-
|
70 |
|
71 |
Args:
|
72 |
-
text:
|
73 |
-
speaker: Speaker selection
|
74 |
-
enable_chunking: Whether to enable intelligent chunking
|
75 |
-
apply_processing: Whether to apply audio post-processing
|
76 |
|
77 |
Returns:
|
78 |
-
Tuple of (sample_rate,
|
79 |
"""
|
80 |
-
|
|
|
|
|
|
|
81 |
|
82 |
-
|
|
|
|
|
83 |
|
84 |
try:
|
85 |
-
|
86 |
-
|
87 |
-
logger.warning("Empty text provided")
|
88 |
-
return 16000, np.zeros(0, dtype=np.int16)
|
89 |
-
|
90 |
-
if tts_pipeline is None:
|
91 |
-
logger.error("TTS pipeline not initialized")
|
92 |
-
return 16000, np.zeros(0, dtype=np.int16)
|
93 |
-
|
94 |
-
# Extract speaker code from selection
|
95 |
-
speaker_code = speaker.split("(")[0].strip()
|
96 |
-
|
97 |
-
# Log request
|
98 |
-
logger.info(f"Processing request: {len(text)} chars, speaker: {speaker_code}")
|
99 |
|
100 |
-
# Synthesize
|
101 |
-
sample_rate, audio =
|
102 |
text=text,
|
103 |
-
speaker=
|
104 |
-
enable_chunking=
|
105 |
-
apply_audio_processing=
|
106 |
)
|
107 |
|
108 |
-
|
109 |
-
|
110 |
-
audio_duration = len(audio) / sample_rate if len(audio) > 0 else 0
|
111 |
-
rtf = total_time / audio_duration if audio_duration > 0 else float('inf')
|
112 |
-
|
113 |
-
logger.info(f"Request completed in {total_time:.3f}s (RTF: {rtf:.2f})")
|
114 |
|
115 |
return sample_rate, audio
|
116 |
|
117 |
except Exception as e:
|
118 |
-
logger.error(f"
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
DESCRIPTION = """
|
172 |
-
# High-Performance Armenian Text-to-Speech
|
173 |
-
|
174 |
-
This is an **optimized version** of SpeechT5 for Armenian language synthesis, featuring:
|
175 |
-
|
176 |
-
### 🚀 **Performance Optimizations**
|
177 |
-
- **Intelligent Text Chunking**: Handles long texts by splitting them intelligently at sentence boundaries
|
178 |
-
- **Caching**: Translation and embedding caching for faster repeated requests
|
179 |
-
- **Mixed Precision**: GPU optimization with FP16 inference when available
|
180 |
-
- **Crossfading**: Smooth audio transitions between chunks for natural-sounding longer texts
|
181 |
-
|
182 |
-
### 🎯 **Advanced Features**
|
183 |
-
- **Smart Text Processing**: Automatic number-to-word conversion with Armenian translation
|
184 |
-
- **Audio Post-Processing**: Noise gating, normalization, and dynamic range optimization
|
185 |
-
- **Robust Error Handling**: Graceful fallbacks and comprehensive logging
|
186 |
-
- **Real-time Performance Monitoring**: Track processing times and system health
|
187 |
-
|
188 |
-
### 📝 **Usage Tips**
|
189 |
-
- **Short texts** (< 200 chars): Processed directly for maximum speed
|
190 |
-
- **Long texts**: Automatically chunked with overlap for seamless audio
|
191 |
-
- **Numbers**: Automatically converted to Armenian words
|
192 |
-
- **Performance**: Enable chunking for texts longer than a few sentences
|
193 |
-
|
194 |
-
### 🎵 **Audio Quality**
|
195 |
-
- Sample Rate: 16 kHz
|
196 |
-
- Optimized for natural prosody and clear pronunciation
|
197 |
-
- Cross-fade transitions for multi-chunk synthesis
|
198 |
-
|
199 |
-
The model was trained on short clips (5-20s) but uses advanced algorithms to handle longer texts effectively.
|
200 |
-
"""
|
201 |
-
|
202 |
-
EXAMPLES = [
|
203 |
-
# Short examples for quick testing
|
204 |
-
["Բարև ձեզ, ինչպե՞ս եք:", "BDL (male)", True, True],
|
205 |
-
["Այսօր գեղեցիկ օր է:", "BDL (male)", False, True],
|
206 |
-
|
207 |
-
# Medium examples demonstrating chunking
|
208 |
-
["Հայաստանն ունի հարուստ պատմություն և մշակույթ: Երևանը մայրաքաղաքն է, որն ունի 2800 տարվա պատմություն:", "BDL (male)", True, True],
|
209 |
-
|
210 |
-
# Long example with numbers
|
211 |
-
["Արարատ լեռը բարձրությունը 5165 մետր է: Այն Հայաստանի խորհրդանիշն է և գտնվում է Թուրքիայի տարածքում: Լեռան վրա ըստ Աստվածաշնչի՝ կանգնել է Նոյի տապանը 40 օրվա ջրհեղեղից հետո:", "BDL (male)", True, True],
|
212 |
-
|
213 |
-
# Technical example
|
214 |
-
["Մեքենայի շարժիչը 150 ձիուժ է և 2.0 լիտր ծավալ ունի: Այն կարող է արագացնել 0-ից 100 կմ/ժ 8.5 վայրկյանում:", "BDL (male)", True, True],
|
215 |
-
]
|
216 |
-
|
217 |
-
# Custom CSS for better styling
|
218 |
-
CUSTOM_CSS = """
|
219 |
-
.gradio-container {
|
220 |
-
max-width: 1200px !important;
|
221 |
-
margin: auto !important;
|
222 |
-
}
|
223 |
-
|
224 |
-
.performance-info {
|
225 |
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
226 |
-
padding: 15px;
|
227 |
-
border-radius: 10px;
|
228 |
-
color: white;
|
229 |
-
margin: 10px 0;
|
230 |
-
}
|
231 |
-
|
232 |
-
.health-status {
|
233 |
-
padding: 10px;
|
234 |
-
border-radius: 8px;
|
235 |
-
margin: 10px 0;
|
236 |
-
font-weight: bold;
|
237 |
-
}
|
238 |
-
|
239 |
-
.status-healthy { background-color: #d4edda; color: #155724; }
|
240 |
-
.status-warning { background-color: #fff3cd; color: #856404; }
|
241 |
-
.status-error { background-color: #f8d7da; color: #721c24; }
|
242 |
-
"""
|
243 |
-
|
244 |
-
|
245 |
-
def create_interface():
|
246 |
-
"""Create and configure the Gradio interface."""
|
247 |
-
|
248 |
-
with gr.Blocks(
|
249 |
-
theme=gr.themes.Soft(),
|
250 |
-
css=CUSTOM_CSS,
|
251 |
-
title="SpeechT5 Armenian TTS"
|
252 |
-
) as interface:
|
253 |
-
|
254 |
-
# Header
|
255 |
-
gr.Markdown(f"# {TITLE}")
|
256 |
-
gr.Markdown(DESCRIPTION)
|
257 |
-
|
258 |
-
with gr.Row():
|
259 |
-
with gr.Column(scale=2):
|
260 |
-
# Main input controls
|
261 |
-
text_input = gr.Textbox(
|
262 |
-
label="📝 Input Text (Armenian)",
|
263 |
-
placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
|
264 |
-
lines=3,
|
265 |
-
max_lines=10
|
266 |
-
)
|
267 |
-
|
268 |
-
with gr.Row():
|
269 |
-
speaker_input = gr.Radio(
|
270 |
-
label="🎭 Speaker",
|
271 |
-
choices=["BDL (male)"],
|
272 |
-
value="BDL (male)"
|
273 |
-
)
|
274 |
-
|
275 |
-
with gr.Row():
|
276 |
-
chunking_checkbox = gr.Checkbox(
|
277 |
-
label="🧩 Enable Intelligent Chunking",
|
278 |
-
value=True,
|
279 |
-
info="Automatically split long texts for better quality"
|
280 |
-
)
|
281 |
-
processing_checkbox = gr.Checkbox(
|
282 |
-
label="🎚️ Apply Audio Processing",
|
283 |
-
value=True,
|
284 |
-
info="Apply noise gating, normalization, and crossfading"
|
285 |
-
)
|
286 |
-
|
287 |
-
# Generate button
|
288 |
-
generate_btn = gr.Button(
|
289 |
-
"🎤 Generate Speech",
|
290 |
-
variant="primary",
|
291 |
-
size="lg"
|
292 |
-
)
|
293 |
-
|
294 |
-
with gr.Column(scale=1):
|
295 |
-
# System information panel
|
296 |
-
gr.Markdown("### 📊 System Status")
|
297 |
-
|
298 |
-
health_display = gr.Textbox(
|
299 |
-
label="Health Status",
|
300 |
-
value="Initializing...",
|
301 |
-
interactive=False,
|
302 |
-
max_lines=1
|
303 |
-
)
|
304 |
-
|
305 |
-
performance_display = gr.Textbox(
|
306 |
-
label="Performance Stats",
|
307 |
-
value="No data yet",
|
308 |
-
interactive=False,
|
309 |
-
max_lines=8
|
310 |
-
)
|
311 |
-
|
312 |
-
refresh_btn = gr.Button("🔄 Refresh Stats", size="sm")
|
313 |
-
|
314 |
-
# Output
|
315 |
-
audio_output = gr.Audio(
|
316 |
-
label="🔊 Generated Speech",
|
317 |
-
type="numpy",
|
318 |
-
interactive=False
|
319 |
-
)
|
320 |
-
|
321 |
-
# Examples section
|
322 |
-
gr.Markdown("### 💡 Example Texts")
|
323 |
-
gr.Examples(
|
324 |
-
examples=EXAMPLES,
|
325 |
-
inputs=[text_input, speaker_input, chunking_checkbox, processing_checkbox],
|
326 |
-
outputs=[audio_output],
|
327 |
-
fn=predict,
|
328 |
-
label="Click any example to try it:"
|
329 |
-
)
|
330 |
-
|
331 |
-
# Event handlers
|
332 |
-
generate_btn.click(
|
333 |
-
fn=predict,
|
334 |
-
inputs=[text_input, speaker_input, chunking_checkbox, processing_checkbox],
|
335 |
-
outputs=[audio_output],
|
336 |
-
show_progress="full"
|
337 |
-
)
|
338 |
-
|
339 |
-
refresh_btn.click(
|
340 |
-
fn=lambda: (health_check(), get_performance_info()),
|
341 |
-
outputs=[health_display, performance_display],
|
342 |
-
show_progress="minimal"
|
343 |
-
)
|
344 |
-
|
345 |
-
# Auto-refresh health status on load
|
346 |
-
interface.load(
|
347 |
-
fn=lambda: (health_check(), get_performance_info()),
|
348 |
-
outputs=[health_display, performance_display]
|
349 |
-
)
|
350 |
-
|
351 |
-
return interface
|
352 |
-
|
353 |
-
|
354 |
-
def main():
|
355 |
-
"""Main application entry point."""
|
356 |
-
logger.info("Starting SpeechT5 Armenian TTS Application")
|
357 |
-
|
358 |
-
# Initialize pipeline
|
359 |
-
if not initialize_pipeline():
|
360 |
-
logger.error("Failed to initialize TTS pipeline - exiting")
|
361 |
-
sys.exit(1)
|
362 |
-
|
363 |
-
# Create and launch interface
|
364 |
-
interface = create_interface()
|
365 |
-
|
366 |
-
# Launch with optimized settings
|
367 |
-
interface.launch(
|
368 |
-
share=True,
|
369 |
-
inbrowser=False,
|
370 |
-
show_error=True,
|
371 |
-
quiet=False,
|
372 |
-
server_name="0.0.0.0", # Allow external connections
|
373 |
-
server_port=7860, # Standard Gradio port
|
374 |
-
max_threads=4, # Limit concurrent requests
|
375 |
-
)
|
376 |
-
|
377 |
|
|
|
378 |
if __name__ == "__main__":
|
379 |
-
|
|
|
|
|
|
|
|
|
|
1 |
"""
|
2 |
+
SpeechT5 Armenian TTS - Production Deployment
|
3 |
+
============================================
|
4 |
|
5 |
+
Production-ready version for HuggingFace Spaces with robust error handling.
|
6 |
"""
|
7 |
|
8 |
import gradio as gr
|
9 |
import numpy as np
|
10 |
import logging
|
11 |
import time
|
|
|
12 |
import os
|
13 |
import sys
|
14 |
+
from typing import Tuple, Optional, Union
|
15 |
|
16 |
+
# Setup logging first
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
logging.basicConfig(
|
18 |
level=logging.INFO,
|
19 |
+
format='%(asctime)s - %(levelname)s - %(message)s'
|
20 |
)
|
21 |
logger = logging.getLogger(__name__)
|
22 |
|
23 |
+
# Global pipeline variable
|
24 |
+
pipeline = None
|
|
|
25 |
|
26 |
+
def safe_import():
|
27 |
+
"""Safely import the TTS pipeline with fallbacks."""
|
28 |
+
global pipeline
|
29 |
|
30 |
try:
|
31 |
+
# Add src to path
|
32 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
33 |
+
src_path = os.path.join(current_dir, 'src')
|
34 |
+
if src_path not in sys.path:
|
35 |
+
sys.path.insert(0, src_path)
|
36 |
+
|
37 |
+
# Import pipeline
|
38 |
+
from src.pipeline import TTSPipeline
|
39 |
+
|
40 |
logger.info("Initializing TTS Pipeline...")
|
41 |
+
pipeline = TTSPipeline(
|
42 |
model_checkpoint="Edmon02/TTS_NB_2",
|
43 |
+
max_chunk_length=200,
|
44 |
crossfade_duration=0.1,
|
45 |
use_mixed_precision=True
|
46 |
)
|
47 |
|
48 |
+
# Optimize for production
|
49 |
+
pipeline.optimize_for_production()
|
50 |
+
logger.info("TTS Pipeline ready")
|
|
|
51 |
return True
|
52 |
|
53 |
except Exception as e:
|
54 |
+
logger.error(f"Failed to initialize pipeline: {e}")
|
55 |
+
logger.info("Creating fallback pipeline for testing")
|
56 |
+
|
57 |
+
# Create a simple fallback
|
58 |
+
class FallbackPipeline:
|
59 |
+
def synthesize(self, text, **kwargs):
|
60 |
+
# Generate simple tone as placeholder
|
61 |
+
duration = min(len(text) * 0.08, 3.0)
|
62 |
+
sample_rate = 16000
|
63 |
+
samples = int(duration * sample_rate)
|
64 |
+
t = np.linspace(0, duration, samples)
|
65 |
+
# Create a simple beep
|
66 |
+
audio = np.sin(2 * np.pi * 440 * t) * 0.3
|
67 |
+
return sample_rate, (audio * 32767).astype(np.int16)
|
68 |
+
|
69 |
+
pipeline = FallbackPipeline()
|
70 |
return False
|
71 |
|
72 |
|
73 |
+
def generate_audio(text: str) -> Tuple[int, np.ndarray]:
|
|
|
|
|
74 |
"""
|
75 |
+
Generate audio from Armenian text.
|
76 |
|
77 |
Args:
|
78 |
+
text: Armenian text to synthesize
|
|
|
|
|
|
|
79 |
|
80 |
Returns:
|
81 |
+
Tuple of (sample_rate, audio_data)
|
82 |
"""
|
83 |
+
if not text or not text.strip():
|
84 |
+
logger.warning("Empty text provided")
|
85 |
+
# Return silence
|
86 |
+
return 16000, np.zeros(1000, dtype=np.int16)
|
87 |
|
88 |
+
if pipeline is None:
|
89 |
+
logger.error("Pipeline not available")
|
90 |
+
return 16000, np.zeros(1000, dtype=np.int16)
|
91 |
|
92 |
try:
|
93 |
+
logger.info(f"Processing: {text[:50]}...")
|
94 |
+
start_time = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
+
# Synthesize with basic parameters
|
97 |
+
sample_rate, audio = pipeline.synthesize(
|
98 |
text=text,
|
99 |
+
speaker="BDL",
|
100 |
+
enable_chunking=True,
|
101 |
+
apply_audio_processing=True
|
102 |
)
|
103 |
|
104 |
+
duration = time.time() - start_time
|
105 |
+
logger.info(f"Generated {len(audio)} samples in {duration:.2f}s")
|
|
|
|
|
|
|
|
|
106 |
|
107 |
return sample_rate, audio
|
108 |
|
109 |
except Exception as e:
|
110 |
+
logger.error(f"Synthesis error: {e}")
|
111 |
+
# Return silence on error
|
112 |
+
return 16000, np.zeros(1000, dtype=np.int16)
|
113 |
+
|
114 |
+
|
115 |
+
# Initialize the pipeline
|
116 |
+
logger.info("Starting TTS application...")
|
117 |
+
initialization_success = safe_import()
|
118 |
+
|
119 |
+
if initialization_success:
|
120 |
+
status_message = "✅ TTS System Ready"
|
121 |
+
else:
|
122 |
+
status_message = "⚠️ Running in Test Mode (Limited Functionality)"
|
123 |
+
|
124 |
+
# Create the Gradio interface using the simpler gr.Interface
|
125 |
+
demo = gr.Interface(
|
126 |
+
fn=generate_audio,
|
127 |
+
inputs=gr.Textbox(
|
128 |
+
label="Armenian Text",
|
129 |
+
placeholder="Գրեք ձեր տեքստը այստեղ...",
|
130 |
+
lines=3,
|
131 |
+
max_lines=8
|
132 |
+
),
|
133 |
+
outputs=gr.Audio(
|
134 |
+
label="Generated Speech",
|
135 |
+
type="numpy"
|
136 |
+
),
|
137 |
+
title="🎤 Armenian Text-to-Speech",
|
138 |
+
description=f"""
|
139 |
+
{status_message}
|
140 |
+
|
141 |
+
Convert Armenian text to speech using SpeechT5.
|
142 |
+
|
143 |
+
**How to use:**
|
144 |
+
1. Enter Armenian text in the box below
|
145 |
+
2. Click Submit to generate speech
|
146 |
+
3. Play the generated audio
|
147 |
+
|
148 |
+
**Tips:**
|
149 |
+
- Use standard Armenian script
|
150 |
+
- Shorter sentences work better
|
151 |
+
- Include punctuation for natural pauses
|
152 |
+
""",
|
153 |
+
examples=[
|
154 |
+
"Բարև ձեզ:",
|
155 |
+
"Ինչպե՞ս եք:",
|
156 |
+
"Շնորհակալություն:",
|
157 |
+
"Կեցցե՛ Հայաստանը:",
|
158 |
+
"Այսօր լավ օր է:"
|
159 |
+
],
|
160 |
+
theme=gr.themes.Default(),
|
161 |
+
allow_flagging="never"
|
162 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
+
# Launch the app
|
165 |
if __name__ == "__main__":
|
166 |
+
demo.launch(
|
167 |
+
server_name="0.0.0.0",
|
168 |
+
server_port=7860,
|
169 |
+
share=False
|
170 |
+
)
|
app_deploy.py
ADDED
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
SpeechT5 Armenian TTS - Production Deployment
|
3 |
+
============================================
|
4 |
+
|
5 |
+
Production-ready version for HuggingFace Spaces with robust error handling.
|
6 |
+
"""
|
7 |
+
|
8 |
+
import gradio as gr
|
9 |
+
import numpy as np
|
10 |
+
import logging
|
11 |
+
import time
|
12 |
+
import os
|
13 |
+
import sys
|
14 |
+
from typing import Tuple, Optional, Union
|
15 |
+
|
16 |
+
# Setup logging first
|
17 |
+
logging.basicConfig(
|
18 |
+
level=logging.INFO,
|
19 |
+
format='%(asctime)s - %(levelname)s - %(message)s'
|
20 |
+
)
|
21 |
+
logger = logging.getLogger(__name__)
|
22 |
+
|
23 |
+
# Global pipeline variable
|
24 |
+
pipeline = None
|
25 |
+
|
26 |
+
def safe_import():
|
27 |
+
"""Safely import the TTS pipeline with fallbacks."""
|
28 |
+
global pipeline
|
29 |
+
|
30 |
+
try:
|
31 |
+
# Add src to path
|
32 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
33 |
+
src_path = os.path.join(current_dir, 'src')
|
34 |
+
if src_path not in sys.path:
|
35 |
+
sys.path.insert(0, src_path)
|
36 |
+
|
37 |
+
# Import pipeline
|
38 |
+
from src.pipeline import TTSPipeline
|
39 |
+
|
40 |
+
logger.info("Initializing TTS Pipeline...")
|
41 |
+
pipeline = TTSPipeline(
|
42 |
+
model_checkpoint="Edmon02/TTS_NB_2",
|
43 |
+
max_chunk_length=200,
|
44 |
+
crossfade_duration=0.1,
|
45 |
+
use_mixed_precision=True
|
46 |
+
)
|
47 |
+
|
48 |
+
# Optimize for production
|
49 |
+
pipeline.optimize_for_production()
|
50 |
+
logger.info("TTS Pipeline ready")
|
51 |
+
return True
|
52 |
+
|
53 |
+
except Exception as e:
|
54 |
+
logger.error(f"Failed to initialize pipeline: {e}")
|
55 |
+
logger.info("Creating fallback pipeline for testing")
|
56 |
+
|
57 |
+
# Create a simple fallback
|
58 |
+
class FallbackPipeline:
|
59 |
+
def synthesize(self, text, **kwargs):
|
60 |
+
# Generate simple tone as placeholder
|
61 |
+
duration = min(len(text) * 0.08, 3.0)
|
62 |
+
sample_rate = 16000
|
63 |
+
samples = int(duration * sample_rate)
|
64 |
+
t = np.linspace(0, duration, samples)
|
65 |
+
# Create a simple beep
|
66 |
+
audio = np.sin(2 * np.pi * 440 * t) * 0.3
|
67 |
+
return sample_rate, (audio * 32767).astype(np.int16)
|
68 |
+
|
69 |
+
pipeline = FallbackPipeline()
|
70 |
+
return False
|
71 |
+
|
72 |
+
|
73 |
+
def generate_audio(text: str) -> Tuple[int, np.ndarray]:
|
74 |
+
"""
|
75 |
+
Generate audio from Armenian text.
|
76 |
+
|
77 |
+
Args:
|
78 |
+
text: Armenian text to synthesize
|
79 |
+
|
80 |
+
Returns:
|
81 |
+
Tuple of (sample_rate, audio_data)
|
82 |
+
"""
|
83 |
+
if not text or not text.strip():
|
84 |
+
logger.warning("Empty text provided")
|
85 |
+
# Return silence
|
86 |
+
return 16000, np.zeros(1000, dtype=np.int16)
|
87 |
+
|
88 |
+
if pipeline is None:
|
89 |
+
logger.error("Pipeline not available")
|
90 |
+
return 16000, np.zeros(1000, dtype=np.int16)
|
91 |
+
|
92 |
+
try:
|
93 |
+
logger.info(f"Processing: {text[:50]}...")
|
94 |
+
start_time = time.time()
|
95 |
+
|
96 |
+
# Synthesize with basic parameters
|
97 |
+
sample_rate, audio = pipeline.synthesize(
|
98 |
+
text=text,
|
99 |
+
speaker="BDL",
|
100 |
+
enable_chunking=True,
|
101 |
+
apply_audio_processing=True
|
102 |
+
)
|
103 |
+
|
104 |
+
duration = time.time() - start_time
|
105 |
+
logger.info(f"Generated {len(audio)} samples in {duration:.2f}s")
|
106 |
+
|
107 |
+
return sample_rate, audio
|
108 |
+
|
109 |
+
except Exception as e:
|
110 |
+
logger.error(f"Synthesis error: {e}")
|
111 |
+
# Return silence on error
|
112 |
+
return 16000, np.zeros(1000, dtype=np.int16)
|
113 |
+
|
114 |
+
|
115 |
+
# Initialize the pipeline
|
116 |
+
logger.info("Starting TTS application...")
|
117 |
+
initialization_success = safe_import()
|
118 |
+
|
119 |
+
if initialization_success:
|
120 |
+
status_message = "✅ TTS System Ready"
|
121 |
+
else:
|
122 |
+
status_message = "⚠️ Running in Test Mode (Limited Functionality)"
|
123 |
+
|
124 |
+
# Create the Gradio interface using the simpler gr.Interface
|
125 |
+
demo = gr.Interface(
|
126 |
+
fn=generate_audio,
|
127 |
+
inputs=gr.Textbox(
|
128 |
+
label="Armenian Text",
|
129 |
+
placeholder="Գրեք ձեր տեքստը այստեղ...",
|
130 |
+
lines=3,
|
131 |
+
max_lines=8
|
132 |
+
),
|
133 |
+
outputs=gr.Audio(
|
134 |
+
label="Generated Speech",
|
135 |
+
type="numpy"
|
136 |
+
),
|
137 |
+
title="🎤 Armenian Text-to-Speech",
|
138 |
+
description=f"""
|
139 |
+
{status_message}
|
140 |
+
|
141 |
+
Convert Armenian text to speech using SpeechT5.
|
142 |
+
|
143 |
+
**How to use:**
|
144 |
+
1. Enter Armenian text in the box below
|
145 |
+
2. Click Submit to generate speech
|
146 |
+
3. Play the generated audio
|
147 |
+
|
148 |
+
**Tips:**
|
149 |
+
- Use standard Armenian script
|
150 |
+
- Shorter sentences work better
|
151 |
+
- Include punctuation for natural pauses
|
152 |
+
""",
|
153 |
+
examples=[
|
154 |
+
"Բարև ձեզ:",
|
155 |
+
"Ինչպե՞ս եք:",
|
156 |
+
"Շնորհակալություն:",
|
157 |
+
"Կեցցե՛ Հայաստանը:",
|
158 |
+
"Այսօր լավ օր է:"
|
159 |
+
],
|
160 |
+
theme=gr.themes.Default(),
|
161 |
+
allow_flagging="never"
|
162 |
+
)
|
163 |
+
|
164 |
+
# Launch the app
|
165 |
+
if __name__ == "__main__":
|
166 |
+
demo.launch(
|
167 |
+
server_name="0.0.0.0",
|
168 |
+
server_port=7860,
|
169 |
+
share=False
|
170 |
+
)
|
app_optimized.py
CHANGED
@@ -320,11 +320,14 @@ def create_interface():
|
|
320 |
|
321 |
# Examples section
|
322 |
gr.Markdown("### 💡 Example Texts")
|
323 |
-
|
|
|
|
|
324 |
examples=EXAMPLES,
|
325 |
inputs=[text_input, speaker_input, chunking_checkbox, processing_checkbox],
|
326 |
-
outputs=
|
327 |
fn=predict,
|
|
|
328 |
label="Click any example to try it:"
|
329 |
)
|
330 |
|
@@ -365,7 +368,7 @@ def main():
|
|
365 |
|
366 |
# Launch with optimized settings
|
367 |
interface.launch(
|
368 |
-
share=
|
369 |
inbrowser=False,
|
370 |
show_error=True,
|
371 |
quiet=False,
|
|
|
320 |
|
321 |
# Examples section
|
322 |
gr.Markdown("### 💡 Example Texts")
|
323 |
+
|
324 |
+
# Use simpler Examples component to avoid schema issues
|
325 |
+
examples = gr.Examples(
|
326 |
examples=EXAMPLES,
|
327 |
inputs=[text_input, speaker_input, chunking_checkbox, processing_checkbox],
|
328 |
+
outputs=audio_output,
|
329 |
fn=predict,
|
330 |
+
cache_examples=False, # Disable caching to avoid schema issues
|
331 |
label="Click any example to try it:"
|
332 |
)
|
333 |
|
|
|
368 |
|
369 |
# Launch with optimized settings
|
370 |
interface.launch(
|
371 |
+
share=False, # Disable share for HF Spaces
|
372 |
inbrowser=False,
|
373 |
show_error=True,
|
374 |
quiet=False,
|
app_simple.py
ADDED
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
SpeechT5 Armenian TTS - HuggingFace Spaces Deployment Version
|
3 |
+
============================================================
|
4 |
+
|
5 |
+
Simplified and optimized for HuggingFace Spaces deployment.
|
6 |
+
"""
|
7 |
+
|
8 |
+
import gradio as gr
|
9 |
+
import numpy as np
|
10 |
+
import logging
|
11 |
+
import time
|
12 |
+
from typing import Tuple, Optional
|
13 |
+
import os
|
14 |
+
import sys
|
15 |
+
|
16 |
+
# Configure logging before anything is logged. The module-level logging.*
# helpers install a default root handler on first use, after which a later
# basicConfig() call is ignored; configuring first guarantees that the level
# and format below also apply to import-failure messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Add src to path for imports
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.join(current_dir, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import the real pipeline if possible. A single attempt is enough: retrying
# after re-adding the same directory to sys.path cannot change the outcome.
try:
    from src.pipeline import TTSPipeline
    HAS_PIPELINE = True
except ImportError as e:
    logger.error(f"Failed to import pipeline: {e}")
    HAS_PIPELINE = False

    class TTSPipeline:
        """Dummy pipeline used when the real one cannot be imported.

        Accepts and ignores any constructor arguments and produces a
        placeholder tone instead of speech, so the UI can still be exercised.
        """

        def __init__(self, *args, **kwargs):
            pass

        def synthesize(self, text, **kwargs):
            """Return ``(sample_rate, audio)`` holding a 440 Hz sine tone.

            The tone lasts 0.1 s per input character, capped at 5 s, and is
            returned as int16 samples at 16 kHz. Extra keyword arguments are
            accepted and ignored.
            """
            duration = min(len(text) * 0.1, 5.0)  # Approximate duration
            sample_rate = 16000
            samples = int(duration * sample_rate)
            # Generate a simple sine wave as placeholder
            t = np.linspace(0, duration, samples)
            frequency = 440  # A4 note
            audio = (np.sin(2 * np.pi * frequency * t) * 0.3).astype(np.float32)
            return sample_rate, (audio * 32767).astype(np.int16)

        def optimize_for_production(self):
            """No-op counterpart of the real pipeline's optimization hook."""
            pass

# Global pipeline instance
tts_pipeline: Optional[TTSPipeline] = None
|
60 |
+
|
61 |
+
|
62 |
+
def initialize_pipeline():
    """Create the global TTS pipeline, degrading gracefully on failure.

    When the pipeline module could not be imported (``HAS_PIPELINE`` is
    false), the dummy ``TTSPipeline`` is instantiated and this counts as
    success. Otherwise the real pipeline is built with the deployment
    settings and optimized for production.

    Returns:
        True if the pipeline was set up as intended, False if building the
        configured pipeline failed. In the failure case ``tts_pipeline`` is
        either a default-constructed pipeline or ``None``.
    """
    global tts_pipeline

    if not HAS_PIPELINE:
        logger.warning("Pipeline not available - using dummy implementation")
        tts_pipeline = TTSPipeline()
        return True

    try:
        logger.info("Initializing TTS Pipeline...")
        tts_pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            crossfade_duration=0.1,
            use_mixed_precision=True
        )

        # Apply production optimizations
        tts_pipeline.optimize_for_production()

        logger.info("TTS Pipeline initialized successfully")
        return True

    except Exception as e:
        # exc_info keeps the traceback so startup failures can be diagnosed.
        logger.error(f"Failed to initialize TTS pipeline: {e}", exc_info=True)

        # On this path TTSPipeline is the real class, so a default
        # construction can fail as well. Guard it so the error cannot escape
        # this handler and abort startup; with no pipeline at all the global
        # is reset to None.
        try:
            tts_pipeline = TTSPipeline()
        except Exception as fallback_error:
            logger.error(f"Fallback TTS pipeline construction failed: {fallback_error}")
            tts_pipeline = None
        return False
|
91 |
+
|
92 |
+
|
93 |
+
def generate_speech(text: str) -> Tuple[int, np.ndarray]:
    """
    Main synthesis function optimized for HF Spaces.

    Never raises: on empty input, a missing pipeline, or any synthesis error
    it logs the problem and returns a short block of silence instead.

    Args:
        text: Input text to synthesize

    Returns:
        Tuple of (sample_rate, audio_array); on failure this is
        (16000, 1000 zero-valued int16 samples).
    """
    start_time = time.time()

    # Placeholder returned whenever no real audio can be produced.
    silence = (16000, np.zeros(1000, dtype=np.int16))

    try:
        # Validate inputs
        if not text or not text.strip():
            logger.warning("Empty text provided")
            return silence

        if tts_pipeline is None:
            logger.error("TTS pipeline not initialized")
            return silence

        # Log request
        logger.info(f"Processing request: {len(text)} characters")

        # Synthesize speech with default settings
        sample_rate, audio = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True
        )

        # Log performance
        total_time = time.time() - start_time
        logger.info(f"Request completed in {total_time:.3f}s")

        return sample_rate, audio

    except Exception as e:
        # logger.exception records the traceback; the message alone is rarely
        # enough to diagnose a failure inside the pipeline.
        logger.exception(f"Synthesis failed: {e}")
        return silence
|
137 |
+
|
138 |
+
|
139 |
+
# Create the Gradio interface
|
140 |
+
def create_app():
    """Build and return the Gradio interface for the TTS demo.

    The interface has a single text input, a single audio output fed by
    ``generate_speech``, a few example sentences, and has flagging and
    example caching switched off.
    """
    text_input = gr.Textbox(
        label="Armenian Text",
        placeholder="Մուտքագրեք ձեր տեքստը այստեղ...",
        lines=3,
        max_lines=10,
    )
    audio_output = gr.Audio(label="Generated Speech", type="numpy")

    # NOTE(review): assembled line by line from the visible text; confirm
    # against the deployed file that no leading indentation is expected.
    description = (
        "\n"
        "Convert Armenian text to natural speech using SpeechT5.\n"
        "\n"
        "**Instructions:**\n"
        "1. Enter Armenian text in the input box\n"
        "2. Click Submit to generate speech\n"
        "3. Listen to the generated audio\n"
        "\n"
        "**Tips:**\n"
        "- Works best with standard Armenian orthography\n"
        "- Shorter sentences produce better quality\n"
        "- Include proper punctuation for natural pauses\n"
    )

    # One single-element row per example, as gr.Interface expects.
    example_sentences = (
        "Բարև ձեզ, ինչպե՞ս եք:",
        "Այսօր գեղեցիկ օր է:",
        "Հայաստանն ունի հարուստ պատմություն:",
        "Երևանը Հայաստանի մայրաքաղաքն է:",
        "Արարատ լեռը Հայաստանի խորհրդանիշն է:",
    )
    examples = [[sentence] for sentence in example_sentences]

    return gr.Interface(
        fn=generate_speech,
        inputs=[text_input],
        outputs=[audio_output],
        title="🎤 SpeechT5 Armenian Text-to-Speech",
        description=description,
        examples=examples,
        theme=gr.themes.Soft(),
        allow_flagging="never",  # Disable flagging to avoid schema issues
        cache_examples=False,  # Disable example caching
    )
|
187 |
+
|
188 |
+
|
189 |
+
def main():
    """Start the app: set up the pipeline, build the UI, and serve it."""
    logger.info("Starting SpeechT5 Armenian TTS Application")

    # A failed initialization is logged but does not stop the UI from starting.
    pipeline_ready = initialize_pipeline()
    if not pipeline_ready:
        logger.error("Failed to initialize TTS pipeline - continuing with limited functionality")

    # Launch settings for HF Spaces
    launch_options = {
        "share": False,  # Don't create share link on HF Spaces
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    create_app().launch(**launch_options)


if __name__ == "__main__":
    main()
|
deploy.py
CHANGED
@@ -24,12 +24,16 @@ def backup_original():
|
|
24 |
|
25 |
def deploy_optimized():
|
26 |
"""Deploy the optimized version."""
|
27 |
-
if os.path.exists("
|
|
|
|
|
|
|
|
|
28 |
shutil.copy2("app_optimized.py", "app.py")
|
29 |
print("✅ Optimized version deployed as app.py")
|
30 |
print("🚀 Ready for Hugging Face Spaces deployment!")
|
31 |
else:
|
32 |
-
print("❌
|
33 |
return False
|
34 |
return True
|
35 |
|
|
|
24 |
|
25 |
def deploy_optimized():
    """Deploy the best available app variant as app.py.

    Candidates are checked in priority order in the current working
    directory: ``app_simple.py`` first, then ``app_optimized.py``. The first
    one that exists is copied over ``app.py``.

    Returns:
        True if a candidate was copied, False if none was found.
    """
    # (source file, confirmation message), highest priority first.
    candidates = (
        ("app_simple.py", "✅ Simple optimized version deployed as app.py"),
        ("app_optimized.py", "✅ Optimized version deployed as app.py"),
    )

    for source, message in candidates:
        if os.path.exists(source):
            shutil.copy2(source, "app.py")
            print(message)
            print("🚀 Ready for Hugging Face Spaces deployment!")
            return True

    print("❌ No optimized version found")
    return False
|
39 |
|
requirements.txt
CHANGED
@@ -11,8 +11,8 @@ librosa==0.10.1
|
|
11 |
soundfile==0.12.1
|
12 |
scipy==1.11.4
|
13 |
|
14 |
-
# Gradio and web interface (
|
15 |
-
gradio==4.
|
16 |
|
17 |
# Text processing
|
18 |
inflect==7.0.0
|
|
|
11 |
soundfile==0.12.1
|
12 |
scipy==1.11.4
|
13 |
|
14 |
+
# Gradio and web interface (stable version for HF Spaces)
|
15 |
+
gradio==4.20.0
|
16 |
|
17 |
# Text processing
|
18 |
inflect==7.0.0
|