Spaces:
Runtime error
Runtime error
File size: 3,736 Bytes
3f1840e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
"""
SpeechT5 Armenian TTS - Ultra-Compatible Version
===============================================
Maximum compatibility version for HuggingFace Spaces.
"""
import gradio as gr
import numpy as np
import logging
import os
import sys
import warnings
# Ignore every Python warning for the whole process so that library
# warnings do not clutter the application's output.
warnings.filterwarnings("ignore")
# Configure root logging at WARNING level and create this module's logger;
# it is used by safe_tts() to report synthesis failures.
logging.basicConfig(level=logging.WARNING) # Reduce log noise
logger = logging.getLogger(__name__)
def safe_tts(text):
    """
    Synthesize speech for ``text`` without letting any error escape.

    Input that is not a string, or that is blank after stripping, yields the
    result of ``generate_silence()``. Otherwise the project's ``TTSPipeline``
    is imported and, on first use, created and stored as the function
    attribute ``safe_tts.pipeline`` so later calls reuse it. The two values
    produced by ``pipeline.synthesize`` are returned as a tuple (presumably
    sample rate and audio samples -- confirm against ``src.pipeline``).

    Any exception raised while importing, building, or running the pipeline
    is logged as a warning and replaced by ``generate_fallback_audio(text)``.
    """
    # Guard clause: only a non-blank string is worth synthesizing.
    has_content = isinstance(text, str) and bool(text.strip())
    if not has_content:
        return generate_silence()

    try:
        # Put the sibling ``src`` directory on the import path before
        # loading the pipeline module.
        src_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'src',
        )
        if src_dir not in sys.path:
            sys.path.insert(0, src_dir)

        from src.pipeline import TTSPipeline

        # Lazy one-time construction; the instance lives on the function
        # object itself. It is stored before optimization runs.
        if not hasattr(safe_tts, 'pipeline'):
            safe_tts.pipeline = TTSPipeline(
                model_checkpoint="Edmon02/TTS_NB_2",
                max_chunk_length=200,
                use_mixed_precision=True,
            )
            safe_tts.pipeline.optimize_for_production()

        sample_rate, waveform = safe_tts.pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True,
        )
        return sample_rate, waveform
    except Exception as err:
        # Deliberately broad: the UI must always receive some audio.
        logger.warning(f"TTS failed, using fallback: {err}")
        return generate_fallback_audio(text)
def generate_silence():
    """Generate short silence.

    Returns:
        A ``(16000, samples)`` tuple where ``samples`` is a NumPy array of
        8000 zero-valued ``int16`` entries (half a second if the first
        element is read as the sample rate in Hz).
    """
    sample_rate = 16000
    silent_samples = np.zeros(8000, dtype=np.int16)
    return sample_rate, silent_samples
def generate_fallback_audio(text):
    """Generate a simple placeholder tone as fallback audio.

    The tone lasts 0.08 seconds per character of ``text``, capped at
    3 seconds. It is a 440 Hz sine at 20% amplitude; texts longer than
    10 characters additionally get an 880 Hz sine at 10% amplitude so that
    longer inputs sound different.

    Args:
        text: The text the tone stands in for. Only its length is used.
            ``None`` is treated like an empty string.

    Returns:
        A ``(sample_rate, samples)`` tuple with ``sample_rate == 16000`` and
        ``samples`` a one-dimensional ``int16`` NumPy array. When the
        computed length is zero samples, the result of
        ``generate_silence()`` is returned instead.
    """
    sr = 16000
    n_chars = 0 if text is None else len(text)

    # Create a simple beep based on text length
    duration = min(n_chars * 0.08, 3.0)
    samples = int(duration * sr)
    if samples == 0:
        return generate_silence()

    # Sample k is taken at exactly k / sr seconds. Using np.linspace with
    # its default inclusive endpoint would spread `samples` points over
    # `samples - 1` intervals, making the tone play slightly sharp.
    t = np.arange(samples) / sr

    frequency = 440  # A4
    audio = np.sin(2 * np.pi * frequency * t) * 0.2

    # Add some variation for different text
    if n_chars > 10:
        audio += np.sin(2 * np.pi * 880 * t) * 0.1

    # Peak amplitude is at most 0.3, so scaling to int16 cannot overflow.
    return sr, (audio * 32767).astype(np.int16)
# Create the interface using the most basic approach
def create_interface():
    """Build the Gradio interface that exposes ``safe_tts``.

    The interface uses the string shortcuts ``"text"`` and ``"audio"`` for
    its input and output components and ships three Armenian example
    phrases.

    Returns:
        The configured ``gr.Interface`` instance (not yet launched).
    """
    # Example phrases offered to the user in the UI.
    sample_phrases = [
        "Բարև ձեզ",
        "Ինչպե՞ս եք",
        "Շնորհակալություն",
    ]

    return gr.Interface(
        fn=safe_tts,
        inputs="text",
        outputs="audio",
        title="Armenian Text-to-Speech",
        description="Enter Armenian text to generate speech.",
        examples=sample_phrases,
    )
# Main execution
if __name__ == "__main__":
    # Launch settings for the regular interface.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "quiet": True,  # Reduce noise
    }

    try:
        # Normal path: full interface backed by safe_tts.
        app = create_interface()
        app.launch(**launch_options)
    except Exception as e:
        print(f"Failed to launch: {e}")
        # Last resort: a bare interface that only produces the fallback
        # tone, substituting "test" when the input is empty.
        emergency_app = gr.Interface(
            fn=lambda x: generate_fallback_audio(x or "test"),
            inputs="text",
            outputs="audio",
            title="Armenian TTS (Emergency Mode)",
        )
        emergency_app.launch(server_name="0.0.0.0", server_port=7860)
|