File size: 3,736 Bytes
3f1840e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
SpeechT5 Armenian TTS - Ultra-Compatible Version
===============================================

Maximum compatibility version for HuggingFace Spaces.
"""

import gradio as gr
import numpy as np
import logging
import os
import sys
import warnings

# Silence every Python warning process-wide: no category or module filter is
# given, so this applies to warnings raised by any imported library as well.
warnings.filterwarnings("ignore")

# Configure the root logger to emit WARNING and above only, then create the
# module-level logger that safe_tts() uses to report pipeline failures.
logging.basicConfig(level=logging.WARNING)  # Reduce log noise
logger = logging.getLogger(__name__)

def safe_tts(text):
    """
    Synthesize speech for ``text`` without ever raising to the caller.

    Returns the ``(sample_rate, audio)`` pair produced by the TTS pipeline.
    Non-string or blank input yields the result of ``generate_silence()``;
    any exception raised while importing, building or running the pipeline
    is logged as a warning and ``generate_fallback_audio(text)`` is returned
    instead.
    """
    # Reject anything that is not a non-blank string.
    if not (isinstance(text, str) and text.strip()):
        return generate_silence()

    try:
        # Make the sibling ``src`` directory importable before loading the
        # real pipeline.
        here = os.path.dirname(os.path.abspath(__file__))
        src_dir = os.path.join(here, 'src')
        if src_dir not in sys.path:
            sys.path.insert(0, src_dir)

        from src.pipeline import TTSPipeline

        # The pipeline is built on first use and cached as an attribute on
        # this function so later calls reuse it.
        if not hasattr(safe_tts, 'pipeline'):
            pipeline_options = {
                "model_checkpoint": "Edmon02/TTS_NB_2",
                "max_chunk_length": 200,
                "use_mixed_precision": True,
            }
            safe_tts.pipeline = TTSPipeline(**pipeline_options)
            safe_tts.pipeline.optimize_for_production()

        # Run synthesis; unpacking stays inside the try so a malformed
        # result also triggers the fallback.
        sample_rate, waveform = safe_tts.pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True,
        )
    except Exception as err:
        logger.warning(f"TTS failed, using fallback: {err}")
        return generate_fallback_audio(text)
    else:
        return sample_rate, waveform

def generate_silence():
    """Return ``(16000, samples)`` where samples is 8000 zero-valued int16 values."""
    sample_rate = 16000
    silent_samples = np.zeros((8000,), dtype=np.int16)
    return sample_rate, silent_samples

def generate_fallback_audio(text):
    """
    Generate a simple tone as a stand-in when real synthesis is unavailable.

    The tone lasts 0.08 s per character of ``text``, capped at 3 s. It is a
    440 Hz sine at amplitude 0.2; texts longer than 10 characters also get
    an 880 Hz sine at amplitude 0.1 mixed in.

    Args:
        text: The text that could not be synthesized; only its length is used.

    Returns:
        A ``(sample_rate, samples)`` tuple with ``sample_rate == 16000`` and
        ``samples`` a 1-D ``int16`` NumPy array. Empty text returns the
        result of ``generate_silence()``.
    """
    sr = 16000
    duration = min(len(text) * 0.08, 3.0)
    samples = int(duration * sr)

    if samples == 0:
        return generate_silence()

    # Sample k must sit at time k / sr to match the sample rate we return.
    # ``np.linspace(0, duration, samples)`` includes the endpoint, which
    # spaces samples by duration / (samples - 1) and makes the tone slightly
    # sharp.
    t = np.arange(samples) / sr

    # Base tone (A4).
    audio = np.sin(2 * np.pi * 440 * t) * 0.2

    # Add an octave overtone so longer texts sound different.
    if len(text) > 10:
        audio += np.sin(2 * np.pi * 880 * t) * 0.1

    # Peak amplitude is at most 0.3, so scaling to int16 cannot overflow.
    return sr, (audio * 32767).astype(np.int16)

# Create the interface using the most basic approach
def create_interface():
    """Build the Gradio interface that feeds a text box into ``safe_tts``."""
    # Sample phrases offered as examples for the text input.
    sample_phrases = [
        "Բարև ձեզ",
        "Ինչպե՞ս եք",
        "Շնորհակալություն",
    ]

    # String shortcuts ("text" / "audio") select the input and output
    # components.
    return gr.Interface(
        fn=safe_tts,
        inputs="text",
        outputs="audio",
        title="Armenian Text-to-Speech",
        description="Enter Armenian text to generate speech.",
        examples=sample_phrases,
    )

# Main execution
if __name__ == "__main__":
    # Options for the regular launch: all interfaces, port 7860, no public
    # share link, reduced console output.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "quiet": True,
    }

    try:
        app = create_interface()
        app.launch(**launch_options)
    except Exception as launch_error:
        print(f"Failed to launch: {launch_error}")
        # Last resort: serve a bare interface that only produces the
        # fallback tone, substituting "test" for empty input.
        emergency_app = gr.Interface(
            fn=lambda x: generate_fallback_audio(x or "test"),
            inputs="text",
            outputs="audio",
            title="Armenian TTS (Emergency Mode)",
        )
        emergency_app.launch(server_name="0.0.0.0", server_port=7860)