File size: 4,038 Bytes
3f1840e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
Armenian TTS - Minimal HF Spaces Version
=======================================

Absolutely minimal version to avoid all possible compatibility issues.
"""

import gradio as gr
import numpy as np
import logging
import os
import sys

# Configure logging at INFO level via basicConfig and create this module's
# logger, which the functions below use for status and error messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def setup_pipeline():
    """Build the TTS pipeline, degrading gracefully if it cannot be created.

    Adds the ``src`` directory that sits next to this file to ``sys.path``,
    imports ``TTSPipeline``, constructs it with the ``Edmon02/TTS_NB_2``
    checkpoint and calls ``optimize_for_production()`` on it.

    Returns:
        tuple: ``(pipeline, True)`` on success, or ``(None, False)`` when the
        import, construction, or optimization step raised. The failure is
        logged together with its traceback and is never propagated.
    """
    try:
        # Put <this file's dir>/src at the front of sys.path (only once).
        # NOTE(review): presumably needed so modules inside ``src`` can import
        # each other by bare name -- confirm against the package layout.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        src_path = os.path.join(current_dir, 'src')
        if src_path not in sys.path:
            sys.path.insert(0, src_path)

        # Imported here rather than at module top so that a missing or broken
        # dependency is caught below instead of crashing the app at import.
        from src.pipeline import TTSPipeline

        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            use_mixed_precision=True
        )
        pipeline.optimize_for_production()
        logger.info("TTS pipeline initialized successfully")
        return pipeline, True

    except Exception as e:
        # Top-level boundary: the caller falls back to test mode, so swallow
        # the error, but keep the full traceback in the log for diagnosis.
        logger.exception("Pipeline initialization failed: %s", e)
        return None, False

def _fallback_tone(text, sample_rate=16000):
    """Generate a short decaying tone used when real TTS is unavailable.

    The tone lasts 0.08 s per character of ``text`` (capped at 4 s) and is a
    440 Hz sine with its 2nd and 3rd harmonics mixed in, shaped by an
    exponential decay envelope.

    Args:
        text: Text whose length determines the tone duration.
        sample_rate: Output sampling rate in Hz.

    Returns:
        tuple: ``(sample_rate, samples)`` where ``samples`` is a 1-D
        ``np.int16`` array.
    """
    duration = min(len(text) * 0.08, 4.0)  # Max 4 seconds
    samples = int(duration * sample_rate)

    if samples <= 0:
        # Only reachable for empty text: return half a second of silence.
        return sample_rate, np.zeros(sample_rate // 2, dtype=np.int16)

    t = np.linspace(0, duration, samples)
    frequency = 440  # A4 note

    # Fundamental plus two harmonics; peak amplitude is at most 0.35, so the
    # int16 conversion below cannot overflow.
    audio = np.sin(2 * np.pi * frequency * t) * 0.2
    audio += np.sin(2 * np.pi * frequency * 2 * t) * 0.1
    audio += np.sin(2 * np.pi * frequency * 3 * t) * 0.05

    # Exponential decay so the tone fades out instead of stopping abruptly.
    audio *= np.exp(-t * 2)

    return sample_rate, (audio * 32767).astype(np.int16)


def tts_process(text):
    """Convert ``text`` to audio, never raising to the caller.

    Behaviour by case:
      * ``text`` is not a string, or is empty/whitespace-only: one second of
        silence at 16 kHz.
      * No pipeline is available: a placeholder tone whose length depends on
        the text length (see ``_fallback_tone``).
      * Otherwise: the result of ``tts_pipeline.synthesize`` with the
        hard-coded speaker ``"BDL"``; if that raises, half a second of
        silence at 16 kHz.

    Args:
        text: The text to synthesize.

    Returns:
        tuple: ``(sample_rate, audio)``. For the silence and fallback paths
        ``audio`` is a 1-D ``np.int16`` array; for real synthesis it is
        whatever the pipeline returns.
    """
    # Anything that is not a non-blank string yields 1 second of silence.
    if not isinstance(text, str) or not text.strip():
        return 16000, np.zeros(16000, dtype=np.int16)

    text = text.strip()

    # Module-level state is only read here, so no ``global`` is required.
    if not pipeline_available or tts_pipeline is None:
        logger.info("Using fallback for text: %s...", text[:30])
        return _fallback_tone(text)

    try:
        logger.info("Synthesizing: %s...", text[:50])

        sample_rate, audio = tts_pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True
        )

        logger.info("Successfully generated %d samples", len(audio))
        return sample_rate, audio

    except Exception as e:
        # Keep the UI responsive: log the failure with its traceback and
        # return silence rather than surfacing the error.
        logger.exception("TTS synthesis failed: %s", e)
        return 16000, np.zeros(8000, dtype=np.int16)

# Build the pipeline a single time at import; every request reuses it.
logger.info("Initializing Armenian TTS application...")
tts_pipeline, pipeline_available = setup_pipeline()

# Choose the UI text that matches whether real synthesis is available.
title, description = (
    (
        "🇦🇲 Armenian Text-to-Speech (Ready)",
        "Convert Armenian text to speech using SpeechT5.",
    )
    if pipeline_available
    else (
        "🇦🇲 Armenian TTS (Test Mode)",
        "TTS system in test mode - will generate simple audio tones.",
    )
)

# Minimal Gradio UI: one text input, one audio output, three sample phrases.
app = gr.Interface(
    fn=tts_process,
    inputs="text",
    outputs="audio",
    examples=["Բարև ձեզ", "Շնորհակալություն", "Ինչպե՞ս եք"],
    title=title,
    description=description,
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    # Listen on all interfaces, port 7860, without a public share link.
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)