Spaces:

Edmon02
/

SpeechT5_hy

Runtime error

File size: 3,046 Bytes

3f1840e

"""
SpeechT5 Armenian TTS - Minimal HF Spaces Version
================================================

Ultra-minimal version to avoid Gradio schema issues.
"""

import gradio as gr
import numpy as np
import logging
import os
import sys

# Shared TTS backend; stays None until init_pipeline() installs one.
pipeline = None

# Emit INFO-and-above records and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class _DummyPipeline:
    """Stand-in backend used when the real TTS pipeline cannot be loaded.

    Produces a quiet 440 Hz sine tone instead of speech so the app can still
    start and respond.
    """

    SAMPLE_RATE = 16000

    def synthesize(self, text, **kwargs):
        """Return ``(sample_rate, int16 samples)`` for a short test tone.

        The tone lasts 0.1 s per character of *text*, capped at 2 s. Extra
        keyword arguments are accepted and ignored, so callers may pass
        options such as ``speaker`` without error.
        """
        duration = min(len(text) * 0.1, 2.0)
        sr = self.SAMPLE_RATE
        samples = int(duration * sr)
        # Sample instants spaced exactly 1/sr apart (no duplicated endpoint).
        t = np.arange(samples) / sr
        audio = np.sin(2 * np.pi * 440 * t) * 0.1
        # Scale the float waveform to 16-bit PCM.
        return sr, (audio * 32767).astype(np.int16)


def init_pipeline():
    """Initialize the global ``pipeline``, falling back to a tone generator.

    Tries to import and construct ``TTSPipeline`` from the local ``src``
    package. Any failure (missing package, model load error, ...) is logged
    with its traceback and a ``_DummyPipeline`` is installed instead, so the
    app can still start.

    Returns:
        True if the real pipeline was created, False if the fallback is in
        use.
    """
    global pipeline
    try:
        # Make the bundled 'src' directory importable.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        src_path = os.path.join(current_dir, 'src')
        if src_path not in sys.path:
            sys.path.insert(0, src_path)

        from src.pipeline import TTSPipeline

        pipeline = TTSPipeline(
            model_checkpoint="Edmon02/TTS_NB_2",
            max_chunk_length=200,
            use_mixed_precision=True
        )
        pipeline.optimize_for_production()
        logger.info("Pipeline initialized")
        return True
    except Exception as e:
        # Deliberate catch-all: startup must not crash. logger.exception
        # keeps the traceback so the root cause is visible in the logs.
        logger.exception("Pipeline init failed: %s", e)
        pipeline = _DummyPipeline()
        return False

def _silence():
    """Return 8000 zero samples of int16 audio at 16 kHz as ``(rate, data)``."""
    return 16000, np.zeros(8000, dtype=np.int16)


def tts_function(text):
    """Synthesize speech for *text* using the global ``pipeline``.

    Args:
        text: Input text; ``None``, empty, or whitespace-only input yields
            silence without calling the pipeline.

    Returns:
        A ``(sample_rate, samples)`` tuple. On any synthesis error the
        failure is logged with its traceback and half a second of silence is
        returned instead of raising.
    """
    if not text or not text.strip():
        # Nothing to say: return short silence.
        return _silence()

    try:
        logger.info("Processing: %s...", text[:30])
        sr, audio = pipeline.synthesize(
            text=text,
            speaker="BDL",
            enable_chunking=True,
            apply_audio_processing=True
        )
        logger.info("Generated %d samples", len(audio))
        return sr, audio
    except Exception as e:
        # Deliberate catch-all so the UI never sees an exception;
        # logger.exception records the traceback for diagnosis.
        logger.exception("TTS error: %s", e)
        return _silence()

# Initialize pipeline; the title reflects whether the real model loaded
logger.info("Starting TTS app...")
success = init_pipeline()
status = "✅ Ready" if success else "⚠️ Test Mode"

# Create minimal interface.
# Components come from the top-level gradio namespace: the legacy
# gr.inputs / gr.outputs modules were deprecated in Gradio 3 and removed in
# Gradio 4, so referencing them fails at import time on current versions.
iface = gr.Interface(
    fn=tts_function,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter Armenian text...",
        label="Armenian Text"
    ),
    outputs=gr.Audio(label="Speech"),
    title=f"🎤 Armenian TTS {status}",
    description="Convert Armenian text to speech.",
    examples=[
        "Բարև ձեզ:",
        "Ինչպե՞ս եք:",
        "Շնորհակալություն:",
    ]
)

# Launch with minimal config: listen on all interfaces, port 7860
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",
        server_port=7860
    )