# Spaces: diabolic6045/tts-api (Sleeping)
# File size: 3,594 Bytes
# 71905d8

import torch
from TTS.api import TTS
import os

def test_coqui_tts():
    """Run an end-to-end smoke test of the Coqui TTS library.

    Progress is printed to stdout. The test proceeds in stages:

    1. Preview the model catalogue. This stage is informational only: a
       failure is reported but does not stop the remaining stages.
    2. Load the XTTS v2 model on CUDA when available, otherwise on CPU,
       and preview its preset speakers.
    3. Synthesize a short English sentence into ``test_output.wav`` in the
       current working directory and report the resulting file size.
    4. Print a hint on how to try voice cloning.

    Library errors are caught and printed rather than raised. If the model
    cannot be loaded the function returns early, skipping stages 3 and 4.

    Returns:
        None.
    """
    preview_limit = 10  # cap printed lists so the output stays readable

    # Get device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    try:
        # List available 🐸TTS models
        print("\n=== Available TTS Models ===")
        catalogue = TTS().list_models()
        # NOTE(review): some TTS releases appear to return a manager object
        # here rather than a plain list of names; when it exposes its own
        # list_models(), ask it for the names. Confirm against the installed
        # TTS version.
        if hasattr(catalogue, "list_models"):
            catalogue = catalogue.list_models()
        models = list(catalogue)

        # Print only the first few models to avoid overwhelming output
        print("First 10 available models:")
        for position, model in enumerate(models[:preview_limit], start=1):
            print(f"{position}. {model}")

        if len(models) > preview_limit:
            print(f"... and {len(models) - preview_limit} more models")

    except Exception as e:
        # The catalogue is informational; report the problem and carry on
        # so that model loading and synthesis are still exercised.
        print(f"Error listing models: {e}")

    try:
        # Initialize TTS with XTTS v2 model
        print("\n=== Initializing XTTS v2 Model ===")
        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
        print("XTTS v2 model loaded successfully!")

        # List speakers if available. Read the attribute once and copy it
        # into a list so slicing/indexing works for any sequence-like value
        # and stage 3 sees the same speakers that were printed here.
        print("\n=== Available Speakers ===")
        speakers = list(getattr(tts, "speakers", None) or [])
        if speakers:
            print("Available speakers:")
            for speaker in speakers[:preview_limit]:
                print(f"- {speaker}")
            if len(speakers) > preview_limit:
                print(f"... and {len(speakers) - preview_limit} more speakers")
        else:
            print("No preset speakers available or speakers list is empty")

    except Exception as e:
        # Without a loaded model nothing else can be tested.
        print(f"Error initializing XTTS v2 model: {e}")
        print("This might be due to model download requirements or missing dependencies")
        return

    try:
        # Test TTS to file with preset speaker (if available)
        print("\n=== Testing TTS to File ===")
        output_file = "test_output.wav"

        # Remove a leftover file from an earlier run; otherwise the
        # existence check below could report success on stale output.
        if os.path.exists(output_file):
            os.remove(output_file)

        synthesis_args = {
            "text": "Hello world! This is a test of Coqui TTS library.",
            "language": "en",
            "file_path": output_file,
        }
        if speakers:
            # Use first available speaker
            speaker_name = speakers[0]
            print(f"Using speaker: {speaker_name}")
            synthesis_args["speaker"] = speaker_name
        else:
            # Try without speaker specification
            print("No speakers available, trying without speaker specification...")

        tts.tts_to_file(**synthesis_args)

        if os.path.exists(output_file):
            print(f"✅ TTS successful! Audio saved to: {output_file}")
            file_size = os.path.getsize(output_file)
            print(f"File size: {file_size} bytes")
        else:
            print("❌ TTS failed - output file not created")

    except Exception as e:
        print(f"Error during TTS generation: {e}")

    # Note about voice cloning
    print("\n=== Voice Cloning Information ===")
    print("To test voice cloning, you would need:")
    print("1. A reference audio file (speaker_wav parameter)")
    print("2. Use tts.tts() method instead of tts_to_file()")
    print("Example:")
    print('wav = tts.tts(text="Hello!", speaker_wav="reference.wav", language="en")')

if __name__ == "__main__":
    # Script entry point: show a banner with a 50-character rule, then run
    # the smoke test.
    banner_title = "🐸 Testing Coqui TTS Library"
    banner_rule = "=" * 50
    print(banner_title, banner_rule, sep="\n")
    test_coqui_tts()