cosyvoice / test_basic_tts.py
Marcos Remar
Initial CosyVoice code without binary files
0ea7b2a
#!/usr/bin/env python3
import os
import sys
print("=== Basic CosyVoice TTS Test ===\n")

# Directory the CosyVoice-300M checkpoint is expected to live in, relative to
# the current working directory.
git_model = "pretrained_models/CosyVoice-300M-git"

# Files whose presence (and size) is reported for the model directory.
key_files = ['speech_tokenizer_v1.onnx', 'flow.pt', 'campplus.onnx', 'cosyvoice.yaml']


def describe_key_files(model_dir, names):
    """Return one status line per expected file in *model_dir*.

    Args:
        model_dir: Directory that should contain the model files.
        names: File names to look for directly inside ``model_dir``.

    Returns:
        A list of strings, one per entry of ``names`` and in the same order:
        a check mark with the size in MB when the entry is a readable regular
        file, or a cross with ``(missing)`` otherwise.
    """
    lines = []
    for name in names:
        path = os.path.join(model_dir, name)
        # isfile() follows symlinks, so a dangling link or a directory with
        # the same name is reported as missing instead of crashing getsize().
        if os.path.isfile(path):
            size_mb = os.path.getsize(path) / (1024 * 1024)
            lines.append(f" ✓ {name} ({size_mb:.1f} MB)")
        else:
            lines.append(f" ✗ {name} (missing)")
    return lines


# Check git clone status.
# isdir() rather than exists(): a stray regular file at this path would make
# os.listdir() raise NotADirectoryError.
if os.path.isdir(git_model):
    print(f"Checking {git_model}...")
    files = os.listdir(git_model)
    print(f"Found {len(files)} files")
    # Look for key files
    for status_line in describe_key_files(git_model, key_files):
        print(status_line)
# Try basic import test


def find_onnx_files(root_dir):
    """Return the paths of all ``.onnx`` files found anywhere under *root_dir*.

    Paths are returned in ``os.walk`` order. A missing ``root_dir`` yields an
    empty list because ``os.walk`` silently produces nothing for it.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(root_dir)
        for name in names
        if name.endswith('.onnx')
    ]


def print_error_hints(error):
    """Print troubleshooting hints chosen from the text of *error*.

    A message mentioning ``speech_tokenizer`` triggers a listing of every ONNX
    file under ``pretrained_models``; a ``No module named`` message prints the
    virtual-environment activation hint. Any other error prints nothing.
    """
    message = str(error)
    if "speech_tokenizer" in message:
        print("\n💡 The speech tokenizer is missing. Checking alternative sources...")
        # List all .onnx files
        print("\nSearching for ONNX files:")
        for onnx_path in find_onnx_files('pretrained_models'):
            print(f" Found: {onnx_path}")
    elif "No module named" in message:
        print("\n💡 Missing module. Make sure you're in the virtual environment:")
        print(" source cosyvoice_env/bin/activate")


# Make the vendored Matcha-TTS checkout importable (path is relative to the
# working directory) -- presumably required by the cosyvoice imports below.
sys.path.append('third_party/Matcha-TTS')

try:
    print("\nTesting imports...")
    # torch and onnxruntime are imported only to verify they are installed.
    import torch  # noqa: F401
    import torchaudio
    import onnxruntime  # noqa: F401
    from cosyvoice.utils.file_utils import load_wav
    print("✓ All imports successful")

    # If git model is complete, use it
    if os.path.exists(os.path.join(git_model, 'speech_tokenizer_v1.onnx')):
        print(f"\nUsing model from: {git_model}")
        from cosyvoice.cli.cosyvoice import CosyVoice

        # Initialize with git model
        cosyvoice = CosyVoice(git_model, load_jit=False, load_trt=False, fp16=False)

        # Load example audio
        prompt_speech = load_wav('asset/zero_shot_prompt.wav', 16000)

        # Simple test
        text = "Teste de síntese de voz. Um, dois, três, testando!"
        prompt = "Olá, teste."
        print(f"\nGenerating: '{text}'")

        # Every file written is remembered so the summary below never reads a
        # loop variable that was not bound (inference may yield nothing).
        output_files = []
        results = cosyvoice.inference_zero_shot(text, prompt, prompt_speech, stream=False)
        for index, result in enumerate(results):
            output_file = f'test_basic_{index}.wav'
            torchaudio.save(output_file, result['tts_speech'], cosyvoice.sample_rate)

            # Get info
            duration = result['tts_speech'].shape[1] / cosyvoice.sample_rate
            size = os.path.getsize(output_file) / 1024
            print(f"✓ Generated: {output_file}")
            print(f" Duration: {duration:.2f}s, Size: {size:.1f} KB")
            print(f" Sample rate: {cosyvoice.sample_rate} Hz")
            output_files.append(output_file)

        output_count = len(output_files)
        if output_files:
            print(f"\n✅ Success! Generated {output_count} audio file(s)")
            # Try to play if possible
            print("\nTo play the audio on server:")
            for output_file in output_files:
                print(f" aplay {output_file}")
            print("\nTo download to local machine:")
            # NOTE(review): the "[email protected]" login below looks like a
            # redacted placeholder -- substitute the real user@host.
            for output_file in output_files:
                print(f" scp -P 40053 [email protected]:~/CosyVoice/{output_file} .")
        else:
            # Do not claim success when nothing was synthesized.
            print("\n❌ No audio was generated: inference returned no output.")
    else:
        print("\n⚠️ Model files still downloading. Please wait...")
        print("You can check download progress with:")
        print(" ps aux | grep modelscope")
except Exception as e:
    # Top-level boundary of a diagnostic script: report the failure and offer
    # hints instead of showing a traceback.
    print(f"\n❌ Error: {e}")
    # Specific error handling
    print_error_hints(e)