Spaces:
Configuration error
Configuration error
#!/usr/bin/env python3
"""Basic smoke test for CosyVoice zero-shot TTS.

Run from the CosyVoice repository root. The script:

1. Reports which of the expected model files are present in the git-cloned
   model directory, with their sizes.
2. Checks that the runtime dependencies can be imported.
3. If the speech tokenizer file is present, runs one zero-shot synthesis and
   writes the result to ``test_basic_<i>.wav`` in the current directory.

All paths are relative to the current working directory.
"""
import os
import sys

# Directory the model repository is expected to have been cloned into.
GIT_MODEL = "pretrained_models/CosyVoice-300M-git"

# Files whose presence/size is reported before attempting to load the model.
KEY_FILES = ('speech_tokenizer_v1.onnx', 'flow.pt', 'campplus.onnx', 'cosyvoice.yaml')


def key_file_sizes(model_dir, entries, key_files=KEY_FILES):
    """Return ``(name, size_in_MB_or_None)`` for every name in *key_files*.

    *entries* is the directory listing of *model_dir*; a name that is not in
    it is reported with ``None`` instead of a size.
    """
    present = set(entries)
    report = []
    for name in key_files:
        if name in present:
            size_mb = os.path.getsize(os.path.join(model_dir, name)) / (1024 * 1024)
            report.append((name, size_mb))
        else:
            report.append((name, None))
    return report


def find_onnx_files(root):
    """Return the paths of all ``.onnx`` files found below *root*.

    A missing *root* yields an empty list because ``os.walk`` ignores
    directories it cannot read.
    """
    matches = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if filename.endswith('.onnx'):
                matches.append(os.path.join(dirpath, filename))
    return matches


def run_zero_shot(model_dir, load_wav, torchaudio):
    """Synthesize one test sentence and return the list of files written.

    *load_wav* and *torchaudio* are passed in by the caller so that all
    dependency imports happen in one place (the import check in ``main``).
    """
    from cosyvoice.cli.cosyvoice import CosyVoice

    # Initialize with git model
    cosyvoice = CosyVoice(model_dir, load_jit=False, load_trt=False, fp16=False)
    # Load example audio
    prompt_speech = load_wav('asset/zero_shot_prompt.wav', 16000)
    # Simple test
    text = "Teste de síntese de voz. Um, dois, três, testando!"
    prompt = "Olá, teste."
    print(f"\nGenerating: '{text}'")

    output_files = []
    results = cosyvoice.inference_zero_shot(text, prompt, prompt_speech, stream=False)
    for index, result in enumerate(results):
        output_file = f'test_basic_{index}.wav'
        torchaudio.save(output_file, result['tts_speech'], cosyvoice.sample_rate)
        # Get info
        duration = result['tts_speech'].shape[1] / cosyvoice.sample_rate
        size_kb = os.path.getsize(output_file) / 1024
        print(f"✓ Generated: {output_file}")
        print(f" Duration: {duration:.2f}s, Size: {size_kb:.1f} KB")
        print(f" Sample rate: {cosyvoice.sample_rate} Hz")
        output_files.append(output_file)
    return output_files


def main():
    """Run the model-file check, the import check and the synthesis test."""
    print("=== Basic CosyVoice TTS Test ===\n")

    # Check git clone status. isdir (not exists) so that a stray regular file
    # at this path does not make os.listdir raise.
    if os.path.isdir(GIT_MODEL):
        print(f"Checking {GIT_MODEL}...")
        entries = os.listdir(GIT_MODEL)
        print(f"Found {len(entries)} files")
        # Look for key files
        for name, size_mb in key_file_sizes(GIT_MODEL, entries):
            if size_mb is not None:
                print(f" ✓ {name} ({size_mb:.1f} MB)")
            else:
                print(f" ✗ {name} (missing)")

    # Try basic import test
    sys.path.append('third_party/Matcha-TTS')
    try:
        print("\nTesting imports...")
        # torch and onnxruntime are imported only to verify they are installed.
        import torch  # noqa: F401
        import torchaudio
        import onnxruntime  # noqa: F401
        from cosyvoice.utils.file_utils import load_wav
        print("✓ All imports successful")

        # Only attempt synthesis once the speech tokenizer has been fetched.
        if os.path.exists(os.path.join(GIT_MODEL, 'speech_tokenizer_v1.onnx')):
            print(f"\nUsing model from: {GIT_MODEL}")
            output_files = run_zero_shot(GIT_MODEL, load_wav, torchaudio)
            if not output_files:
                # Without this guard the script would claim success and then
                # fail on an undefined output file name.
                print("\n❌ Error: inference produced no audio output")
                return
            print(f"\n✅ Success! Generated {len(output_files)} audio file(s)")
            # Try to play if possible
            print("\nTo play the audio on server:")
            for output_file in output_files:
                print(f" aplay {output_file}")
            print("\nTo download to local machine:")
            for output_file in output_files:
                print(f" scp -P 40053 [email protected]:~/CosyVoice/{output_file} .")
        else:
            print("\n⚠️ Model files still downloading. Please wait...")
            print("You can check download progress with:")
            print(" ps aux | grep modelscope")
    except Exception as e:  # top-level boundary: report and give hints
        print(f"\n❌ Error: {e}")
        # Specific error handling
        if "speech_tokenizer" in str(e):
            print("\n💡 The speech tokenizer is missing. Checking alternative sources...")
            # List all .onnx files
            print("\nSearching for ONNX files:")
            for path in find_onnx_files('pretrained_models'):
                print(f" Found: {path}")
        elif "No module named" in str(e):
            print("\n💡 Missing module. Make sure you're in the virtual environment:")
            print(" source cosyvoice_env/bin/activate")


if __name__ == "__main__":
    main()