cosyvoice / test_cosyvoice2_english.py
Marcos Remar
Add CosyVoice2-0.5B test scripts and download script
08991d5
#!/usr/bin/env python3
import os
import sys
import time
MODEL_PATH = 'pretrained_models/CosyVoice2-0.5B'
OUTPUT_FILE = "cosyvoice2_english_test.wav"


def main():
    """Smoke-test English zero-shot synthesis with the CosyVoice2-0.5B model.

    Loads the model from ``MODEL_PATH``, synthesizes one English sentence,
    saves the first generated result to ``OUTPUT_FILE`` and prints timing and
    file-size information.

    Exits with status 1 when the model directory is missing or when any step
    raises, so that a failed run is visible to the calling shell or CI job.
    """
    print("=== CosyVoice2-0.5B English Test ===")

    # Configure the environment: set HF_ENDPOINT to the hf-mirror.com mirror
    # before any third-party library is imported and reads it.
    os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

    # Check that the model exists before paying for the heavy imports below.
    if not os.path.exists(MODEL_PATH):
        print(f"❌ Modelo ainda não baixado em {MODEL_PATH}")
        print("Aguarde o download terminar...")
        sys.exit(1)

    try:
        # NOTE(review): the original imported the v1 ``CosyVoice`` class; the
        # upstream README loads CosyVoice2-0.5B checkpoints with ``CosyVoice2``
        # using exactly these keyword arguments. Confirm against the installed
        # cosyvoice version.
        from cosyvoice.cli.cosyvoice import CosyVoice2
        import torchaudio

        print("Carregando CosyVoice2-0.5B...")
        start = time.perf_counter()
        cosyvoice = CosyVoice2(
            MODEL_PATH, load_jit=False, load_trt=False, fp16=False
        )
        print(f"✅ Modelo carregado em {time.perf_counter()-start:.1f}s")

        # English test sentence. The original literal contained "Hello\!",
        # an invalid escape that put a stray backslash into the spoken text.
        text = (
            "Hello! This is CosyVoice version two point five B. "
            "The new model has better streaming performance and improved "
            "pronunciation for English text to speech synthesis."
        )
        prompt_text = (
            "Welcome to the demonstration of our advanced speech synthesis "
            "system."
        )

        print(f"\nTexto: {text}")
        print("Gerando áudio...")
        start = time.perf_counter()

        # NOTE(review): the third argument (the prompt audio) is None here.
        # Upstream examples pass a 16 kHz prompt waveform whose transcript is
        # ``prompt_text`` -- confirm that zero-shot inference accepts None.
        results = cosyvoice.inference_zero_shot(
            text, prompt_text, None, stream=False
        )

        # Only the first yielded result is saved, as in the original loop.
        first_result = next(iter(results), None)
        if first_result is None:
            # Without this guard an empty generator would be reported as a
            # success (possibly pointing at a stale file from an earlier run).
            raise RuntimeError("inference_zero_shot não produziu áudio")
        torchaudio.save(
            OUTPUT_FILE, first_result['tts_speech'], cosyvoice.sample_rate
        )

        duration = time.perf_counter() - start
        print(f"✅ Áudio gerado em {duration:.1f}s")

        if os.path.exists(OUTPUT_FILE):
            size = os.path.getsize(OUTPUT_FILE) / 1024
            print(f"📊 Arquivo: {OUTPUT_FILE} ({size:.1f} KB)")
    except Exception as e:
        # Top-level boundary of the script: report the failure with a full
        # traceback, then signal it through the exit status.
        print(f"❌ Erro: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()