File size: 1,762 Bytes
08991d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python3
import os
import sys
import time

def main():
    """Run a one-shot English zero-shot synthesis smoke test.

    Loads the model found in ``pretrained_models/CosyVoice2-0.5B``,
    synthesizes a fixed English sentence, writes the first generated chunk
    to ``cosyvoice2_english_test.wav`` and prints timing and file-size
    figures along the way.

    Returns:
        int: 0 on success; 1 when the model directory is missing, when no
        audio chunk was produced, or when any step raised an exception.
    """
    print("=== CosyVoice2-0.5B English Test ===")

    # Environment setup: set HF_ENDPOINT before the third-party imports
    # below, as the original script did (presumably so Hugging Face
    # downloads go through the mirror -- confirm against deployment).
    os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

    try:
        # Imported lazily so a missing/broken installation is reported by
        # the handler below instead of aborting before any output.
        from cosyvoice.cli.cosyvoice import CosyVoice
        import torchaudio

        model_path = 'pretrained_models/CosyVoice2-0.5B'

        # Bail out early when the checkpoint has not been downloaded yet.
        if not os.path.exists(model_path):
            print(f"❌ Modelo ainda não baixado em {model_path}")
            print("Aguarde o download terminar...")
            return 1

        print("Carregando CosyVoice2-0.5B...")
        start = time.time()
        # NOTE(review): upstream examples load CosyVoice2-0.5B checkpoints
        # through the `CosyVoice2` class -- confirm `CosyVoice` is intended.
        cosyvoice = CosyVoice(model_path, load_jit=False, load_trt=False, fp16=False)
        print(f"✅ Modelo carregado em {time.time()-start:.1f}s")

        # English test sentence. The original literal contained the invalid
        # escape "\!", which put a stray backslash into the spoken text.
        text = (
            "Hello! This is CosyVoice version two point five B. "
            "The new model has better streaming performance and improved "
            "pronunciation for English text to speech synthesis."
        )
        prompt_text = "Welcome to the demonstration of our advanced speech synthesis system."

        print(f"\nTexto: {text}")
        print("Gerando áudio...")

        start = time.time()
        output_file = "cosyvoice2_english_test.wav"
        # NOTE(review): the third positional argument is passed as None
        # here; upstream examples pass a loaded 16 kHz prompt waveform --
        # confirm None is accepted by the installed version.
        chunks = cosyvoice.inference_zero_shot(text, prompt_text, None, stream=False)

        # Only the first chunk is saved, matching the original loop+break.
        first_chunk = next(iter(chunks), None)
        if first_chunk is None:
            # Nothing was generated: do not claim success or report a
            # stale file left over from a previous run.
            print("❌ Nenhum áudio foi gerado")
            return 1
        torchaudio.save(output_file, first_chunk['tts_speech'], cosyvoice.sample_rate)

        duration = time.time() - start
        print(f"✅ Áudio gerado em {duration:.1f}s")

        if os.path.exists(output_file):
            size = os.path.getsize(output_file) / 1024
            print(f"📊 Arquivo: {output_file} ({size:.1f} KB)")

    except Exception as e:
        # Top-level boundary: report the failure with a traceback and
        # signal it through the exit status instead of exiting with 0.
        print(f"❌ Erro: {e}")
        import traceback
        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())