Marcos Remar committed on
Commit
08991d5
·
1 Parent(s): 0f60c53

Add CosyVoice2-0.5B test scripts and download script

Browse files
compare_models_english.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Compare English synthesis between CosyVoice-300M and CosyVoice2-0.5B.

Step 1 synthesizes a fixed English sentence with the CosyVoice-300M-direct
checkpoint, saves the first generated chunk to ``english_300m.wav`` and
prints the elapsed time. Step 2 only reports whether the CosyVoice2-0.5B
checkpoint has been downloaded yet (it does not run that model).
"""
import os
import time

print("=== Comparação: CosyVoice-300M vs CosyVoice2-0.5B ===")

# English test sentence. A plain "!" is used here: "\!" is not a Python
# escape sequence, so it would leave a literal backslash in the text that
# is handed to the synthesizer.
test_text = (
    "Hello! This is a comparison test between CosyVoice models. "
    "We are testing English pronunciation quality."
)

# Test with the current model (300M).
print("\n1. Testando modelo CosyVoice-300M-direct...")
try:
    # Imported inside the try so a missing dependency is reported like any
    # other failure of this step instead of aborting the whole script.
    from cosyvoice.cli.cosyvoice import CosyVoice
    import torchaudio

    model1 = CosyVoice(
        'pretrained_models/CosyVoice-300M-direct',
        load_jit=False,
        load_trt=False,
        fp16=False,
    )

    start = time.time()
    # NOTE(review): the third argument is the prompt audio; None is passed
    # here -- confirm the installed cosyvoice version accepts that.
    chunks = model1.inference_zero_shot(
        test_text, "Testing speech synthesis.", None, stream=False
    )
    # Only the first generated chunk is saved.
    first_chunk = next(iter(chunks), None)
    if first_chunk is None:
        # Avoid printing a success message when nothing was written.
        raise RuntimeError("nenhum áudio foi gerado")
    torchaudio.save('english_300m.wav', first_chunk['tts_speech'], model1.sample_rate)
    print(f"✅ Gerado em {time.time()-start:.1f}s - Arquivo: english_300m.wav")

except Exception as e:
    # Top-level boundary of a diagnostic script: report and continue to step 2.
    print(f"❌ Erro no modelo 300M: {e}")

# New model (0.5B): only checks that the download has produced llm.pt.
print("\n2. Modelo CosyVoice2-0.5B...")
if os.path.exists('pretrained_models/CosyVoice2-0.5B/llm.pt'):
    print("✅ Modelo baixado! Pronto para testar.")
else:
    print("⏳ Ainda baixando... Execute novamente após o download.")
download_cosyvoice2.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
"""Download the CosyVoice2-0.5B checkpoint from ModelScope.

Fetches the ``iic/CosyVoice2-0.5B`` snapshot into
``pretrained_models/CosyVoice2-0.5B``, passing ``./model_cache`` as the
cache directory, and prints a message before and after the download.
"""
from modelscope import snapshot_download
import os

print("Baixando CosyVoice2-0.5B (modelo mais recente)...")
snapshot_download(
    'iic/CosyVoice2-0.5B',
    local_dir='pretrained_models/CosyVoice2-0.5B',
    cache_dir='./model_cache',
)
# Plain "!" here: "\!" is not a Python escape and would print a stray backslash.
print("Download concluído!")
test_cosyvoice2_english.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Smoke test: English zero-shot synthesis with CosyVoice2-0.5B.

Loads the checkpoint from ``pretrained_models/CosyVoice2-0.5B``, synthesizes
one English paragraph, saves the first generated chunk to
``cosyvoice2_english_test.wav`` and prints load time, generation time and
the output file size. Exits with status 1 if the checkpoint directory does
not exist; any other failure is printed together with its traceback.
"""
import os
import sys
import time

print("=== CosyVoice2-0.5B English Test ===")

# Environment setup: point Hugging Face downloads at the mirror endpoint.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

# Location of the CosyVoice2-0.5B checkpoint.
model_path = 'pretrained_models/CosyVoice2-0.5B'

# English test input. A plain "!" is used: "\!" is not a Python escape
# sequence and would leave a literal backslash in the text to synthesize.
text = (
    "Hello! This is CosyVoice version two point five B. "
    "The new model has better streaming performance and improved "
    "pronunciation for English text to speech synthesis."
)
prompt_text = "Welcome to the demonstration of our advanced speech synthesis system."
output_file = "cosyvoice2_english_test.wav"

try:
    # The upstream README loads CosyVoice2 checkpoints with the CosyVoice2
    # class rather than the original CosyVoice class.
    from cosyvoice.cli.cosyvoice import CosyVoice2
    import torchaudio

    # Make sure the model has been downloaded before trying to load it.
    if not os.path.exists(model_path):
        print(f"❌ Modelo ainda não baixado em {model_path}")
        print("Aguarde o download terminar...")
        # SystemExit is not an Exception subclass, so it is not swallowed
        # by the handler below.
        sys.exit(1)

    print("Carregando CosyVoice2-0.5B...")
    start = time.time()
    cosyvoice = CosyVoice2(model_path, load_jit=False, load_trt=False, fp16=False)
    print(f"✅ Modelo carregado em {time.time()-start:.1f}s")

    print(f"\nTexto: {text}")
    print("Gerando áudio...")

    start = time.time()
    # NOTE(review): the third argument is the prompt audio; None is passed
    # here -- confirm the installed cosyvoice version accepts that.
    chunks = cosyvoice.inference_zero_shot(text, prompt_text, None, stream=False)
    # Only the first generated chunk is saved.
    first_chunk = next(iter(chunks), None)
    if first_chunk is None:
        # Avoid reporting success (possibly against a stale output file)
        # when nothing was generated.
        raise RuntimeError("nenhum áudio foi gerado")
    torchaudio.save(output_file, first_chunk['tts_speech'], cosyvoice.sample_rate)

    duration = time.time() - start
    print(f"✅ Áudio gerado em {duration:.1f}s")

    if os.path.exists(output_file):
        size = os.path.getsize(output_file) / 1024
        print(f"📊 Arquivo: {output_file} ({size:.1f} KB)")

except Exception as e:
    # Top-level boundary of a diagnostic script: show the error in full.
    print(f"❌ Erro: {e}")
    import traceback
    traceback.print_exc()