Spaces:

marcosremar2
/

cosyvoice

Configuration error

cosyvoice / test_tts_cosyvoice.py

Marcos Remar

Initial CosyVoice code without binary files

0ea7b2a about 1 month ago

4.74 kB

	#!/usr/bin/env python3
"""Smoke test for CosyVoice text-to-speech.

Waits for the CosyVoice-300M model files to appear on disk, then runs
zero-shot synthesis for a few languages and, when the SFT model is present,
one SFT synthesis. Audio is written to ``output_*.wav`` files in the current
working directory.

All paths are relative, so the script is expected to be started from the
repository root. The process exit status is 0 on success and 1 when the model
files never show up or any test step raises.

Nothing runs on import: the work lives in ``main()`` behind a
``__name__ == "__main__"`` guard. Helper names deliberately do not start with
``test_`` so that pytest, which would pick this file up by its name, does not
mistake them for test functions.
"""
import sys
import os
import time
import traceback

# Model files that must exist before the model can be loaded.
REQUIRED_FILES = [
    'pretrained_models/CosyVoice-300M/flow.pt',
    'pretrained_models/CosyVoice-300M/speech_tokenizer_v1.onnx',
    'pretrained_models/CosyVoice-300M/campplus.onnx',
]
MAX_WAIT_SECONDS = 300  # 5 minutes
POLL_INTERVAL_SECONDS = 10

MODEL_DIR = 'pretrained_models/CosyVoice-300M'
SFT_MODEL_DIR = 'pretrained_models/CosyVoice-300M-SFT'
SFT_CONFIG = 'pretrained_models/CosyVoice-300M-SFT/cosyvoice.yaml'
ZERO_SHOT_PROMPT = 'asset/zero_shot_prompt.wav'

# (language label, output file prefix, text to synthesize, prompt text)
# NOTE(review): upstream CosyVoice examples appear to pass the transcript of
# the prompt audio as the prompt text; these strings do not look like a
# transcript of asset/zero_shot_prompt.wav -- confirm this is intended.
ZERO_SHOT_CASES = [
    (
        'Portuguese',
        'output_portuguese',
        "Olá, este é um teste de síntese de voz em português brasileiro.",
        "Testando a conversão de texto para fala.",
    ),
    (
        'Chinese',
        'output_chinese',
        "你好，我是通义生成式语音大模型，很高兴为您服务。",
        "希望这个测试能够成功。",
    ),
    (
        'English',
        'output_english',
        "Hello, this is a test of the CosyVoice text-to-speech system. It supports multiple languages.",
        "Testing voice synthesis.",
    ),
]


def find_missing_files(paths):
    """Return the entries of *paths* that do not exist, in input order."""
    return [path for path in paths if not os.path.exists(path)]


def wait_for_files(paths, max_wait=MAX_WAIT_SECONDS,
                   poll_interval=POLL_INTERVAL_SECONDS):
    """Poll until every path in *paths* exists or *max_wait* seconds elapse.

    Args:
        paths: File paths to wait for.
        max_wait: Give up once more than this many seconds have elapsed.
        poll_interval: Seconds to sleep between checks.

    Returns:
        An empty list when all files are present, otherwise the list of
        paths still missing at the moment the wait timed out.
    """
    # A monotonic clock keeps the elapsed time correct even if the wall
    # clock is adjusted while we wait.
    started = time.monotonic()
    while True:
        missing = find_missing_files(paths)
        if not missing:
            print("All required files found!")
            return []

        elapsed = time.monotonic() - started
        if elapsed > max_wait:
            print(f"Timeout waiting for files. Missing: {missing}")
            return missing

        print(f"Waiting for downloads to complete... ({int(elapsed)}s elapsed)")
        time.sleep(poll_interval)


def save_outputs(results, prefix, sample_rate, save):
    """Write every synthesized chunk to ``<prefix>_<index>.wav``.

    Args:
        results: Iterable of mappings that each carry a ``'tts_speech'`` entry.
        prefix: Output file name without the index and extension.
        sample_rate: Sample rate forwarded unchanged to *save*.
        save: Callable invoked as ``save(file_name, speech, sample_rate)``;
            the script passes ``torchaudio.save``.

    Returns:
        The file names handed to *save*, in order.
    """
    written = []
    for index, chunk in enumerate(results):
        output_file = f'{prefix}_{index}.wav'
        save(output_file, chunk['tts_speech'], sample_rate)
        print(f"✓ Saved: {output_file}")
        written.append(output_file)
    return written


def list_output_files(directory='.'):
    """Return ``(name, size_in_kib)`` for each ``output_*.wav`` in *directory*.

    Entries are sorted by name so the report is stable between runs. Files
    left over from earlier runs are included, since only the name is checked.
    """
    return [
        (name, os.path.getsize(os.path.join(directory, name)) / 1024)
        for name in sorted(os.listdir(directory))
        if name.startswith('output_') and name.endswith('.wav')
    ]


def run_sft_test(model_cls, save):
    """Synthesize one sentence with the SFT model's first speaker, if present.

    This step is best-effort: any exception is reported and swallowed so that
    a partially downloaded SFT model does not fail the whole run.
    """
    if not os.path.exists(SFT_CONFIG):
        print(f"SFT model not found ({SFT_CONFIG}); skipping.")
        return

    try:
        cosyvoice_sft = model_cls(SFT_MODEL_DIR, load_jit=False, load_trt=False, fp16=False)

        speakers = cosyvoice_sft.list_available_spks()
        print(f"Available speakers: {speakers[:5]}...")  # Show first 5

        if speakers:
            text = "Este é um teste usando o modelo SFT com falantes pré-definidos."
            speaker = speakers[0]
            print(f"\nGenerating with speaker '{speaker}': '{text}'")
            save_outputs(
                cosyvoice_sft.inference_sft(text, speaker, stream=False),
                'output_sft',
                cosyvoice_sft.sample_rate,
                save,
            )
    except Exception as e:
        print(f"SFT model not ready yet: {e}")


def run_synthesis_tests():
    """Load the model and run the zero-shot and SFT synthesis steps.

    The heavy third-party imports happen here, after the model files are
    known to exist, so that an import failure is reported by the caller like
    any other test error.
    """
    from cosyvoice.cli.cosyvoice import CosyVoice
    from cosyvoice.utils.file_utils import load_wav
    import torchaudio

    print("\n1. Testing Zero-Shot Voice Cloning")
    print("-" * 40)

    cosyvoice = CosyVoice(MODEL_DIR, load_jit=False, load_trt=False, fp16=False)

    # Check for the prompt once; without it no zero-shot case can run, so say
    # so instead of announcing generations that never happen.
    if os.path.exists(ZERO_SHOT_PROMPT):
        prompt_speech_16k = load_wav(ZERO_SHOT_PROMPT, 16000)
        for index, (language, prefix, text, prompt_text) in enumerate(ZERO_SHOT_CASES):
            lead = "" if index == 0 else "\n"
            print(f"{lead}Generating {language} speech: '{text}'")
            save_outputs(
                cosyvoice.inference_zero_shot(text, prompt_text, prompt_speech_16k, stream=False),
                prefix,
                cosyvoice.sample_rate,
                torchaudio.save,
            )
    else:
        print(f"Prompt audio not found ({ZERO_SHOT_PROMPT}); skipping zero-shot tests.")

    print("\n2. Testing SFT Mode (if available)")
    print("-" * 40)
    run_sft_test(CosyVoice, torchaudio.save)


def main():
    """Run the whole smoke test and return the process exit status."""
    print("=== CosyVoice Text-to-Speech Test ===\n")

    # Add Matcha-TTS to path
    sys.path.append('third_party/Matcha-TTS')

    print("Checking for required model files...")
    if wait_for_files(REQUIRED_FILES):
        return 1

    try:
        run_synthesis_tests()

        print("\n=== Test completed successfully! ===")
        print("\nGenerated audio files:")
        for name, size_kb in list_output_files('.'):
            print(f" - {name} ({size_kb:.1f} KB)")
    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # exit status instead of exiting 0.
        print(f"\nError during test: {e}")
        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())