Spaces:

marcosremar2
/

cosyvoice

Configuration error

cosyvoice / english_tts_test_timed.py

Marcos Remar

Add test scripts for CosyVoice 1.0 (300M model)

b65e164 about 1 month ago

2.58 kB

	#!/usr/bin/env python3
"""Timed smoke test for CosyVoice English text-to-speech.

Loads the CosyVoice model from ``pretrained_models/CosyVoice-300M-direct``,
synthesizes one English sentence into ``english_test_output.wav`` and prints
how long model loading and audio generation took.

Exit status is 1 when the model directory is missing, when the model yields
no audio, or when any other error occurs.
"""
import os
import sys
import time
import traceback

# Configure the environment: route Hugging Face downloads through the mirror.
# This is set before cosyvoice is imported below.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

print("=== CosyVoice English TTS Test with Timing ===")
print()

start_time = time.time()

try:
    # Imported inside the try block so that a missing or broken installation
    # is reported through the same error path as a runtime failure.
    from cosyvoice.cli.cosyvoice import CosyVoice
    import torchaudio

    model_path = 'pretrained_models/CosyVoice-300M-direct'

    # Check that the model exists.
    if not os.path.exists(model_path):
        print(f"❌ Error: Model not found at {model_path}")
        # SystemExit is not an Exception subclass, so the handler below
        # does not intercept it.
        sys.exit(1)

    # Measure the model loading time.
    load_start = time.time()
    print("Loading CosyVoice model...")
    cosyvoice = CosyVoice(model_path, load_jit=False, load_trt=False, fp16=False)
    load_time = time.time() - load_start
    print(f"✅ Model loaded in {load_time:.2f} seconds")
    print()

    # English text to synthesize.
    text = "Hello! This is a test of the CosyVoice text-to-speech system. The synthesis is working perfectly and generating high quality audio."
    prompt_text = "Welcome to the speech synthesis demonstration."

    print(f"Text: {text}")
    print(f"Prompt: {prompt_text}")
    print()

    # Measure the generation time.
    gen_start = time.time()
    print("Generating audio...")

    output_file = "english_test_output.wav"
    # NOTE(review): the third positional argument is None; it is presumably
    # the prompt audio for zero-shot synthesis - confirm the model accepts
    # None here, otherwise this call raises and is reported below.
    results = cosyvoice.inference_zero_shot(text, prompt_text, None, stream=False)

    # Only the first yielded result is saved. Fetching it explicitly avoids
    # reporting success (and reading an unbound name) when nothing is yielded.
    first_result = next(iter(results), None)
    if first_result is None:
        print("❌ Error: No audio was generated")
        sys.exit(1)

    speech = first_result['tts_speech']
    torchaudio.save(output_file, speech, cosyvoice.sample_rate)

    gen_time = time.time() - gen_start
    print(f"✅ Audio generated in {gen_time:.2f} seconds")

    # Check the generated file.
    if os.path.exists(output_file):
        size = os.path.getsize(output_file)
        # Duration = length of the second tensor dimension / sample rate.
        duration = speech.shape[1] / cosyvoice.sample_rate
        print()
        print("📊 File statistics:")
        print(f" - Filename: {output_file}")
        print(f" - Size: {size/1024:.1f} KB")
        print(f" - Duration: {duration:.2f} seconds")
        print(f" - Sample rate: {cosyvoice.sample_rate} Hz")

    total_time = time.time() - start_time
    print()
    print(f"⏱️ Total execution time: {total_time:.2f} seconds")
    print(f" - Model loading: {load_time:.2f}s ({load_time/total_time*100:.1f}%)")
    print(f" - Audio generation: {gen_time:.2f}s ({gen_time/total_time*100:.1f}%)")

except Exception as e:
    # Top-level boundary of the script: report the failure with a traceback
    # and the elapsed time, then signal failure through the exit status.
    print(f"❌ Error: {e}")
    traceback.print_exc()

    total_time = time.time() - start_time
    print(f"\nTotal time before error: {total_time:.2f} seconds")
    sys.exit(1)