File size: 669 Bytes
0011a8d
 
1b19131
bcd34ed
1b19131
0011a8d
 
 
 
 
bcd34ed
0011a8d
 
 
 
 
 
4531894
 
0011a8d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from TTS.api import TTS
import torch
import os
import spaces
# Set before the model is constructed below, so the variable is already in the
# process environment while the model loads.
# NOTE(review): presumably pre-accepts the Coqui model licence prompt so the
# script can run non-interactively -- confirm against the TTS docs.
os.environ["COQUI_TOS_AGREED"] = "1"

# Initialize TTS
# The `gpu=True` constructor flag is deprecated upstream in favour of moving
# the model with `.to(device)`. Selecting the device here also lets the script
# fall back to CPU instead of failing when CUDA is unavailable.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(
    "cuda" if torch.cuda.is_available() else "cpu"
)

# Generate and save speaker embedding
@spaces.GPU
def save_speaker_embedding(speaker_wav, output_path):
    """Compute a speaker embedding from a reference clip and save it.

    Args:
        speaker_wav: Reference audio, passed unchanged to
            ``tts.speaker_manager.compute_embedding_from_clip``.
        output_path: Destination handed to ``torch.save``.

    Returns:
        ``output_path``, unchanged, so callers can chain on the saved file.
    """
    # NOTE(review): relies on the module-level ``tts`` object exposing a
    # ``speaker_manager`` with ``compute_embedding_from_clip`` -- confirm this
    # holds for the loaded model and the installed TTS version.
    manager = tts.speaker_manager
    speaker_embedding = manager.compute_embedding_from_clip(speaker_wav)

    # Serialise whatever object the speaker manager returned.
    torch.save(speaker_embedding, output_path)
    return output_path

# Example usage
# NOTE: these statements sit at module top level, so they execute whenever the
# file is run or imported (there is no ``__main__`` guard).
# Reference clip to embed and the file the embedding is written to; both are
# relative paths.
speaker_wav = "KristenScottGradeAClip.wav"
output_embedding_path = "xttsv2_kristenscott_embedding.pth"
# Compute the embedding for the clip and write it to ``output_embedding_path``.
save_speaker_embedding(speaker_wav, output_embedding_path)
print(f"Speaker embedding saved at {output_embedding_path}")