Spaces:
Sleeping
Sleeping
import re | |
import numpy as np | |
from transformers import pipeline | |
import gradio as gr | |
# Maps the speaker label accepted inside [Speaker]...[/Speaker] tags to the
# model identifier passed to the text-to-speech pipeline for that voice.
# NOTE(review): the repository IDs below could not be verified from this file;
# confirm that each one exists and loads with pipeline("text-to-speech").
VOICES = {
    "Amy (Female)": "microsoft/vits-piper-en-us-amy",
    "Joe (Male)": "microsoft/vits-piper-en-us-joe",
    "Clara (Female)": "microsoft/vits-piper-en-us-clb",
    "Ryan (Male)": "microsoft/vits-piper-en-us-jvs",
}
# Matches "[Speaker]text[/Speaker]". The closing tag must repeat the opening
# speaker name exactly; DOTALL lets the text span multiple lines.
_SEGMENT_PATTERN = re.compile(
    r"\[(?P<speaker>[^\]]+)\](?P<text>.*?)\[/(?P=speaker)\]",
    re.DOTALL,
)


def parse_segments(text):
    """Extract (speaker, text) pairs from bracket-tagged input.

    Args:
        text: Input such as "[Amy (Female)]Hello[/Amy (Female)]". Anything
            outside a well-formed tag pair is ignored.

    Returns:
        A list of (speaker, text) tuples in order of appearance, with the
        segment text stripped of surrounding whitespace. Empty or missing
        input yields an empty list.
    """
    if not text:
        return []
    return [
        (match.group("speaker"), match.group("text").strip())
        for match in _SEGMENT_PATTERN.finditer(text)
    ]
# Sample rate reported alongside the empty placeholder audio on failure.
_FALLBACK_SAMPLE_RATE = 22050
# Number of zero-valued samples appended after every segment as a pause.
_PAUSE_SAMPLES = 5000


def generate_podcast(input_text):
    """Convert speaker-tagged text into one audio track with multiple voices.

    Args:
        input_text: Text containing [Speaker]...[/Speaker] segments, where
            each speaker must be a key of VOICES.

    Returns:
        A pair ``((sampling_rate, audio), status)``. On success ``audio`` is
        the 1-D concatenation of every synthesized segment, each followed by
        a short silence, and ``sampling_rate`` is the rate reported by the
        last synthesized segment. On any failure ``audio`` is an empty array,
        the rate is the fallback value, and ``status`` describes the problem.
        Exceptions are never propagated; they are reported through ``status``.
    """

    def failure(message):
        return (_FALLBACK_SAMPLE_RATE, np.zeros(0)), message

    try:
        segments = parse_segments(input_text)
        if not segments:
            return failure("No valid speaker segments found")

        # Reject unknown speakers before loading any model, so a bad tag late
        # in the script does not waste the synthesis of earlier segments.
        for speaker, _ in segments:
            if speaker not in VOICES:
                return failure(f"Invalid speaker: {speaker}")

        clips = []
        sampling_rate = None
        current_pipe = None
        current_model = ""
        for speaker, text in segments:
            if not text:
                # Nothing to synthesize for an empty segment.
                continue
            model_name = VOICES[speaker]
            # Keep only one model alive at a time; reload on voice change.
            if current_model != model_name:
                current_pipe = None  # release the previous model first
                current_pipe = pipeline("text-to-speech", model=model_name)
                current_model = model_name
            output = current_pipe(text)
            # Flatten so the clip is 1-D like the pause buffer. Presumably
            # some models return a (1, N) array here - TODO confirm.
            audio = np.asarray(output["audio"]).reshape(-1)
            clips.append(audio)
            clips.append(np.zeros(_PAUSE_SAMPLES, dtype=audio.dtype))
            sampling_rate = output["sampling_rate"]

        if not clips:
            # Every segment was empty, so there is nothing to concatenate.
            return failure("No valid speaker segments found")

        final_audio = np.concatenate(clips)
        return (sampling_rate, final_audio), "Podcast generated successfully!"
    except Exception as e:
        # UI boundary: surface any failure as a status message, not a crash.
        return failure(f"Error: {str(e)}")
# Adapter between generate_podcast and the Gradio output components.
def podcast_interface(text):
    """Run podcast generation and shape the result for the Gradio outputs.

    Args:
        text: Speaker-tagged input text from the input textbox.

    Returns:
        A pair ``(audio_value, status)``. ``audio_value`` is the
        ``(sampling_rate, samples)`` tuple when audio was produced; otherwise
        it is ``gr.update()`` with no arguments, so no new audio value is
        supplied and only the status message is updated.
    """
    (sr, audio), status = generate_podcast(text)
    if audio.size > 0:
        return (sr, audio), status
    return gr.update(), status
# Gradio UI: one tagged-text input, an audio player and a status box as
# outputs, wired to podcast_interface.
demo = gr.Interface(
    fn=podcast_interface,
    inputs=gr.Textbox(
        label="Input Text with Speaker Tags",
        lines=12,
        placeholder="""Example format:
[Amy (Female)]Hello and welcome to today's episode![/Amy (Female)]
[Joe (Male)]Excited to have you here![/Joe (Male)]""",
    ),
    outputs=[
        gr.Audio(label="Generated Podcast", type="numpy"),
        gr.Textbox(label="Status", value="Ready"),
    ],
    examples=[
        ["""[Amy (Female)]Welcome to our podcast![/Amy (Female)]
[Joe (Male)]Today we're discussing AI innovations.[/Joe (Male)]"""]
    ],
    # The emoji prefix was mis-decoded in the source; restored to the
    # studio-microphone emoji that fits the title.
    title="🎙️ Multi-Voice Podcast Generator",
    description="Generate podcasts with multiple free AI voices using Microsoft's Piper TTS models. Use [SpeakerName] tags to assign different voices to different text segments.",
    theme="soft",
    allow_flagging="never",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()