Nick021402 committed on
Commit
4cfc491
·
verified ·
1 Parent(s): d8fb30b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import numpy as np
3
+ from transformers import pipeline
4
+ import gradio as gr
5
+
6
# Display name shown in speaker tags -> model id handed to the
# text-to-speech pipeline. Keys must match the text inside [..] tags exactly.
# NOTE(review): these repo ids are taken as-is; confirm they resolve on the
# model hub before deploying.
VOICES = {
    "Amy (Female)": "microsoft/vits-piper-en-us-amy",
    "Joe (Male)": "microsoft/vits-piper-en-us-joe",
    "Clara (Female)": "microsoft/vits-piper-en-us-clb",
    "Ryan (Male)": "microsoft/vits-piper-en-us-jvs",
}
13
+
14
def parse_segments(text):
    """Split tagged input into ``(speaker, text)`` pairs.

    A segment has the form ``[Speaker]spoken text[/Speaker]``; the closing
    tag must repeat the opening speaker name exactly. Anything outside a
    well-formed tag pair is ignored.

    Args:
        text: Raw input containing zero or more tagged segments.

    Returns:
        A list of ``(speaker, text)`` tuples in order of appearance, with
        surrounding whitespace stripped from the spoken text. Empty when
        ``text`` is empty or contains no complete segment.
    """
    if not text:
        return []

    # The square brackets are literal tag delimiters and must be escaped.
    # (The earlier pattern had ``$$`` in their place, which are end-of-input
    # anchors, so no tag could ever match.) DOTALL lets a segment span
    # several lines; the lazy ``.*?`` stops at the first matching close tag.
    pattern = re.compile(
        r'\[(?P<speaker>[^\]]+)\](?P<text>.*?)\[/(?P=speaker)\]',
        re.DOTALL,
    )
    return [
        (match.group('speaker'), match.group('text').strip())
        for match in pattern.finditer(text)
    ]
19
+
20
def generate_podcast(input_text):
    """Render speaker-tagged text into a single audio track.

    The input is split with ``parse_segments``; each segment is synthesised
    with the text-to-speech pipeline for its speaker's ``VOICES`` entry, and
    the clips are joined with a 5000-sample silence after each one.

    Args:
        input_text: Text made of ``[Speaker]...[/Speaker]`` segments.

    Returns:
        ``((sampling_rate, samples), status)``. On success ``samples`` is the
        concatenated audio and ``status`` a success message. On any failure
        (no segments, unknown speaker, or an exception while synthesising)
        the audio part is ``(22050, np.zeros(0))`` and ``status`` explains
        why. This function does not raise.
    """
    try:
        segments = parse_segments(input_text)

        if not segments:
            return (22050, np.zeros(0)), "No valid speaker segments found"

        # Reject unknown speakers before loading any model, so a typo in a
        # late tag does not cost the synthesis time of everything before it.
        for speaker, _ in segments:
            if speaker not in VOICES:
                return (22050, np.zeros(0)), f"Invalid speaker: {speaker}"

        # One pipeline per model for the duration of this call. Dialogue
        # usually alternates between speakers, and rebuilding the pipeline on
        # every switch would reload the same weights over and over. The cache
        # is local, so the models are released when the call returns.
        pipes = {}
        pause = np.zeros(5000)
        pieces = []
        sampling_rate = None

        for speaker, text in segments:
            if not text:
                # Nothing to say for an empty segment; skip the model call.
                continue

            model_name = VOICES[speaker]
            if model_name not in pipes:
                pipes[model_name] = pipeline("text-to-speech", model=model_name)

            output = pipes[model_name](text)
            sampling_rate = output["sampling_rate"]

            # NOTE(review): some models appear to return audio shaped (1, N)
            # rather than (N,); collapse singleton axes so the clip can be
            # joined with the 1-D pause. Confirm against the pipeline docs.
            clip = np.atleast_1d(np.squeeze(np.asarray(output["audio"])))
            pieces.append(clip)
            pieces.append(pause)

        if not pieces:
            # Every segment was empty, so there is nothing to play.
            return (22050, np.zeros(0)), "No valid speaker segments found"

        # Assumes every voice reports the same sampling rate; the rate of the
        # last synthesised segment is used for the whole track.
        final_audio = np.concatenate(pieces)

        return (sampling_rate, final_audio), "Podcast generated successfully!"

    except Exception as e:
        # UI boundary: surface the failure as a status message instead of
        # letting the exception propagate to the caller.
        return (22050, np.zeros(0)), f"Error: {str(e)}"
55
+
56
+ # Create Gradio interface
57
def podcast_interface(text):
    """Adapt ``generate_podcast`` to the two outputs of the Gradio UI.

    Args:
        text: Contents of the input text box.

    Returns:
        ``(audio_value, status)``. ``audio_value`` is the
        ``(sampling_rate, samples)`` pair when samples were produced, and
        ``gr.update()`` otherwise; ``status`` is the message from
        ``generate_podcast``.
    """
    (sample_rate, samples), status = generate_podcast(text)

    if samples.size > 0:
        audio_value = (sample_rate, samples)
    else:
        # No audio was produced: send a bare update instead of an empty clip.
        audio_value = gr.update()

    return audio_value, status
60
+
61
# Web UI: one multi-line text box in, an audio player plus a status line out.
demo = gr.Interface(
    title="🎙️ Multi-Voice Podcast Generator",
    description="Generate podcasts with multiple free AI voices using Microsoft's Piper TTS models. Use [SpeakerName] tags to assign different voices to different text segments.",
    fn=podcast_interface,
    inputs=gr.Textbox(
        lines=12,
        label="Input Text with Speaker Tags",
        placeholder="""Example format:
[Amy (Female)]Hello and welcome to today's episode![/Amy (Female)]
[Joe (Male)]Excited to have you here![/Joe (Male)]""",
    ),
    outputs=[
        gr.Audio(type="numpy", label="Generated Podcast"),
        gr.Textbox(value="Ready", label="Status"),
    ],
    # A single ready-made example the user can load into the text box.
    examples=[
        ["""[Amy (Female)]Welcome to our podcast![/Amy (Female)]
[Joe (Male)]Today we're discussing AI innovations.[/Joe (Male)]"""],
    ],
    theme="soft",
    allow_flagging="never",
)

# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()