bluenevus committed on
Commit 74245b5 · verified · 1 Parent(s): 0ad1c18

Create app.py

Files changed (1)
  1. app.py +108 -0
app.py ADDED
@@ -0,0 +1,108 @@
# app.py

import os
import json
import re

import numpy as np
import torch
import torchaudio
import gradio as gr
import google.generativeai as genai
from transformers import AutoProcessor, AutoModel

# Initialize Gemini AI (read the key from the environment rather than hard-coding it)
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", "YOUR_GEMINI_API_KEY"))
model = genai.GenerativeModel('gemini-pro')

# Initialize F5-TTS model (assumes the checkpoint loads through the transformers Auto classes)
processor = AutoProcessor.from_pretrained("SWivid/F5-TTS")
f5_model = AutoModel.from_pretrained("SWivid/F5-TTS")
def clone_voice(audio_file):
    # Load the reference audio clip.
    waveform, sample_rate = torchaudio.load(audio_file)
    # Implement voice cloning logic here.
    # For demonstration, we'll just return a placeholder.
    return "Voice cloned successfully"
def generate_podcast_script(content, duration):
    prompt = f"""
    Create a podcast script for two people discussing the following content:
    {content}

    The podcast should last approximately {duration}. Include natural speech patterns,
    humor, and occasional off-topic chit-chat. Use speech fillers like "um", "ah",
    "yes", "I see", "Ok now". Vary the emotional tone (e.g., regular, happy, sad, surprised)
    and indicate these in [square brackets]. Format the script as follows:

    Host 1: [emotion] Dialog
    Host 2: [emotion] Dialog

    Ensure the conversation flows naturally and stays relevant to the topic.
    """
    response = model.generate_content(prompt)
    return response.text
def text_to_speech(text, speaker_id):
    # Drop the [emotion] cues from the script line so they are not read aloud.
    text = re.sub(r"\[.*?\]", "", text).strip()
    # Assumes the F5-TTS checkpoint exposes a generate_speech() API via transformers.
    inputs = processor(text=text, return_tensors="pt")
    speech = f5_model.generate_speech(inputs["input_ids"], speaker_id=speaker_id, vocoder_output=True)
    return speech.cpu().numpy()
def create_podcast(content, duration, voice1, voice2):
    # Convenience wrapper: generate a script and render it in a single step.
    script = generate_podcast_script(content, duration)
    return render_podcast(script, voice1, voice2)
def gradio_interface(content, duration, voice1, voice2):
    # Only the content and target duration are needed to draft the script;
    # the uploaded voices come into play at render time.
    script = generate_podcast_script(content, duration)
    return script
def render_podcast(script, voice1, voice2):
    lines = script.split('\n')
    audio_segments = []

    for line in lines:
        if line.startswith("Host 1:"):
            audio = text_to_speech(line[7:], speaker_id=0)  # Assuming speaker_id 0 for Host 1
            audio_segments.append(audio)
        elif line.startswith("Host 2:"):
            audio = text_to_speech(line[7:], speaker_id=1)  # Assuming speaker_id 1 for Host 2
            audio_segments.append(audio)

    if not audio_segments:
        raise gr.Error("The script contains no 'Host 1:' / 'Host 2:' lines to render.")

    # Concatenate audio segments
    podcast_audio = np.concatenate(audio_segments)
    return (22050, podcast_audio)  # Assuming 22050 Hz sample rate
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# AI Podcast Generator")

    with gr.Row():
        content_input = gr.Textbox(label="Paste your content or upload a document")
        document_upload = gr.File(label="Upload Document")  # Note: not wired to a callback yet

    duration = gr.Radio(["1-5 min", "5-10 min", "10-15 min"], label="Estimated podcast duration")

    with gr.Row():
        voice1_upload = gr.Audio(label="Upload Voice 1", type="filepath")
        voice2_upload = gr.Audio(label="Upload Voice 2", type="filepath")

    generate_btn = gr.Button("Generate Script")
    script_output = gr.Textbox(label="Generated Script", lines=10)

    render_btn = gr.Button("Render Podcast")
    audio_output = gr.Audio(label="Generated Podcast")

    generate_btn.click(gradio_interface, inputs=[content_input, duration, voice1_upload, voice2_upload], outputs=script_output)
    render_btn.click(render_podcast, inputs=[script_output, voice1_upload, voice2_upload], outputs=audio_output)

demo.launch()
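
A note on the TTS backend: app.py assumes SWivid/F5-TTS can be loaded through the transformers Auto classes and that the resulting model exposes a generate_speech() method, which may not hold for that checkpoint. If it does not load that way, the text_to_speech step can be prototyped with SpeechT5, which does ship that interface in transformers. The sketch below is illustrative only: it swaps in microsoft/speecht5_tts with x-vector speaker embeddings in place of the commit's integer speaker_id, and the file name and embedding indices are arbitrary choices, not part of this commit.

# speecht5_sketch.py - hypothetical stand-in for text_to_speech(), not part of this commit
import torch
from datasets import load_dataset
from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor

processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Two x-vectors from the CMU ARCTIC speaker-embedding set stand in for Host 1 and Host 2.
xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speakers = [
    torch.tensor(xvectors[7306]["xvector"]).unsqueeze(0),
    torch.tensor(xvectors[3000]["xvector"]).unsqueeze(0),
]

def text_to_speech(text, speaker_id):
    inputs = processor(text=text, return_tensors="pt")
    # generate_speech returns a 1-D float32 waveform tensor at 16 kHz
    speech = tts_model.generate_speech(inputs["input_ids"], speakers[speaker_id], vocoder=vocoder)
    return speech.numpy()

print(text_to_speech("Um, welcome back to the show!", speaker_id=0).shape)

If this substitute were used, the 22050 sample rate hard-coded in render_podcast would also need to change to 16000 to match SpeechT5's output rate.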