Update app.py
app.py
CHANGED
@@ -10,6 +10,7 @@ import torchaudio
 from torchaudio.functional import resample
 import threading
 import queue
+import os
 
 # Set up logging
 import logging
@@ -19,16 +20,21 @@ logger = logging.getLogger(__name__)
 # Set up device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# Initialize model and tokenizer
+# Initialize model and tokenizer
 model = None
 tokenizer = None
 
-def load_model(hf_token):
+def load_model():
     global model, tokenizer
 
     print("Loading Orpheus model...")
     model_name = "canopylabs/orpheus-3b-0.1-ft"
 
+    # Get Hugging Face token from environment variable
+    hf_token = os.environ.get("HUGGINGFACE_TOKEN")
+    if not hf_token:
+        raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
+
     login(token=hf_token)
 
     snapshot_download(
@@ -57,7 +63,9 @@ def load_model(hf_token):
     model.to(device)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     print(f"Orpheus model loaded to {device}")
-
+
+# Load the model before creating the Gradio interface
+load_model()
 
 def generate_podcast_script(api_key, content, duration, num_hosts):
     genai.configure(api_key=api_key)
@@ -76,6 +84,10 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
         Do not use any special characters or markdown. Only include the monologue with proper punctuation.
         Ensure the content flows naturally and stays relevant to the topic.
         Limit the script length to match the requested duration of {duration}.
+        To use emotion tags naturally in generative AI speech, incorporate them sparingly at key moments to enhance the dialogue's emotional context.
+        Place tags like <laugh> for joy, <sigh> for frustration or relief, <chuckle> for mild amusement, <cough> or <sniffle> for discomfort, <groan> for displeasure, <yawn> for tiredness, and <gasp> for surprise.
+        For example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>. Oh well, at least I finished the project <chuckle>."
+        Remember, use tags judiciously to maintain a natural flow of conversation
         """
     else:
         prompt = f"""
@@ -90,6 +102,10 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
         Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
         Ensure the conversation flows naturally and stays relevant to the topic.
         Limit the script length to match the requested duration of {duration}.
+        To use emotion tags naturally in generative AI speech, incorporate them sparingly at key moments to enhance the dialogue's emotional context.
+        Place tags like <laugh> for joy, <sigh> for frustration or relief, <chuckle> for mild amusement, <cough> or <sniffle> for discomfort, <groan> for displeasure, <yawn> for tiredness, and <gasp> for surprise.
+        For example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>. Oh well, at least I finished the project <chuckle>."
+        Remember, use tags judiciously to maintain a natural flow of conversation
         """
 
     response = model.generate_content(prompt)
@@ -139,10 +155,6 @@ def render_podcast(api_key, script, voice1, voice2, num_hosts):
 with gr.Blocks() as demo:
     gr.Markdown("# AI Podcast Generator")
 
-    hf_token_input = gr.Textbox(label="Enter your Hugging Face API Token", type="password")
-    load_model_btn = gr.Button("Load Orpheus Model")
-    model_status = gr.Markdown("Model not loaded")
-
     api_key_input = gr.Textbox(label="Enter your Gemini API Key", type="password")
 
     with gr.Row():
@@ -167,8 +179,6 @@ with gr.Blocks() as demo:
     render_btn = gr.Button("Render Podcast")
    audio_output = gr.Audio(label="Generated Podcast")
 
-    load_model_btn.click(load_model, inputs=[hf_token_input], outputs=[model_status])
-
    def generate_script_wrapper(api_key, content, duration, num_hosts):
        return generate_podcast_script(api_key, content, duration, num_hosts)
 
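With this change the Hugging Face token is no longer collected in the UI: load_model() reads it from the HUGGINGFACE_TOKEN environment variable and runs at import time, so the variable must be set before app.py starts (for example as a secret in the Space settings, or exported in the shell for local runs). Below is a minimal local launcher sketch, assuming the app is started as `python app.py`; the helper is illustrative and not part of the commit.

import os
import subprocess
import sys

# Illustrative launcher, not part of app.py: refuse to start the app when the
# HUGGINGFACE_TOKEN environment variable that the new load_model() requires is
# missing, instead of hitting the ValueError during import.
def launch_app() -> None:
    if not os.environ.get("HUGGINGFACE_TOKEN"):
        sys.exit(
            "HUGGINGFACE_TOKEN is not set. Export it first, e.g.\n"
            "  export HUGGINGFACE_TOKEN=hf_your_token_here\n"
            "or add it as a secret in the Space settings."
        )
    subprocess.run([sys.executable, "app.py"], check=True)

if __name__ == "__main__":
    launch_app()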
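The prompt additions ask Gemini to sprinkle Orpheus emotion tags (<laugh>, <sigh>, <chuckle>, <cough>, <sniffle>, <groan>, <yawn>, <gasp>) into the generated script. The following standalone sketch, not part of app.py, shows one way to check which of those tags a script actually uses and to strip them from a display transcript; the tag list comes from the prompt text above and the function names are illustrative.

import re
from collections import Counter

# Tags the updated prompt tells Gemini it may use (taken from the prompt text above).
EMOTION_TAGS = {"laugh", "sigh", "chuckle", "cough", "sniffle", "groan", "yawn", "gasp"}
TAG_PATTERN = re.compile(r"<(" + "|".join(EMOTION_TAGS) + r")>")

def count_emotion_tags(script: str) -> Counter:
    """Count how often each allowed emotion tag appears in a generated script."""
    return Counter(TAG_PATTERN.findall(script))

def strip_emotion_tags(script: str) -> str:
    """Remove the tags, e.g. to show a clean transcript next to the rendered audio."""
    return re.sub(r"\s*" + TAG_PATTERN.pattern, "", script).strip()

line = "I stayed up all night <yawn> and the meeting was canceled <groan>."
print(count_emotion_tags(line))   # Counter({'yawn': 1, 'groan': 1})
print(strip_emotion_tags(line))   # I stayed up all night and the meeting was canceled.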