Update app.py
app.py
CHANGED
@@ -10,6 +10,7 @@ import torchaudio
 from torchaudio.functional import resample
 import threading
 import queue
+import os
 
 # Set up logging
 import logging
@@ -19,16 +20,21 @@ logger = logging.getLogger(__name__)
 # Set up device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# Initialize model and tokenizer
+# Initialize model and tokenizer
 model = None
 tokenizer = None
 
-def load_model(hf_token):
+def load_model():
     global model, tokenizer
 
     print("Loading Orpheus model...")
     model_name = "canopylabs/orpheus-3b-0.1-ft"
 
+    # Get Hugging Face token from environment variable
+    hf_token = os.environ.get("HUGGINGFACE_TOKEN")
+    if not hf_token:
+        raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
+
     login(token=hf_token)
 
     snapshot_download(
@@ -57,7 +63,9 @@ def load_model(hf_token):
     model.to(device)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     print(f"Orpheus model loaded to {device}")
-
+
+# Load the model before creating the Gradio interface
+load_model()
 
 def generate_podcast_script(api_key, content, duration, num_hosts):
     genai.configure(api_key=api_key)
@@ -76,6 +84,10 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
         Do not use any special characters or markdown. Only include the monologue with proper punctuation.
         Ensure the content flows naturally and stays relevant to the topic.
         Limit the script length to match the requested duration of {duration}.
+        To use emotion tags naturally in generative AI speech, incorporate them sparingly at key moments to enhance the dialogue's emotional context.
+        Place tags like <laugh> for joy, <sigh> for frustration or relief, <chuckle> for mild amusement, <cough> or <sniffle> for discomfort, <groan> for displeasure, <yawn> for tiredness, and <gasp> for surprise.
+        For example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>. Oh well, at least I finished the project <chuckle>."
+        Remember, use tags judiciously to maintain a natural flow of conversation
         """
     else:
         prompt = f"""
@@ -90,6 +102,10 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
         Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
         Ensure the conversation flows naturally and stays relevant to the topic.
         Limit the script length to match the requested duration of {duration}.
+        To use emotion tags naturally in generative AI speech, incorporate them sparingly at key moments to enhance the dialogue's emotional context.
+        Place tags like <laugh> for joy, <sigh> for frustration or relief, <chuckle> for mild amusement, <cough> or <sniffle> for discomfort, <groan> for displeasure, <yawn> for tiredness, and <gasp> for surprise.
+        For example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>. Oh well, at least I finished the project <chuckle>."
+        Remember, use tags judiciously to maintain a natural flow of conversation
         """
 
     response = model.generate_content(prompt)
@@ -139,10 +155,6 @@ def render_podcast(api_key, script, voice1, voice2, num_hosts):
 with gr.Blocks() as demo:
     gr.Markdown("# AI Podcast Generator")
 
-    hf_token_input = gr.Textbox(label="Enter your Hugging Face API Token", type="password")
-    load_model_btn = gr.Button("Load Orpheus Model")
-    model_status = gr.Markdown("Model not loaded")
-
     api_key_input = gr.Textbox(label="Enter your Gemini API Key", type="password")
 
     with gr.Row():
@@ -167,8 +179,6 @@ with gr.Blocks() as demo:
     render_btn = gr.Button("Render Podcast")
    audio_output = gr.Audio(label="Generated Podcast")
 
-    load_model_btn.click(load_model, inputs=[hf_token_input], outputs=[model_status])
-
    def generate_script_wrapper(api_key, content, duration, num_hosts):
        return generate_podcast_script(api_key, content, duration, num_hosts)
 
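With this change the Hugging Face token is no longer collected in the UI: load_model() reads it from the HUGGINGFACE_TOKEN environment variable and runs at import time, so the variable must be set before app.py starts (for example as a secret in the Space settings, or exported in the shell for local runs). Below is a minimal local launcher sketch, assuming the app is started as `python app.py`; the helper is illustrative and not part of the commit.

import os
import subprocess
import sys

# Illustrative launcher, not part of app.py: refuse to start the app when the
# HUGGINGFACE_TOKEN environment variable that the new load_model() requires is
# missing, instead of hitting the ValueError during import.
def launch_app() -> None:
    if not os.environ.get("HUGGINGFACE_TOKEN"):
        sys.exit(
            "HUGGINGFACE_TOKEN is not set. Export it first, e.g.\n"
            "  export HUGGINGFACE_TOKEN=hf_your_token_here\n"
            "or add it as a secret in the Space settings."
        )
    subprocess.run([sys.executable, "app.py"], check=True)

if __name__ == "__main__":
    launch_app()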
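The prompt additions ask Gemini to sprinkle Orpheus emotion tags (<laugh>, <sigh>, <chuckle>, <cough>, <sniffle>, <groan>, <yawn>, <gasp>) into the generated script. The following standalone sketch, not part of app.py, shows one way to check which of those tags a script actually uses and to strip them from a display transcript; the tag list comes from the prompt text above and the function names are illustrative.

import re
from collections import Counter

# Tags the updated prompt tells Gemini it may use (taken from the prompt text above).
EMOTION_TAGS = {"laugh", "sigh", "chuckle", "cough", "sniffle", "groan", "yawn", "gasp"}
TAG_PATTERN = re.compile(r"<(" + "|".join(EMOTION_TAGS) + r")>")

def count_emotion_tags(script: str) -> Counter:
    """Count how often each allowed emotion tag appears in a generated script."""
    return Counter(TAG_PATTERN.findall(script))

def strip_emotion_tags(script: str) -> str:
    """Remove the tags, e.g. to show a clean transcript next to the rendered audio."""
    return re.sub(r"\s*" + TAG_PATTERN.pattern, "", script).strip()

line = "I stayed up all night <yawn> and the meeting was canceled <groan>."
print(count_emotion_tags(line))   # Counter({'yawn': 1, 'groan': 1})
print(strip_emotion_tags(line))   # I stayed up all night and the meeting was canceled.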