Spaces:

MicroHealth
/

ai-podcast-builder

Paused

App Files Community

bluenevus committed on Apr 15

Commit

ca79387

verified ·

1 Parent(s): 1f274e9

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -58

app.py CHANGED Viewed

@@ -16,47 +16,45 @@ import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # Set up device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Model name
 model_name = "canopylabs/orpheus-3b-0.1-ft"
-def load_model(hf_token):
-    login(token=hf_token)
-    print("Loading Orpheus model...")
-    snapshot_download(
-        repo_id=model_name,
-        use_auth_token=hf_token,
-        allow_patterns=[
-            "config.json",
-            "*.safetensors",
-            "model.safetensors.index.json",
-        ],
-        ignore_patterns=[
-            "optimizer.pt",
-            "pytorch_model.bin",
-            "training_args.bin",
-            "scheduler.pt",
-            "tokenizer.json",
-            "tokenizer_config.json",
-            "special_tokens_map.json",
-            "vocab.json",
-            "merges.txt",
-            "tokenizer.*"
-        ]
-    )
-    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
-    model.to(device)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    print(f"Orpheus model loaded to {device}")
-    return model, tokenizer
-# Initialize as None, will be loaded when HF token is provided
-model = None
-tokenizer = None
 def generate_podcast_script(api_key, content, duration, num_hosts):
     genai.configure(api_key=api_key)
@@ -75,10 +73,6 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
         Do not use any special characters or markdown. Only include the monologue with proper punctuation.
         Ensure the content flows naturally and stays relevant to the topic.
         Limit the script length to match the requested duration of {duration}.
-        Use emotion tags naturally in generative AI speech, incorporate them sparingly at key moments to enhance the dialogue's emotional context.
-        Place tags like <laugh> for joy, <sigh> for frustration or relief, <chuckle> for mild amusement, <cough> or <sniffle> for discomfort, <groan> for displeasure, <yawn> for tiredness, and <gasp> for surprise.
-        For example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>. Oh well, at least I finished the project <chuckle>."
-        Remember, use tags judiciously to maintain a natural flow of conversation.
         """
     else:
         prompt = f"""
@@ -93,10 +87,6 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
         Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
         Ensure the conversation flows naturally and stays relevant to the topic.
         Limit the script length to match the requested duration of {duration}.
-        Use emotion tags naturally in generative AI speech, incorporate them sparingly at key moments to enhance the dialogue's emotional context.
-        Place tags like <laugh> for joy, <sigh> for frustration or relief, <chuckle> for mild amusement, <cough> or <sniffle> for discomfort, <groan> for displeasure, <yawn> for tiredness, and <gasp> for surprise.
-        For example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>. Oh well, at least I finished the project <chuckle>."
-        Remember, use tags judiciously to maintain a natural flow of conversation.
         """
     response = model.generate_content(prompt)
@@ -104,7 +94,6 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
     return clean_text
 def text_to_speech(text, voice):
-    global model, tokenizer
     inputs = tokenizer(text, return_tensors="pt").to(device)
     with torch.no_grad():
         output = model.generate(**inputs, max_new_tokens=256)
@@ -146,10 +135,6 @@ def render_podcast(api_key, script, voice1, voice2, num_hosts):
 with gr.Blocks() as demo:
     gr.Markdown("# AI Podcast Generator")
-    hf_token_input = gr.Textbox(label="Enter your Hugging Face API Token", type="password")
-    load_model_btn = gr.Button("Load Orpheus Model")
-    model_status = gr.Markdown("Model not loaded")
     api_key_input = gr.Textbox(label="Enter your Gemini API Key", type="password")
     with gr.Row():
@@ -160,11 +145,13 @@ with gr.Blocks() as demo:
     num_hosts = gr.Radio([1, 2], label="Number of podcast hosts", value=2)
     with gr.Row():
-        voice1_select = gr.Dropdown(label="Select Voice 1", choices=["Voice 1", "Voice 2", "Voice 3"], value="Voice 1")
     with gr.Row():
-        voice2_select = gr.Dropdown(label="Select Voice 2", choices=["Voice 1", "Voice 2", "Voice 3"], value="Voice 2")
     generate_btn = gr.Button("Generate Script")
     script_output = gr.Textbox(label="Generated Script", lines=10)
@@ -172,13 +159,6 @@ with gr.Blocks() as demo:
     render_btn = gr.Button("Render Podcast")
     audio_output = gr.Audio(label="Generated Podcast")
-    def load_model_wrapper(hf_token):
-        global model, tokenizer
-        model, tokenizer = load_model(hf_token)
-        return "Model loaded successfully"
-    load_model_btn.click(load_model_wrapper, inputs=[hf_token_input], outputs=[model_status])
     def generate_script_wrapper(api_key, content, duration, num_hosts):
         return generate_podcast_script(api_key, content, duration, num_hosts)

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Initialize Gemini AI
+genai.configure(api_key='YOUR_GEMINI_API_KEY')
 # Set up device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load Orpheus model
+print("Loading Orpheus model...")
 model_name = "canopylabs/orpheus-3b-0.1-ft"
+HF_TOKEN = "YOUR_HUGGINGFACE_TOKEN"
+login(token=HF_TOKEN)
+snapshot_download(
+    repo_id=model_name,
+    use_auth_token=HF_TOKEN,
+    allow_patterns=[
+        "config.json",
+        "*.safetensors",
+        "model.safetensors.index.json",
+    ],
+    ignore_patterns=[
+        "optimizer.pt",
+        "pytorch_model.bin",
+        "training_args.bin",
+        "scheduler.pt",
+        "tokenizer.json",
+        "tokenizer_config.json",
+        "special_tokens_map.json",
+        "vocab.json",
+        "merges.txt",
+        "tokenizer.*"
+    ]
+)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+model.to(device)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+print(f"Orpheus model loaded to {device}")
 def generate_podcast_script(api_key, content, duration, num_hosts):
     genai.configure(api_key=api_key)
         Do not use any special characters or markdown. Only include the monologue with proper punctuation.
         Ensure the content flows naturally and stays relevant to the topic.
         Limit the script length to match the requested duration of {duration}.
         """
     else:
         prompt = f"""
         Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
         Ensure the conversation flows naturally and stays relevant to the topic.
         Limit the script length to match the requested duration of {duration}.
         """
     response = model.generate_content(prompt)
     return clean_text
 def text_to_speech(text, voice):
     inputs = tokenizer(text, return_tensors="pt").to(device)
     with torch.no_grad():
         output = model.generate(**inputs, max_new_tokens=256)
 with gr.Blocks() as demo:
     gr.Markdown("# AI Podcast Generator")
     api_key_input = gr.Textbox(label="Enter your Gemini API Key", type="password")
     with gr.Row():
     num_hosts = gr.Radio([1, 2], label="Number of podcast hosts", value=2)
+    voice_options = ["tara", "leah", "jess", "leo", "dan", "mia", "zac", "zoe"]
     with gr.Row():
+        voice1_select = gr.Dropdown(label="Select Voice 1", choices=voice_options, value="tara")
     with gr.Row():
+        voice2_select = gr.Dropdown(label="Select Voice 2", choices=voice_options, value="leo")
     generate_btn = gr.Button("Generate Script")
     script_output = gr.Textbox(label="Generated Script", lines=10)
     render_btn = gr.Button("Render Podcast")
     audio_output = gr.Audio(label="Generated Podcast")
     def generate_script_wrapper(api_key, content, duration, num_hosts):
         return generate_podcast_script(api_key, content, duration, num_hosts)