Spaces:

HaiderAUT
/

PodCastIt

Build error

App Files Community

HaiderAUT committed on May 8

Commit

cca7e91

verified ·

1 Parent(s): d4adc2b

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -77

app.py CHANGED Viewed

@@ -1,29 +1,28 @@
 # =============================================================
 # Lecture → Podcast & Script Generator (English Only)
-# • Text: Google Gemini API (via UI-provided key)
-# • Audio: Hugging Face InferenceClient.text_to_speech (public MMS-TTS for English)
 # =============================================================
-import os
 import re
 import tempfile
 import textwrap
 from pathlib import Path
-from typing import List, Optional, Any
 import gradio as gr
 from PyPDF2 import PdfReader
 from pydub import AudioSegment
 from pydub.exceptions import CouldntDecodeError
-# Hugging Face TTS client (anonymous/public access)
-from huggingface_hub import InferenceClient
 # Google Gemini SDK
 try:
     import google.generativeai as genai
 except ImportError:
-    raise ImportError("Please install Google Generative AI SDK: pip install google-generativeai")
 # ------------------------------------------------------------------
 # Globals & templates
@@ -43,10 +42,11 @@ PROMPT_TEMPLATE = textwrap.dedent(
 HF_TTS_MODEL = "facebook/mms-tts-eng"
 CHUNK_CHAR_LIMIT = 280
 tts_client = InferenceClient()
 # ------------------------------------------------------------------
-# Helpers
 # ------------------------------------------------------------------
 def extract_pdf_text(pdf_path: str) -> str:
     reader = PdfReader(pdf_path)
@@ -69,98 +69,111 @@ def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
         chunks.append(current)
     return chunks
-def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
-    chunks = split_to_chunks(text)
     if not chunks:
-        raise ValueError("No text to synthesize.")
     segments = []
-    for i, chunk in enumerate(chunks):
-        try:
-            audio_bytes = tts_client.text_to_speech(chunk, model=model_id)
-        except Exception as e:
-            raise RuntimeError(f"TTS failed on chunk {i+1}: {e}")
-        part_path = out_dir / f"seg_{i}.flac"
         part_path.write_bytes(audio_bytes)
         try:
             seg = AudioSegment.from_file(part_path, format="flac")
             segments.append(seg)
         except CouldntDecodeError as e:
-            raise RuntimeError(f"Could not decode segment {i+1}: {e}")
     final_audio = sum(segments, AudioSegment.empty())
-    out_path = out_dir / "podcast_audio.flac"
-    final_audio.export(out_path, format="flac")
-    return out_path
 # ------------------------------------------------------------------
-# Main pipeline
 # ------------------------------------------------------------------
-def generate_podcast(
-    gemini_api_key: Optional[str],
-    lecture_pdf: Optional[gr.File]
-) -> List[Optional[Any]]:
     if not gemini_api_key:
-        raise gr.Error("Enter your Google AI Studio API Key.")
     if not lecture_pdf:
-        raise gr.Error("Upload a lecture PDF file.")
-    genai.configure(api_key=gemini_api_key)
-    raw = extract_pdf_text(lecture_pdf.name)
-    content = truncate_text(raw)
-    if not content.strip():
-        raise gr.Error("Lecture PDF contained no extractable text.")
     try:
-        gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
     except Exception as e:
-        raise gr.Error(f"Gemini init failed: {e}")
     prompt = PROMPT_TEMPLATE.format(content=content)
     try:
-        resp = gemini_model.generate_content(prompt)
-        script = resp.text or ""
     except Exception as e:
         raise gr.Error(f"Gemini generation error: {e}")
     with tempfile.TemporaryDirectory() as td:
-        tmp = Path(td)
-        # Save script file
-        script_path = tmp / "podcast_script.txt"
-        script_path.write_text(script, encoding="utf-8")
-        # Synthesize audio
-        try:
-            audio_path = synthesize_speech(script, HF_TTS_MODEL, tmp)
-        except Exception as e:
-            raise gr.Error(f"Speech synthesis error: {e}")
-        # Return [audio, markdown script, txt file]
-        return [str(audio_path), script, str(script_path)]
 # ------------------------------------------------------------------
-# Gradio Interface
 # ------------------------------------------------------------------
-iface = gr.Interface(
-    fn=generate_podcast,
-    inputs=[
-        gr.Textbox(
             label="Google Gemini API Key",
             type="password",
-            placeholder="Paste your key here"
-        ),
-        gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
-    ],
-    outputs=[
-        gr.Audio(label="English Podcast", type="filepath"),
-        gr.Markdown(label="English Script"),            # renders the script
-        gr.File(label="Download English Script (.txt)", type="filepath"),
-    ],
-    title="Lecture → English Podcast & Script",
-    description=(
-        "Enter your Gemini API Key and upload a lecture PDF. "
-        "Generates a two-host podcast audio and a Markdown script in English "
-        "using Google Gemini for text and Hugging Face MMS-TTS for audio."
-    ),
-    allow_flagging="never",
-)
-if __name__ == "__main__":
-    iface.launch()

 # =============================================================
 # Lecture → Podcast & Script Generator (English Only)
+# Two-step: 1) Gemini script  2) HF MMS-TTS audio
 # =============================================================
 import re
 import tempfile
 import textwrap
 from pathlib import Path
+from typing import List, Optional
 import gradio as gr
 from PyPDF2 import PdfReader
 from pydub import AudioSegment
 from pydub.exceptions import CouldntDecodeError
 # Google Gemini SDK
 try:
     import google.generativeai as genai
 except ImportError:
+    raise ImportError("Please install the Google Generative AI SDK:\n"
+                      "    pip install google-generativeai")
+# Hugging Face TTS client (anonymous/public)
+from huggingface_hub import InferenceClient
 # ------------------------------------------------------------------
 # Globals & templates
 HF_TTS_MODEL = "facebook/mms-tts-eng"
 CHUNK_CHAR_LIMIT = 280
+# Initialize the HF TTS client once
 tts_client = InferenceClient()
 # ------------------------------------------------------------------
+# Helper functions
 # ------------------------------------------------------------------
 def extract_pdf_text(pdf_path: str) -> str:
     reader = PdfReader(pdf_path)
         chunks.append(current)
     return chunks
+def synthesize_speech(script: str, model_id: str, out_dir: Path) -> str:
+    chunks = split_to_chunks(script)
     if not chunks:
+        raise RuntimeError("No text chunks to synthesize.")
     segments = []
+    for idx, chunk in enumerate(chunks):
+        audio_bytes = tts_client.text_to_speech(chunk, model=model_id)
+        part_path = out_dir / f"seg_{idx}.flac"
         part_path.write_bytes(audio_bytes)
         try:
             seg = AudioSegment.from_file(part_path, format="flac")
             segments.append(seg)
         except CouldntDecodeError as e:
+            raise RuntimeError(f"Failed to decode chunk {idx}: {e}") from e
     final_audio = sum(segments, AudioSegment.empty())
+    final_path = out_dir / "podcast_audio.flac"
+    final_audio.export(final_path, format="flac")
+    return str(final_path)
 # ------------------------------------------------------------------
+# Step 1: Generate script via Gemini
 # ------------------------------------------------------------------
+def generate_script(
+    gemini_api_key: str,
+    lecture_pdf: gr.File
+) -> List[str]:
     if not gemini_api_key:
+        raise gr.Error("Please enter your Google AI Studio API Key.")
     if not lecture_pdf:
+        raise gr.Error("Please upload a lecture PDF.")
+    # Configure Gemini
     try:
+        genai.configure(api_key=gemini_api_key)
+        model = genai.GenerativeModel("gemini-1.5-flash-latest")
     except Exception as e:
+        raise gr.Error(f"Gemini init/config error: {e}")
+    # Extract and truncate text
+    raw_text = extract_pdf_text(lecture_pdf.name)
+    content = truncate_text(raw_text)
+    if not content.strip():
+        raise gr.Error("No extractable text found in the PDF.")
+    # Generate dialogue script
     prompt = PROMPT_TEMPLATE.format(content=content)
     try:
+        response = model.generate_content(prompt)
+        script = response.text or ""
     except Exception as e:
         raise gr.Error(f"Gemini generation error: {e}")
+    return [script, script]  # [for Markdown display, for state storage]
+# ------------------------------------------------------------------
+# Step 2: Generate audio from provided script
+# ------------------------------------------------------------------
+def generate_audio(
+    script: str
+) -> str:
+    if not script:
+        raise gr.Error("No script available. Please generate the script first.")
+    # Create a temp dir for audio parts
     with tempfile.TemporaryDirectory() as td:
+        out_dir = Path(td)
+        audio_path = synthesize_speech(script, HF_TTS_MODEL, out_dir)
+        return audio_path
 # ------------------------------------------------------------------
+# Gradio UI
 # ------------------------------------------------------------------
+with gr.Blocks() as demo:
+    # Shared state for the script
+    script_state = gr.State()
+    with gr.Tab("Generate Script"):
+        api_key_input = gr.Textbox(
             label="Google Gemini API Key",
             type="password",
+            placeholder="Enter your key"
+        )
+        pdf_input = gr.File(
+            label="Upload Lecture PDF",
+            file_types=[".pdf"]
+        )
+        script_md = gr.Markdown(
+            label="Generated Script",
+            placeholder="Your script will appear here..."
+        )
+        gen_script_btn = gr.Button("Generate Script")
+        gen_script_btn.click(
+            fn=generate_script,
+            inputs=[api_key_input, pdf_input],
+            outputs=[script_md, script_state]
+        )
+    with gr.Tab("Generate Audio"):
+        gen_audio_btn = gr.Button("Generate Audio")
+        audio_out = gr.Audio(
+            label="Podcast Audio",
+            type="filepath"
+        )
+        gen_audio_btn.click(
+            fn=generate_audio,
+            inputs=[script_state],
+            outputs=[audio_out]
+        )
+    demo.launch()