Update app.py
Browse files
app.py
CHANGED
@@ -28,7 +28,6 @@ except ImportError:
|
|
28 |
# ------------------------------------------------------------------
|
29 |
# Globals & templates
|
30 |
# ------------------------------------------------------------------
|
31 |
-
# Gemini prompt for ~300-word two-host dialogue in English
|
32 |
PROMPT_TEMPLATE = textwrap.dedent(
|
33 |
"""
|
34 |
You are producing a lively two-host educational podcast in English.
|
@@ -41,29 +40,23 @@ PROMPT_TEMPLATE = textwrap.dedent(
|
|
41 |
"""
|
42 |
)
|
43 |
|
44 |
-
# TTS model ID for English MMS-TTS
|
45 |
HF_TTS_MODEL = "facebook/mms-tts-eng"
|
46 |
-
# Safe chunk size for HF text-to-speech
|
47 |
CHUNK_CHAR_LIMIT = 280
|
48 |
|
49 |
-
# Initialize HF TTS client (no token required for public models)
|
50 |
tts_client = InferenceClient()
|
51 |
|
52 |
# ------------------------------------------------------------------
|
53 |
# Helpers
|
54 |
# ------------------------------------------------------------------
|
55 |
def extract_pdf_text(pdf_path: str) -> str:
    """Read the PDF at *pdf_path* and return the text of all pages, newline-joined.

    Pages with no extractable text contribute an empty string rather than None.
    """
    pages = PdfReader(pdf_path).pages
    parts = [page.extract_text() or "" for page in pages]
    return "\n".join(parts)
59 |
|
60 |
def truncate_text(text: str, max_words: int = 8000) -> str:
    """Keep at most *max_words* whitespace-separated words of *text*.

    Used to cap lecture text so the prompt fits the LLM context window.
    Collapses runs of whitespace, since the words are re-joined with single
    spaces.
    """
    kept = text.split()[:max_words]
    return " ".join(kept)
|
64 |
|
65 |
def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
|
66 |
-
"""Split text into ≤limit-char chunks at sentence boundaries."""
|
67 |
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
|
68 |
chunks, current = [], ""
|
69 |
for sent in sentences:
|
@@ -77,7 +70,6 @@ def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
|
|
77 |
return chunks
|
78 |
|
79 |
def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
|
80 |
-
"""Chunk-safe TTS via HF Inference API, concatenating FLAC segments."""
|
81 |
chunks = split_to_chunks(text)
|
82 |
if not chunks:
|
83 |
raise ValueError("No text to synthesize.")
|
@@ -94,10 +86,9 @@ def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
|
|
94 |
segments.append(seg)
|
95 |
except CouldntDecodeError as e:
|
96 |
raise RuntimeError(f"Could not decode segment {i+1}: {e}")
|
97 |
-
|
98 |
-
final = sum(segments, AudioSegment.empty())
|
99 |
out_path = out_dir / "podcast_audio.flac"
|
100 |
-
|
101 |
return out_path
|
102 |
|
103 |
# ------------------------------------------------------------------
|
@@ -107,31 +98,30 @@ def generate_podcast(
|
|
107 |
gemini_api_key: Optional[str],
|
108 |
lecture_pdf: Optional[gr.File]
|
109 |
) -> List[Optional[Any]]:
|
110 |
-
# Validate inputs
|
111 |
if not gemini_api_key:
|
112 |
raise gr.Error("Enter your Google AI Studio API Key.")
|
113 |
if not lecture_pdf:
|
114 |
raise gr.Error("Upload a lecture PDF file.")
|
115 |
-
|
116 |
genai.configure(api_key=gemini_api_key)
|
117 |
-
|
118 |
raw = extract_pdf_text(lecture_pdf.name)
|
119 |
content = truncate_text(raw)
|
120 |
if not content.strip():
|
121 |
raise gr.Error("Lecture PDF contained no extractable text.")
|
122 |
-
|
123 |
try:
|
124 |
gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
|
125 |
except Exception as e:
|
126 |
raise gr.Error(f"Gemini init failed: {e}")
|
127 |
-
|
128 |
prompt = PROMPT_TEMPLATE.format(content=content)
|
129 |
try:
|
130 |
resp = gemini_model.generate_content(prompt)
|
131 |
script = resp.text or ""
|
132 |
except Exception as e:
|
133 |
raise gr.Error(f"Gemini generation error: {e}")
|
134 |
-
|
135 |
with tempfile.TemporaryDirectory() as td:
|
136 |
tmp = Path(td)
|
137 |
# Save script file
|
@@ -151,12 +141,16 @@ def generate_podcast(
|
|
151 |
iface = gr.Interface(
|
152 |
fn=generate_podcast,
|
153 |
inputs=[
|
154 |
-
gr.Textbox(
|
|
|
|
|
|
|
|
|
155 |
gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
|
156 |
],
|
157 |
outputs=[
|
158 |
gr.Audio(label="English Podcast", type="filepath"),
|
159 |
-
gr.Markdown(label="English Script"),
|
160 |
gr.File(label="Download English Script (.txt)", type="filepath"),
|
161 |
],
|
162 |
title="Lecture → English Podcast & Script",
|
|
|
28 |
# ------------------------------------------------------------------
|
29 |
# Globals & templates
|
30 |
# ------------------------------------------------------------------
|
|
|
31 |
PROMPT_TEMPLATE = textwrap.dedent(
|
32 |
"""
|
33 |
You are producing a lively two-host educational podcast in English.
|
|
|
40 |
"""
|
41 |
)
|
42 |
|
|
|
43 |
HF_TTS_MODEL = "facebook/mms-tts-eng"
|
|
|
44 |
CHUNK_CHAR_LIMIT = 280
|
45 |
|
|
|
46 |
tts_client = InferenceClient()
|
47 |
|
48 |
# ------------------------------------------------------------------
|
49 |
# Helpers
|
50 |
# ------------------------------------------------------------------
|
51 |
def extract_pdf_text(pdf_path: str) -> str:
    """Extract and join the text of every page in the PDF at *pdf_path*.

    A page whose extraction yields None is treated as empty text.
    """
    return "\n".join(pg.extract_text() or "" for pg in PdfReader(pdf_path).pages)
|
54 |
|
55 |
def truncate_text(text: str, max_words: int = 8000) -> str:
    """Return *text* limited to its first *max_words* words.

    Splitting on whitespace and re-joining with single spaces also
    normalizes any internal runs of whitespace.
    """
    limited = text.split()
    del limited[max_words:]
    return " ".join(limited)
|
58 |
|
59 |
def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
|
|
|
60 |
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
|
61 |
chunks, current = [], ""
|
62 |
for sent in sentences:
|
|
|
70 |
return chunks
|
71 |
|
72 |
def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
|
|
|
73 |
chunks = split_to_chunks(text)
|
74 |
if not chunks:
|
75 |
raise ValueError("No text to synthesize.")
|
|
|
86 |
segments.append(seg)
|
87 |
except CouldntDecodeError as e:
|
88 |
raise RuntimeError(f"Could not decode segment {i+1}: {e}")
|
89 |
+
final_audio = sum(segments, AudioSegment.empty())
|
|
|
90 |
out_path = out_dir / "podcast_audio.flac"
|
91 |
+
final_audio.export(out_path, format="flac")
|
92 |
return out_path
|
93 |
|
94 |
# ------------------------------------------------------------------
|
|
|
98 |
gemini_api_key: Optional[str],
|
99 |
lecture_pdf: Optional[gr.File]
|
100 |
) -> List[Optional[Any]]:
|
|
|
101 |
if not gemini_api_key:
|
102 |
raise gr.Error("Enter your Google AI Studio API Key.")
|
103 |
if not lecture_pdf:
|
104 |
raise gr.Error("Upload a lecture PDF file.")
|
105 |
+
|
106 |
genai.configure(api_key=gemini_api_key)
|
107 |
+
|
108 |
raw = extract_pdf_text(lecture_pdf.name)
|
109 |
content = truncate_text(raw)
|
110 |
if not content.strip():
|
111 |
raise gr.Error("Lecture PDF contained no extractable text.")
|
112 |
+
|
113 |
try:
|
114 |
gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
|
115 |
except Exception as e:
|
116 |
raise gr.Error(f"Gemini init failed: {e}")
|
117 |
+
|
118 |
prompt = PROMPT_TEMPLATE.format(content=content)
|
119 |
try:
|
120 |
resp = gemini_model.generate_content(prompt)
|
121 |
script = resp.text or ""
|
122 |
except Exception as e:
|
123 |
raise gr.Error(f"Gemini generation error: {e}")
|
124 |
+
|
125 |
with tempfile.TemporaryDirectory() as td:
|
126 |
tmp = Path(td)
|
127 |
# Save script file
|
|
|
141 |
iface = gr.Interface(
|
142 |
fn=generate_podcast,
|
143 |
inputs=[
|
144 |
+
gr.Textbox(
|
145 |
+
label="Google Gemini API Key",
|
146 |
+
type="password",
|
147 |
+
placeholder="Paste your key here"
|
148 |
+
),
|
149 |
gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
|
150 |
],
|
151 |
outputs=[
|
152 |
gr.Audio(label="English Podcast", type="filepath"),
|
153 |
+
gr.Markdown(label="English Script"), # renders the script
|
154 |
gr.File(label="Download English Script (.txt)", type="filepath"),
|
155 |
],
|
156 |
title="Lecture → English Podcast & Script",
|