# PodCastIt / app.py
# Hugging Face Space by HaiderAUT; commit 03ef672 ("Update app.py", verified)
# =============================================================
# Lecture → English Podcast Generator
# • Script: HF Inference API (Qwen/Qwen2.5-Coder-32B-Instruct)
# • Audio: MeloTTS (English)
# =============================================================
import io
import re
import tempfile
import textwrap
from pathlib import Path
from typing import List
import gradio as gr
from PyPDF2 import PdfReader
from huggingface_hub import InferenceClient
import torch
import nltk
nltk.download('averaged_perceptron_tagger_eng')
from melo.api import TTS
# ────────────────────────────────────────────────────────────────────
# 1) Setup HF client & MeloTTS for English
# ────────────────────────────────────────────────────────────────────
# Module-level singletons: everything below runs once at import time and is
# reused by generate_podcast for every request.

# Inference client constructed with no arguments (no token or model given here).
hf_client = InferenceClient() # anonymous/public access
# Use the GPU when torch reports CUDA as available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# English MeloTTS model placed on the device selected above.
melo_en = TTS(language='EN', device=device)
# Speaker table read from the loaded model's hyper-parameters
# (presumably speaker name -> numeric id, judging by `spk2id` -- confirm against MeloTTS).
speaker_ids = melo_en.hps.data.spk2id
# First key of the speaker table; generate_podcast looks up its id for synthesis.
default_speaker = next(iter(speaker_ids.keys()))
# ────────────────────────────────────────────────────────────────────
# 2) Prompt template
# ────────────────────────────────────────────────────────────────────
# Instruction template sent to the text-generation model. It is filled with
# str.format, so `{content}` is the only placeholder and any literal brace
# added to the template later must be doubled (`{{` / `}}`).
# textwrap.dedent strips whitespace common to all lines, which lets the
# literal be indented in source without changing the text.
PROMPT = textwrap.dedent("""
You are producing a lively two-host educational podcast in English.
Summarize the following lecture content into a dialogue of approximately 300 words.
Make it engaging: hosts ask questions, clarify ideas with analogies,
and wrap up with a concise recap. Preserve technical accuracy.
Use Markdown for host names (e.g., **Host 1:**).
### Lecture Content
{content}
""")
# ────────────────────────────────────────────────────────────────────
# 3) Helpers
# ────────────────────────────────────────────────────────────────────
def extract_pdf_text(pdf_path: str) -> str:
    """Return the text of every page in the PDF at *pdf_path*.

    Pages are visited in document order and their text is joined with a
    single newline. A page whose extraction yields nothing (``None`` or an
    empty string) contributes an empty string, so the number of joined
    segments always equals the number of pages.
    """
    page_texts = []
    for page in PdfReader(pdf_path).pages:
        extracted = page.extract_text()
        page_texts.append(extracted if extracted else "")
    return "\n".join(page_texts)
def split_to_chunks(text: str, limit: int = 280) -> List[str]:
    """Split *text* into chunks of at most *limit* characters.

    The text is first cut into sentences at whitespace that follows ``.``,
    ``!`` or ``?``. Consecutive sentences are then packed greedily, joined
    by a single space, for as long as the chunk stays within *limit*.

    A single sentence longer than *limit* is hard-wrapped on whitespace
    (and, for an over-long word, mid-word) so that no returned chunk ever
    exceeds *limit*. Whitespace inside such a wrapped sentence is
    normalised to single spaces.

    Args:
        text: Input text; may be empty or whitespace-only.
        limit: Maximum chunk length in characters. Must be positive.

    Returns:
        The chunks in reading order; an empty list when *text* holds no
        non-whitespace content.

    Raises:
        ValueError: If *limit* is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]

    # Break up any sentence that cannot fit in one chunk on its own;
    # without this step it would be emitted as an over-limit chunk.
    pieces: List[str] = []
    for sentence in sentences:
        if len(sentence) > limit:
            pieces.extend(
                textwrap.wrap(sentence, width=limit, break_on_hyphens=False)
            )
        else:
            pieces.append(sentence)

    chunks: List[str] = []
    current = ""
    for piece in pieces:
        # The +1 accounts for the joining space.
        if current and len(current) + len(piece) + 1 > limit:
            chunks.append(current)
            current = piece
        else:
            current = f"{current} {piece}" if current else piece
    if current:
        chunks.append(current)
    return chunks
# ────────────────────────────────────────────────────────────────────
# 4) Main generate function
# ────────────────────────────────────────────────────────────────────
def generate_podcast(lecture_pdf: gr.File, progress: gr.Progress = gr.Progress()):
    """Turn an uploaded lecture PDF into a podcast script and spoken audio.

    Args:
        lecture_pdf: Value delivered by the ``gr.File`` input. Either a
            path string or an object exposing the path as ``.name`` is
            accepted.
        progress: Gradio progress tracker. Leave it at its default so
            Gradio can supply it; its ``tqdm`` wrapper is handed to MeloTTS.

    Returns:
        A ``(script, audio_bytes)`` tuple: the generated dialogue text and
        the WAV-encoded audio synthesised from it.

    Raises:
        gr.Error: If no file was uploaded, the PDF yields no text, or the
            model returns an empty script.
    """
    if not lecture_pdf:
        raise gr.Error("Please upload a lecture PDF.")

    # 1️⃣ Extract & prompt
    # Depending on the Gradio version/configuration the upload arrives as a
    # plain path or as a file wrapper carrying the path in `.name`.
    pdf_path = getattr(lecture_pdf, "name", lecture_pdf)
    raw = extract_pdf_text(pdf_path)
    if not raw.strip():
        # e.g. scanned/image-only PDFs: nothing to summarise.
        raise gr.Error("No extractable text was found in the uploaded PDF.")
    prompt = PROMPT.format(content=raw)

    # 2️⃣ HF text generation
    # text_generation takes the prompt positionally and the generation
    # settings as individual keyword arguments.
    out = hf_client.text_generation(
        prompt,
        model="Qwen/Qwen2.5-Coder-32B-Instruct",
        max_new_tokens=512,
        temperature=0.5,
    )
    # Tolerate both a bare string and a dict-shaped response.
    script = out.get("generated_text") if isinstance(out, dict) else out
    if not script or not script.strip():
        raise gr.Error("The model returned an empty script. Please try again.")

    # 3️⃣ MeloTTS audio
    # Synthesize into an in-memory buffer using the default English speaker.
    bio = io.BytesIO()
    melo_en.tts_to_file(
        script,
        speaker_ids[default_speaker],
        bio,
        speed=1.0,
        pbar=progress.tqdm,
        format="wav",
    )
    audio_bytes = bio.getvalue()
    return script, audio_bytes
# ────────────────────────────────────────────────────────────────────
# 5) Gradio UI
# ────────────────────────────────────────────────────────────────────
# Single-page layout: a PDF upload and a button as inputs, the generated
# script (rendered as Markdown) and the synthesised audio as outputs.
with gr.Blocks() as demo:
    gr.Markdown("## Lecture → English Podcast")
    pdf_in = gr.File(label="Upload Lecture PDF", file_types=[".pdf"])
    btn = gr.Button("Generate Podcast")
    script_md = gr.Markdown(label="Podcast Script")
    # No `type=` argument: gr.Audio only accepts "numpy" or "filepath" there,
    # and the setting describes how audio is passed *into* a function. As an
    # output, the component takes the bytes returned by generate_podcast.
    audio_out = gr.Audio(label="Podcast Audio")
    # Event listeners must be registered inside the Blocks context.
    btn.click(fn=generate_podcast, inputs=[pdf_in], outputs=[script_md, audio_out])
demo.launch()