# =============================================================
# Lecture → English Podcast Generator
#   • Script: HF Inference API (Qwen/Qwen2.5-Coder-32B-Instruct)
#   • Audio:  MeloTTS (English)
# =============================================================
import io
import re
import tempfile
import textwrap
from pathlib import Path
from typing import List

import gradio as gr
from PyPDF2 import PdfReader
from huggingface_hub import InferenceClient
import torch
import nltk

# NOTE(review): presumably needed by MeloTTS's English text front-end;
# kept ahead of the melo import exactly as in the original script.
nltk.download('averaged_perceptron_tagger_eng')

from melo.api import TTS

# ────────────────────────────────────────────────────────────────────
# 1) Setup HF client & MeloTTS for English
# ────────────────────────────────────────────────────────────────────
MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"

hf_client = InferenceClient()  # anonymous/public access
device = 'cuda' if torch.cuda.is_available() else 'cpu'
melo_en = TTS(language='EN', device=device)
speaker_ids = melo_en.hps.data.spk2id
# Use whichever speaker the model lists first as the default voice.
default_speaker = next(iter(speaker_ids.keys()))

# ────────────────────────────────────────────────────────────────────
# 2) Prompt template
# ────────────────────────────────────────────────────────────────────
PROMPT = textwrap.dedent("""
    You are producing a lively two-host educational podcast in English.
    Summarize the following lecture content into a dialogue of approximately 300 words.
    Make it engaging: hosts ask questions, clarify ideas with analogies, and wrap up with a concise recap.
    Preserve technical accuracy. Use Markdown for host names (e.g., **Host 1:**).

    ### Lecture Content
    {content}
""")


# ────────────────────────────────────────────────────────────────────
# 3) Helpers
# ────────────────────────────────────────────────────────────────────
def extract_pdf_text(pdf_path: str) -> str:
    """Return the text of every page of the PDF at *pdf_path*, newline-joined.

    Pages for which PyPDF2 yields no text contribute an empty string.
    """
    reader = PdfReader(pdf_path)
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def split_to_chunks(text: str, limit: int = 280) -> List[str]:
    """Group the sentences of *text* into chunks of roughly *limit* characters.

    Sentences are split after ``.``, ``!`` or ``?`` followed by whitespace and
    are never broken apart, so a single sentence longer than *limit* becomes
    its own over-long chunk. Not called elsewhere in this script.
    """
    sents = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
    chunks: List[str] = []
    curr = ""
    for sent in sents:
        if curr and len(curr) + len(sent) + 1 > limit:
            # Adding this sentence would overflow: close the current chunk.
            chunks.append(curr)
            curr = sent
        else:
            curr = f"{curr} {sent}".strip() if curr else sent
    if curr:
        chunks.append(curr)
    return chunks


def _speech_text(markdown: str) -> str:
    """Strip Markdown markup so the TTS engine is not fed literal symbols."""
    # Drop heading markers at the start of a line, then emphasis/code marks.
    without_headings = re.sub(r"(?m)^[ \t]{0,3}#{1,6}[ \t]*", "", markdown)
    return re.sub(r"[*`]+", "", without_headings).strip()


# ────────────────────────────────────────────────────────────────────
# 4) Main generate function
# ────────────────────────────────────────────────────────────────────
def generate_podcast(lecture_pdf, progress=gr.Progress()):
    """Turn an uploaded lecture PDF into a podcast script and its audio.

    Args:
        lecture_pdf: Value delivered by the ``gr.File`` input. Either an
            object exposing the file path as ``.name`` or the path itself.
        progress: Gradio progress tracker. It has to be a default argument
            (not created in the body) for Gradio to attach it to the event.

    Returns:
        A ``(script, audio_bytes)`` tuple: the Markdown dialogue and the
        synthesized speech as WAV bytes.

    Raises:
        gr.Error: If no file was uploaded, the PDF has no extractable text,
            or the text-generation request fails or returns nothing.
    """
    if not lecture_pdf:
        raise gr.Error("Please upload a lecture PDF.")

    # 1️⃣ Extract & prompt
    pdf_path = getattr(lecture_pdf, "name", lecture_pdf)
    raw = extract_pdf_text(str(pdf_path)).strip()
    if not raw:
        raise gr.Error(
            "No extractable text was found in the PDF "
            "(it may be a scanned document)."
        )
    prompt = PROMPT.format(content=raw)

    # 2️⃣ HF text generation
    # The prompt is positional and generation settings are plain keyword
    # arguments; there is no `inputs=` / `parameters=` on this method.
    try:
        out = hf_client.text_generation(
            prompt,
            model=MODEL_ID,
            max_new_tokens=512,
            temperature=0.5,
        )
    except Exception as err:  # UI boundary: surface the failure to the user
        raise gr.Error(f"Script generation failed: {err}") from err

    # Tolerate a dict-shaped response as the original code did.
    script = out.get("generated_text") if isinstance(out, dict) else out
    script = (script or "").strip()
    if not script:
        raise gr.Error("The model returned an empty script.")

    # 3️⃣ MeloTTS audio, synthesized in memory with the default speaker
    bio = io.BytesIO()
    melo_en.tts_to_file(
        _speech_text(script),
        speaker_ids[default_speaker],
        bio,
        speed=1.0,
        pbar=progress.tqdm,
        format="wav",
    )
    return script, bio.getvalue()


# ────────────────────────────────────────────────────────────────────
# 5) Gradio UI
# ────────────────────────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown("## Lecture → English Podcast")
    pdf_in = gr.File(label="Upload Lecture PDF", file_types=[".pdf"])
    btn = gr.Button("Generate Podcast")
    script_md = gr.Markdown(label="Podcast Script")
    # Output-only component: `type` is limited to "numpy"/"filepath" and only
    # affects inputs, so it is left at its default; the handler returns bytes.
    audio_out = gr.Audio(label="Podcast Audio", interactive=False)
    btn.click(fn=generate_podcast, inputs=[pdf_in], outputs=[script_md, audio_out])

demo.launch()