File size: 5,366 Bytes
53744b5 03ef672 53744b5 4c19533 03ef672 fe00684 f1adb14 50d2a40 03ef672 f1adb14 53744b5 cca7e91 369b2d2 03ef672 617d576 03ef672 fe00684 03ef672 617d576 03ef672 617d576 cca7e91 03ef672 cca7e91 03ef672 cca7e91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# =============================================================
# Lecture → English Podcast Generator
# • Script: HF Inference API (Qwen/Qwen2.5-Coder-32B-Instruct)
# • Audio: MeloTTS (English)
# =============================================================
import io
import re
import tempfile
import textwrap
from pathlib import Path
from typing import List
import gradio as gr
from PyPDF2 import PdfReader
from huggingface_hub import InferenceClient
import torch
import nltk
nltk.download('averaged_perceptron_tagger_eng')
from melo.api import TTS
# ────────────────────────────────────────────────────────────────────
# 1) Setup HF client & MeloTTS for English
# ────────────────────────────────────────────────────────────────────
# Shared Hugging Face inference client, constructed without an explicit token.
hf_client = InferenceClient() # anonymous/public access
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# English MeloTTS model, built once at module import and reused by generate_podcast.
melo_en = TTS(language='EN', device=device)
# Speaker table taken from the loaded model's hyper-parameters
# (presumably speaker name -> numeric id; verify against the MeloTTS version in use).
speaker_ids = melo_en.hps.data.spk2id
# Default voice: whichever key the speaker table yields first.
default_speaker = next(iter(speaker_ids.keys()))
# ────────────────────────────────────────────────────────────────────
# 2) Prompt template
# ────────────────────────────────────────────────────────────────────
# Instruction template for the script-writing model. `{content}` is the only
# placeholder; generate_podcast fills it with the extracted lecture text via
# str.format before sending the prompt.
PROMPT = textwrap.dedent("""
You are producing a lively two-host educational podcast in English.
Summarize the following lecture content into a dialogue of approximately 300 words.
Make it engaging: hosts ask questions, clarify ideas with analogies,
and wrap up with a concise recap. Preserve technical accuracy.
Use Markdown for host names (e.g., **Host 1:**).
### Lecture Content
{content}
""")
# ────────────────────────────────────────────────────────────────────
# 3) Helpers
# ────────────────────────────────────────────────────────────────────
def extract_pdf_text(pdf_path: str) -> str:
    """Return the text of every page in the PDF, joined with newlines.

    Args:
        pdf_path: Location of the PDF file handed to ``PdfReader``.

    Returns:
        One string with the pages in document order, separated by ``"\\n"``.
        A page for which ``extract_text()`` yields nothing contributes an
        empty string, so the page count is still reflected in the newlines.
    """
    page_texts = []
    for page in PdfReader(pdf_path).pages:
        # extract_text() may give back None/""; normalise both to "".
        page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)
def split_to_chunks(text: str, limit: int = 280) -> List[str]:
    """Greedily pack the sentences of *text* into chunks of about *limit* chars.

    The text is cut after every ``.``, ``!`` or ``?`` that is followed by
    whitespace. Consecutive sentences are joined with a single space for as
    long as the joined length stays within *limit*; the sentence that would
    overflow starts the next chunk.

    Args:
        text: Input text to split.
        limit: Maximum length of a chunk built from several sentences.

    Returns:
        The chunks in original order. A single sentence longer than *limit*
        is kept whole as its own chunk. Empty or whitespace-only input gives
        an empty list.
    """
    stripped_parts = (part.strip() for part in re.split(r"(?<=[.!?])\s+", text))

    result: List[str] = []
    buffer = ""
    for sentence in filter(None, stripped_parts):
        if not buffer:
            # First sentence of a new chunk is always accepted as-is.
            buffer = sentence
            continue
        # The extra 1 is the space that would join the two pieces.
        if len(buffer) + 1 + len(sentence) > limit:
            result.append(buffer)
            buffer = sentence
        else:
            buffer = f"{buffer} {sentence}"

    if buffer:
        result.append(buffer)
    return result
# ────────────────────────────────────────────────────────────────────
# 4) Main generate function
# ────────────────────────────────────────────────────────────────────
def generate_podcast(lecture_pdf: gr.File, progress: gr.Progress = gr.Progress()):
    """Turn an uploaded lecture PDF into a podcast script plus spoken audio.

    Args:
        lecture_pdf: Value delivered by the ``gr.File`` input. Either an
            object exposing the temp-file path as ``.name`` or the path
            itself is accepted.
        progress: Gradio progress tracker. It is declared as a default
            argument (not created in the body) because that is how Gradio
            recognises it and wires it to the UI; callers do not pass it.

    Returns:
        A ``(script, audio_bytes)`` tuple: the generated dialogue text and
        the WAV-encoded speech produced by MeloTTS.

    Raises:
        gr.Error: If no file was uploaded, the PDF contains no extractable
            text, or the model returned an empty script.
    """
    if not lecture_pdf:
        raise gr.Error("Please upload a lecture PDF.")

    # Step 1: extract the lecture text and build the prompt.
    # Fall back to the value itself when it has no `.name` (plain path).
    pdf_path = getattr(lecture_pdf, "name", lecture_pdf)
    raw = extract_pdf_text(pdf_path)
    if not raw.strip():
        # Image-only/scanned PDFs yield no text; prompting with nothing
        # would just make the model invent a lecture.
        raise gr.Error("No extractable text was found in the uploaded PDF.")
    prompt = PROMPT.format(content=raw)

    # Step 2: HF text generation.
    # The prompt is positional and the sampling options are direct keyword
    # arguments; `inputs=` / `parameters=` are not part of this method.
    out = hf_client.text_generation(
        prompt,
        model="Qwen/Qwen2.5-Coder-32B-Instruct",
        max_new_tokens=512,
        temperature=0.5,
    )
    # InferenceClient returns a dict or a str depending on version.
    script = out.get("generated_text") if isinstance(out, dict) else out
    if not script or not script.strip():
        raise gr.Error("The model returned an empty script. Please try again.")

    # Step 3: MeloTTS audio, written into an in-memory buffer with the
    # default English speaker (no temp files are needed).
    bio = io.BytesIO()
    melo_en.tts_to_file(
        script,
        speaker_ids[default_speaker],
        bio,
        speed=1.0,
        pbar=progress.tqdm,
        format="wav",
    )
    return script, bio.getvalue()
# ────────────────────────────────────────────────────────────────────
# 5) Gradio UI
# ────────────────────────────────────────────────────────────────────
# Single-page UI: upload a PDF, press the button, then read the generated
# script and listen to the synthesized audio.
with gr.Blocks() as demo:
    # The arrow replaces a mis-encoded character in the heading.
    gr.Markdown("## Lecture → English Podcast")
    pdf_in = gr.File(label="Upload Lecture PDF", file_types=[".pdf"])
    btn = gr.Button("Generate Podcast")
    script_md = gr.Markdown(label="Podcast Script")
    # No `type=` here: "bytes" is not an accepted value ("numpy" and
    # "filepath" are) and the option only governs how *input* audio is
    # handed to a function. As an output, the component takes the raw WAV
    # bytes returned by generate_podcast directly.
    audio_out = gr.Audio(label="Podcast Audio")
    btn.click(fn=generate_podcast, inputs=[pdf_in], outputs=[script_md, audio_out])

demo.launch()
|