Spaces:

HaiderAUT
/

PodCastIt

Build error

App Files Files Community

PodCastIt / app.py

HaiderAUT

Update app.py

f1adb14 verified about 1 month ago

raw

history blame

4.57 kB

	# =============================================================
	# Hugging Face Space – Lecture → Multilingual Podcast Generator
	# =============================================================
	# Uses SmolAgents HfApiModel for text generation and HF audio
	# pipeline for speech. Generates two‑host dialogues in five
	# languages (English, Bangla, Chinese, Urdu, Nepali) directly
	# from a PDF lecture upload.
	# -----------------------------------------------------------------

	import os
	import tempfile
	import uuid
	import textwrap
	from typing import List, Dict

	import gradio as gr
	from PyPDF2 import PdfReader
	from transformers import pipeline # for audio generation (e.g., xtts)
	from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool

	# ------------------------------------------------------------------
	# LLM configuration (SmolAgents wrapper for HF Inference API)
	# ------------------------------------------------------------------
	llm = HfApiModel(
	model_id='Qwen/Qwen2.5-Coder-32B-Instruct', # 34B parameter instruct model
	max_tokens=2096,
	temperature=0.5,
	custom_role_conversions=None,
	)

	# ------------------------------------------------------------------
	# Audio model (multilingual text ➜ speech); choose an open xtts‑v2
	# model that supports our languages. Switch model id if you prefer.
	# ------------------------------------------------------------------
	audio_pipe = pipeline(
	"text-to-audio",
	model="suno/xtts_v2",
	framework="pt",
	)

	LANG_INFO: Dict[str, Dict[str, str]] = {
	"en": {"name": "English", "speaker": "hostA"},
	"bn": {"name": "Bangla", "speaker": "hostB"},
	"zh": {"name": "Chinese", "speaker": "hostC"},
	"ur": {"name": "Urdu", "speaker": "hostD"},
	"ne": {"name": "Nepali", "speaker": "hostE"},
	}

	PROMPT_TEMPLATE = textwrap.dedent(
	"""
	You are producing a lively two‑host educational podcast in {lang_name}.
	Summarize the following lecture content into a dialogue of about 1200 words.
	Use an engaging style: hosts ask each other questions, clarify ideas, add
	simple analogies, and conclude with a short recap. Keep technical accuracy.

	### Lecture Content
	{content}
	"""
	)

	# ------------------------------------------------------------------
	# Utility: extract & truncate PDF text to fit LLM token budget
	# ------------------------------------------------------------------

	def extract_pdf_text(pdf_file) -> str:
	reader = PdfReader(pdf_file)
	raw = "\n".join(p.extract_text() or "" for p in reader.pages)
	return raw

	TOKEN_LIMIT = 6000 # conservative words (≈ tokens) for prompt+response


	def truncate_text(text: str, limit: int = TOKEN_LIMIT) -> str:
	words = text.split()
	return " ".join(words[:limit])

	# ------------------------------------------------------------------
	# Main generation function
	# ------------------------------------------------------------------

	def generate_podcast(pdf: gr.File) -> List[gr.Audio]:
	with tempfile.TemporaryDirectory() as tmpdir:
	lecture_text = truncate_text(extract_pdf_text(pdf.name))
	audio_outputs = []
	for lang_code, info in LANG_INFO.items():
	prompt = PROMPT_TEMPLATE.format(lang_name=info["name"], content=lecture_text)
	# --- Generate dialogue ---
	dialogue = llm(prompt)

	# Save text for transparency/debug
	text_path = os.path.join(tmpdir, f"podcast_{lang_code}.txt")
	with open(text_path, "w", encoding="utf-8") as f:
	f.write(dialogue)

	# --- TTS ---
	audio = audio_pipe(dialogue, forward_params={"language": lang_code})
	wav_path = os.path.join(tmpdir, f"podcast_{lang_code}.wav")
	audio["audio"].export(wav_path, format="wav")
	audio_outputs.append((wav_path, None)) # Gradio Audio expects (file, label)

	return audio_outputs

	# ------------------------------------------------------------------
	# Gradio Interface
	# ------------------------------------------------------------------

	audio_components = [gr.Audio(label=f"{info['name']} Podcast", type="filepath") for info in LANG_INFO.values()]

	iface = gr.Interface(
	fn=generate_podcast,
	inputs=gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
	outputs=audio_components,
	title="Lecture → Multilingual Podcast Generator",
	description="Upload a lecture PDF and get a two‑host audio podcast in English, Bangla, Chinese, Urdu, and Nepali."
	)

	if __name__ == "__main__":
	iface.launch()