# =============================================================
# Lecture → Podcast & Script Generator (English Only)
# Two-step: 1) Gemini script 2) HF MMS-TTS audio
# =============================================================
import re
import tempfile
import textwrap
from pathlib import Path
from typing import List
import gradio as gr
from PyPDF2 import PdfReader
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
# Google Gemini SDK
try:
    import google.generativeai as genai
except ImportError:
    raise ImportError(
        "Please install the Google Generative AI SDK:\n"
        "  pip install google-generativeai"
    )
# Hugging Face TTS client (anonymous/public)
from huggingface_hub import InferenceClient
# ------------------------------------------------------------------
# Globals & templates
# ------------------------------------------------------------------
PROMPT_TEMPLATE = textwrap.dedent(
    """
    You are producing a lively two-host educational podcast in English.
    Summarize the following lecture content into a dialogue of approximately 300 words.
    Make it engaging: hosts ask questions, clarify ideas with analogies, and wrap up with a concise recap.
    Preserve technical accuracy. Use Markdown for host names (e.g., **Host 1:**).

    ### Lecture Content
    {content}
    """
)
HF_TTS_MODEL = "facebook/mms-tts-eng"
CHUNK_CHAR_LIMIT = 280
# Initialize the HF TTS client once
tts_client = InferenceClient()
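# A possible variant (not part of the original app; assumes an HF_TOKEN env var):
# an authenticated client avoids the tighter rate limits applied to anonymous calls.
#
#   import os
#   tts_client = InferenceClient(token=os.environ.get("HF_TOKEN"))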
# ------------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------------
def extract_pdf_text(pdf_path: str) -> str:
    """Concatenate the text of every PDF page (empty string for pages with no extractable text)."""
    reader = PdfReader(pdf_path)
    return "\n".join(page.extract_text() or "" for page in reader.pages)
def truncate_text(text: str, max_words: int = 8000) -> str:
    """Trim the lecture text to at most `max_words` words to stay within the model's context budget."""
    words = text.split()
    return " ".join(words[:max_words])
def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
    """Split text into sentence-aligned chunks of roughly `limit` characters for the TTS model."""
    sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
    chunks, current = [], ""
    for sent in sentences:
        if current and len(current) + len(sent) + 1 > limit:
            chunks.append(current)
            current = sent
        else:
            current = f"{current} {sent}".strip() if current else sent
    if current:
        chunks.append(current)
    return chunks
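# Worked example of the chunking behaviour above (illustrative limit, not the default):
#   split_to_chunks("First point. Second point. Third point.", limit=30)
#   -> ["First point. Second point.", "Third point."]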
def synthesize_speech(script: str, model_id: str, out_dir: Path) -> str:
    """Synthesize the script chunk by chunk via the HF Inference API and join the FLAC parts."""
    chunks = split_to_chunks(script)
    if not chunks:
        raise RuntimeError("No text chunks to synthesize.")
    segments = []
    for idx, chunk in enumerate(chunks):
        audio_bytes = tts_client.text_to_speech(chunk, model=model_id)
        part_path = out_dir / f"seg_{idx}.flac"
        part_path.write_bytes(audio_bytes)
        try:
            seg = AudioSegment.from_file(part_path, format="flac")
            segments.append(seg)
        except CouldntDecodeError as e:
            raise RuntimeError(f"Failed to decode chunk {idx}: {e}") from e
    final_audio = sum(segments, AudioSegment.empty())
    final_path = out_dir / "podcast_audio.flac"
    final_audio.export(final_path, format="flac")
    return str(final_path)
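# Note: pydub decodes and encodes FLAC through ffmpeg, so ffmpeg must be available on
# PATH in the runtime environment; sum() above concatenates the parts because
# AudioSegment overloads "+" for sequential joining.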
# ------------------------------------------------------------------
# Step 1: Generate script via Gemini
# ------------------------------------------------------------------
def generate_script(
    gemini_api_key: str,
    lecture_pdf: gr.File
) -> List[str]:
    """Step 1: turn an uploaded lecture PDF into a two-host dialogue script via Gemini."""
    if not gemini_api_key:
        raise gr.Error("Please enter your Google AI Studio API Key.")
    if not lecture_pdf:
        raise gr.Error("Please upload a lecture PDF.")
    # Configure Gemini
    try:
        genai.configure(api_key=gemini_api_key)
        model = genai.GenerativeModel("gemini-1.5-flash-latest")
    except Exception as e:
        raise gr.Error(f"Gemini init/config error: {e}")
    # Extract and truncate text
    # gr.File may hand the handler either a filepath string or a tempfile-like
    # object depending on the Gradio version, so handle both.
    pdf_path = lecture_pdf if isinstance(lecture_pdf, str) else lecture_pdf.name
    raw_text = extract_pdf_text(pdf_path)
    content = truncate_text(raw_text)
    if not content.strip():
        raise gr.Error("No extractable text found in the PDF.")
    # Generate dialogue script
    prompt = PROMPT_TEMPLATE.format(content=content)
    try:
        response = model.generate_content(prompt)
        script = response.text or ""
    except Exception as e:
        raise gr.Error(f"Gemini generation error: {e}")
    return [script, script]  # [for Markdown display, for state storage]
# ------------------------------------------------------------------
# Step 2: Generate audio from provided script
# ------------------------------------------------------------------
def generate_audio(
    script: str
) -> str:
    """Step 2: synthesize the generated script into a single podcast audio file."""
    if not script:
        raise gr.Error("No script available. Please generate the script first.")
    # Use a persistent temp dir; a TemporaryDirectory context manager would delete
    # the generated file before Gradio could serve it back to the browser.
    out_dir = Path(tempfile.mkdtemp())
    audio_path = synthesize_speech(script, HF_TTS_MODEL, out_dir)
    return audio_path
# ------------------------------------------------------------------
# Gradio UI
# ------------------------------------------------------------------
with gr.Blocks() as demo:
    # Shared state for the script
    script_state = gr.State()

    with gr.Tab("Generate Script"):
        api_key_input = gr.Textbox(
            label="Google Gemini API Key",
            type="password",
            placeholder="Enter your key"
        )
        pdf_input = gr.File(
            label="Upload Lecture PDF",
            file_types=[".pdf"]
        )
        script_md = gr.Markdown(
            label="Generated Script",
        )
        gen_script_btn = gr.Button("Generate Script")
        gen_script_btn.click(
            fn=generate_script,
            inputs=[api_key_input, pdf_input],
            outputs=[script_md, script_state]
        )

    with gr.Tab("Generate Audio"):
        gen_audio_btn = gr.Button("Generate Audio")
        audio_out = gr.Audio(
            label="Podcast Audio",
            type="filepath"
        )
        gen_audio_btn.click(
            fn=generate_audio,
            inputs=[script_state],
            outputs=[audio_out]
        )
demo.launch()
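# When running locally, a temporary public link could instead be requested with:
#   demo.launch(share=True)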