Update app.py
app.py CHANGED
@@ -1,10 +1,10 @@
 # =============================================================
-# Hugging
+# Hugging Face Space – Lecture → Multilingual Podcast Generator
 # =============================================================
-#
-#
-#
-#
+# * Text generation: SmolAgents HfApiModel (Qwen/Qwen2.5‑Coder‑32B)
+# * Speech synthesis: **Coqui XTTS‑v2** open model via the TTS lib
+#   (no private / gated repo, so it runs without a HF token).
+# * Outputs five WAV files: English, Bangla, Chinese, Urdu, Nepali.
 # -----------------------------------------------------------------

 import os
@@ -15,35 +15,33 @@ from typing import List, Dict

 import gradio as gr
 from PyPDF2 import PdfReader
-from
-from
+from smolagents import HfApiModel
+from TTS.api import TTS  # ↳ Coqui TTS

 # ------------------------------------------------------------------
 # LLM configuration (SmolAgents wrapper for HF Inference API)
 # ------------------------------------------------------------------
 llm = HfApiModel(
-    model_id=
+    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
     max_tokens=2096,
     temperature=0.5,
     custom_role_conversions=None,
 )

 # ------------------------------------------------------------------
-#
-# model that supports our languages. Switch model id if you prefer.
+# XTTS‑v2 multilingual text‑to‑speech (≈ 1.2 GB, CPU OK)
 # ------------------------------------------------------------------
-
-
-
-
-)
+TTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
+
+tts = TTS(model_name=TTS_MODEL_NAME, progress_bar=False)
+# Automatically downloads and caches the model on first run.

 LANG_INFO: Dict[str, Dict[str, str]] = {
-    "en": {"name": "English"
-    "bn": {"name": "Bangla"
-    "zh": {"name": "Chinese"
-    "ur": {"name": "Urdu"
-    "ne": {"name": "Nepali"
+    "en": {"name": "English"},
+    "bn": {"name": "Bangla"},
+    "zh": {"name": "Chinese"},
+    "ur": {"name": "Urdu"},
+    "ne": {"name": "Nepali"},
 }

 PROMPT_TEMPLATE = textwrap.dedent(
@@ -59,59 +57,66 @@ PROMPT_TEMPLATE = textwrap.dedent(
 )

 # ------------------------------------------------------------------
-# Utility: extract & truncate PDF text to fit LLM token budget
+# Utility: extract & truncate PDF text to fit the LLM token budget
 # ------------------------------------------------------------------

 def extract_pdf_text(pdf_file) -> str:
     reader = PdfReader(pdf_file)
-
-    return raw
-
-TOKEN_LIMIT = 6000  # conservative words (≈ tokens) for prompt+response
+    return "\n".join(p.extract_text() or "" for p in reader.pages)

+TOKEN_LIMIT = 6000  # ≈ tokens (safe margin for prompt + response)

 def truncate_text(text: str, limit: int = TOKEN_LIMIT) -> str:
     words = text.split()
     return " ".join(words[:limit])

 # ------------------------------------------------------------------
-# Main generation
+# Main generation routine
 # ------------------------------------------------------------------

 def generate_podcast(pdf: gr.File) -> List[gr.Audio]:
     with tempfile.TemporaryDirectory() as tmpdir:
         lecture_text = truncate_text(extract_pdf_text(pdf.name))
         audio_outputs = []
+
         for lang_code, info in LANG_INFO.items():
+            # 1️⃣ Create prompt + generate dialogue
             prompt = PROMPT_TEMPLATE.format(lang_name=info["name"], content=lecture_text)
-            # --- Generate dialogue ---
             dialogue = llm(prompt)

-            # Save text for
-
-            with open(
+            # 2️⃣ Save raw dialogue text (for reference)
+            txt_path = os.path.join(tmpdir, f"podcast_{lang_code}.txt")
+            with open(txt_path, "w", encoding="utf-8") as f:
                 f.write(dialogue)

-            #
-            audio = audio_pipe(dialogue, forward_params={"language": lang_code})
+            # 3️⃣ Synthesise speech with XTTS‑v2
             wav_path = os.path.join(tmpdir, f"podcast_{lang_code}.wav")
-
-
+            # ► xtts_v2 accepts ISO‑639‑1 language codes directly
+            tts.tts_to_file(text=dialogue, language=lang_code, file_path=wav_path)
+
+            audio_outputs.append((wav_path, None))  # (file, label) for Gradio Audio

         return audio_outputs

 # ------------------------------------------------------------------
-# Gradio
+# Gradio UI
 # ------------------------------------------------------------------

-audio_components = [
+audio_components = [
+    gr.Audio(label=f"{info['name']} Podcast", type="filepath") for info in LANG_INFO.values()
+]

 iface = gr.Interface(
     fn=generate_podcast,
     inputs=gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
     outputs=audio_components,
     title="Lecture → Multilingual Podcast Generator",
-    description=
+    description=(
+        "Upload a lecture PDF and receive a two‑host audio podcast in English, "
+        "Bangla, Chinese, Urdu, and Nepali. Generation uses Qwen‑32B for the "
+        "dialogue and Coqui XTTS‑v2 for speech synthesis — no private repos "
+        "or API keys needed."
+    ),
 )

 if __name__ == "__main__":
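A note on the synthesis call in the new code: XTTS‑v2 is a multi-speaker voice-cloning model, and Coqui's tts_to_file normally takes a speaker reference (speaker_wav or a built-in speaker) alongside text and language; its language list is also fixed, with Chinese registered as "zh-cn" rather than bare "zh". The sketch below shows that call shape under those assumptions. It is not part of the commit: reference_voice.wav, XTTS_LANG, and synthesize are hypothetical names introduced here for illustration, and tts.languages can be checked at runtime to see which of the five target languages the model actually accepts.

from TTS.api import TTS

tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False)

# Map the app's language codes to the codes XTTS-v2 registers.
# (Inspect tts.languages at runtime to confirm availability per language.)
XTTS_LANG = {"en": "en", "zh": "zh-cn"}

def synthesize(dialogue: str, lang_code: str, wav_path: str) -> None:
    # Voice cloning needs a short reference clip; the filename here is a placeholder.
    tts.tts_to_file(
        text=dialogue,
        language=XTTS_LANG.get(lang_code, lang_code),
        speaker_wav="reference_voice.wav",  # hypothetical clip bundled with the Space
        file_path=wav_path,
    )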
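On the Gradio side, tempfile.TemporaryDirectory() removes its contents as soon as the with block exits, so WAV files written there can disappear before the gr.Audio components read them, and type="filepath" outputs expect plain paths rather than (path, None) tuples. A minimal sketch of an alternative return path, assuming the module-level names from the diff above (llm, PROMPT_TEMPLATE, LANG_INFO, extract_pdf_text, truncate_text) and the hypothetical synthesize helper sketched earlier:

import os
import tempfile
from typing import List

def generate_podcast_paths(pdf_path: str) -> List[str]:
    # mkdtemp() persists after the function returns, unlike TemporaryDirectory().
    out_dir = tempfile.mkdtemp()
    lecture_text = truncate_text(extract_pdf_text(pdf_path))
    paths: List[str] = []
    for lang_code, info in LANG_INFO.items():
        dialogue = llm(PROMPT_TEMPLATE.format(lang_name=info["name"], content=lecture_text))
        wav_path = os.path.join(out_dir, f"podcast_{lang_code}.wav")
        synthesize(dialogue, lang_code, wav_path)  # hypothetical helper from the sketch above
        paths.append(wav_path)  # one plain path per gr.Audio(type="filepath") output
    return paths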