Update app.py
Browse files
app.py
CHANGED
@@ -28,7 +28,6 @@ except ImportError:
|
|
28 |
# ------------------------------------------------------------------
|
29 |
# Globals & templates
|
30 |
# ------------------------------------------------------------------
|
31 |
-
# Gemini prompt for ~300-word two-host dialogue in English
|
32 |
PROMPT_TEMPLATE = textwrap.dedent(
|
33 |
"""
|
34 |
You are producing a lively two-host educational podcast in English.
|
@@ -41,29 +40,23 @@ PROMPT_TEMPLATE = textwrap.dedent(
|
|
41 |
"""
|
42 |
)
|
43 |
|
44 |
-
# TTS model ID for English MMS-TTS
|
45 |
HF_TTS_MODEL = "facebook/mms-tts-eng"
|
46 |
-
# Safe chunk size for HF text-to-speech
|
47 |
CHUNK_CHAR_LIMIT = 280
|
48 |
|
49 |
-
# Initialize HF TTS client (no token required for public models)
|
50 |
tts_client = InferenceClient()
|
51 |
|
52 |
# ------------------------------------------------------------------
|
53 |
# Helpers
|
54 |
# ------------------------------------------------------------------
|
55 |
def extract_pdf_text(pdf_path: str) -> str:
    """Read the PDF at *pdf_path* and return the text of all pages, newline-joined.

    Pages with no extractable text contribute an empty string rather than None.
    """
    pages = PdfReader(pdf_path).pages
    parts = [page.extract_text() or "" for page in pages]
    return "\n".join(parts)
59 |
|
60 |
def truncate_text(text: str, max_words: int = 8000) -> str:
    """Keep at most *max_words* whitespace-separated words of *text*.

    Used to cap lecture text so the prompt fits the LLM context window.
    Collapses runs of whitespace, since the words are re-joined with single
    spaces.
    """
    kept = text.split()[:max_words]
    return " ".join(kept)
|
64 |
|
65 |
def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
|
66 |
-
"""Split text into ≤limit-char chunks at sentence boundaries."""
|
67 |
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
|
68 |
chunks, current = [], ""
|
69 |
for sent in sentences:
|
@@ -77,7 +70,6 @@ def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
|
|
77 |
return chunks
|
78 |
|
79 |
def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
|
80 |
-
"""Chunk-safe TTS via HF Inference API, concatenating FLAC segments."""
|
81 |
chunks = split_to_chunks(text)
|
82 |
if not chunks:
|
83 |
raise ValueError("No text to synthesize.")
|
@@ -94,10 +86,9 @@ def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
|
|
94 |
segments.append(seg)
|
95 |
except CouldntDecodeError as e:
|
96 |
raise RuntimeError(f"Could not decode segment {i+1}: {e}")
|
97 |
-
|
98 |
-
final = sum(segments, AudioSegment.empty())
|
99 |
out_path = out_dir / "podcast_audio.flac"
|
100 |
-
|
101 |
return out_path
|
102 |
|
103 |
# ------------------------------------------------------------------
|
@@ -107,31 +98,30 @@ def generate_podcast(
|
|
107 |
gemini_api_key: Optional[str],
|
108 |
lecture_pdf: Optional[gr.File]
|
109 |
) -> List[Optional[Any]]:
|
110 |
-
# Validate inputs
|
111 |
if not gemini_api_key:
|
112 |
raise gr.Error("Enter your Google AI Studio API Key.")
|
113 |
if not lecture_pdf:
|
114 |
raise gr.Error("Upload a lecture PDF file.")
|
115 |
-
|
116 |
genai.configure(api_key=gemini_api_key)
|
117 |
-
|
118 |
raw = extract_pdf_text(lecture_pdf.name)
|
119 |
content = truncate_text(raw)
|
120 |
if not content.strip():
|
121 |
raise gr.Error("Lecture PDF contained no extractable text.")
|
122 |
-
|
123 |
try:
|
124 |
gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
|
125 |
except Exception as e:
|
126 |
raise gr.Error(f"Gemini init failed: {e}")
|
127 |
-
|
128 |
prompt = PROMPT_TEMPLATE.format(content=content)
|
129 |
try:
|
130 |
resp = gemini_model.generate_content(prompt)
|
131 |
script = resp.text or ""
|
132 |
except Exception as e:
|
133 |
raise gr.Error(f"Gemini generation error: {e}")
|
134 |
-
|
135 |
with tempfile.TemporaryDirectory() as td:
|
136 |
tmp = Path(td)
|
137 |
# Save script file
|
@@ -151,12 +141,16 @@ def generate_podcast(
|
|
151 |
iface = gr.Interface(
|
152 |
fn=generate_podcast,
|
153 |
inputs=[
|
154 |
-
gr.Textbox(
|
|
|
|
|
|
|
|
|
155 |
gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
|
156 |
],
|
157 |
outputs=[
|
158 |
gr.Audio(label="English Podcast", type="filepath"),
|
159 |
-
gr.Markdown(label="English Script"),
|
160 |
gr.File(label="Download English Script (.txt)", type="filepath"),
|
161 |
],
|
162 |
title="Lecture → English Podcast & Script",
|
|
|
28 |
# ------------------------------------------------------------------
|
29 |
# Globals & templates
|
30 |
# ------------------------------------------------------------------
|
|
|
31 |
PROMPT_TEMPLATE = textwrap.dedent(
|
32 |
"""
|
33 |
You are producing a lively two-host educational podcast in English.
|
|
|
40 |
"""
|
41 |
)
|
42 |
|
|
|
43 |
HF_TTS_MODEL = "facebook/mms-tts-eng"
|
|
|
44 |
CHUNK_CHAR_LIMIT = 280
|
45 |
|
|
|
46 |
tts_client = InferenceClient()
|
47 |
|
48 |
# ------------------------------------------------------------------
|
49 |
# Helpers
|
50 |
# ------------------------------------------------------------------
|
51 |
def extract_pdf_text(pdf_path: str) -> str:
    """Extract and join the text of every page in the PDF at *pdf_path*.

    A page whose extraction yields None is treated as empty text.
    """
    return "\n".join(pg.extract_text() or "" for pg in PdfReader(pdf_path).pages)
|
54 |
|
55 |
def truncate_text(text: str, max_words: int = 8000) -> str:
    """Return *text* limited to its first *max_words* words.

    Splitting on whitespace and re-joining with single spaces also
    normalizes any internal runs of whitespace.
    """
    limited = text.split()
    del limited[max_words:]
    return " ".join(limited)
|
58 |
|
59 |
def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
|
|
|
60 |
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
|
61 |
chunks, current = [], ""
|
62 |
for sent in sentences:
|
|
|
70 |
return chunks
|
71 |
|
72 |
def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
|
|
|
73 |
chunks = split_to_chunks(text)
|
74 |
if not chunks:
|
75 |
raise ValueError("No text to synthesize.")
|
|
|
86 |
segments.append(seg)
|
87 |
except CouldntDecodeError as e:
|
88 |
raise RuntimeError(f"Could not decode segment {i+1}: {e}")
|
89 |
+
final_audio = sum(segments, AudioSegment.empty())
|
|
|
90 |
out_path = out_dir / "podcast_audio.flac"
|
91 |
+
final_audio.export(out_path, format="flac")
|
92 |
return out_path
|
93 |
|
94 |
# ------------------------------------------------------------------
|
|
|
98 |
gemini_api_key: Optional[str],
|
99 |
lecture_pdf: Optional[gr.File]
|
100 |
) -> List[Optional[Any]]:
|
|
|
101 |
if not gemini_api_key:
|
102 |
raise gr.Error("Enter your Google AI Studio API Key.")
|
103 |
if not lecture_pdf:
|
104 |
raise gr.Error("Upload a lecture PDF file.")
|
105 |
+
|
106 |
genai.configure(api_key=gemini_api_key)
|
107 |
+
|
108 |
raw = extract_pdf_text(lecture_pdf.name)
|
109 |
content = truncate_text(raw)
|
110 |
if not content.strip():
|
111 |
raise gr.Error("Lecture PDF contained no extractable text.")
|
112 |
+
|
113 |
try:
|
114 |
gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
|
115 |
except Exception as e:
|
116 |
raise gr.Error(f"Gemini init failed: {e}")
|
117 |
+
|
118 |
prompt = PROMPT_TEMPLATE.format(content=content)
|
119 |
try:
|
120 |
resp = gemini_model.generate_content(prompt)
|
121 |
script = resp.text or ""
|
122 |
except Exception as e:
|
123 |
raise gr.Error(f"Gemini generation error: {e}")
|
124 |
+
|
125 |
with tempfile.TemporaryDirectory() as td:
|
126 |
tmp = Path(td)
|
127 |
# Save script file
|
|
|
141 |
iface = gr.Interface(
|
142 |
fn=generate_podcast,
|
143 |
inputs=[
|
144 |
+
gr.Textbox(
|
145 |
+
label="Google Gemini API Key",
|
146 |
+
type="password",
|
147 |
+
placeholder="Paste your key here"
|
148 |
+
),
|
149 |
gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
|
150 |
],
|
151 |
outputs=[
|
152 |
gr.Audio(label="English Podcast", type="filepath"),
|
153 |
+
gr.Markdown(label="English Script"), # renders the script
|
154 |
gr.File(label="Download English Script (.txt)", type="filepath"),
|
155 |
],
|
156 |
title="Lecture → English Podcast & Script",
|