HaiderAUT committed (verified)
Commit 53744b5 · 1 Parent(s): f036ad8

Update app.py

Files changed (1):
  1. app.py +274 -102

app.py CHANGED
@@ -1,61 +1,62 @@
  import os
  import re
  import tempfile
  import textwrap
  from pathlib import Path
- from typing import List, Dict, Optional

  import gradio as gr
- from huggingface_hub import InferenceClient
- from PyPDF2 import PdfReader  # For PDF processing
- from smolagents import HfApiModel  # For LLM interaction
  from pydub import AudioSegment
  from pydub.exceptions import CouldntDecodeError

- # ------------------------------------------------------------------
- # LLM setup – remote Qwen model via SmolAgents
- # ------------------------------------------------------------------
- llm = HfApiModel(
-     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
-     max_tokens=2048,
-     temperature=0.5,
- )
-
- # ------------------------------------------------------------------
- # Hugging Face Inference API client
- # ------------------------------------------------------------------
- client = InferenceClient(token=os.getenv("HF_TOKEN", None))

  # ------------------------------------------------------------------
- # Language metadata and open TTS models
  # ------------------------------------------------------------------
  LANG_INFO: Dict[str, Dict[str, str]] = {
-     "en": {"name": "English", "tts_model": "facebook/mms-tts-eng"},
-     "bn": {"name": "Bangla", "tts_model": "facebook/mms-tts-ben"},
-     "zh": {"name": "Chinese", "tts_model": "facebook/mms-tts-zho"},
-     "ur": {"name": "Urdu", "tts_model": "facebook/mms-tts-urd"},
-     "ne": {"name": "Nepali", "tts_model": "facebook/mms-tts-npi"},
  }
  LANG_CODE_BY_NAME = {info["name"]: code for code, info in LANG_INFO.items()}

  PROMPT_TEMPLATE = textwrap.dedent(
      """
      You are producing a lively two-host educational podcast in {lang_name}.
-     Summarize the following lecture content into a dialogue of ~300 words.
      Make it engaging: hosts ask questions, clarify ideas with analogies, and
-     wrap up with a concise recap. Preserve technical accuracy.

      ### Lecture Content
      {content}
      """
  )

- TOKEN_LIMIT = 8000
- CHUNK_CHAR_LIMIT = 280
-
- # ------------------------------------------------------------------
- # PDF text extraction
- # ------------------------------------------------------------------
  def extract_pdf_text(pdf_path: str) -> str:
      try:
          reader = PdfReader(pdf_path)
@@ -63,114 +64,285 @@ def extract_pdf_text(pdf_path: str) -> str:
      except Exception as e:
          raise gr.Error(f"Failed to process PDF: {e}")

- # ------------------------------------------------------------------
- # Helpers
- # ------------------------------------------------------------------
  def truncate_text(text: str, limit: int = TOKEN_LIMIT) -> str:
      words = text.split()
      if len(words) > limit:
          return " ".join(words[:limit])
      return text


  def _split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
-     sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
-     chunks, current = [], ""
      for sent in sentences:
-         if current and len(current) + len(sent) + 1 > limit:
-             chunks.append(current)
-             current = sent
          else:
-             current = f"{current} {sent}".strip()
-     if current:
-         chunks.append(current)
-     return chunks


- def synthesize_speech(text: str, model_id: str, tempdir: Path) -> Path:
      chunks = _split_to_chunks(text)
      if not chunks:
-         raise ValueError("No text chunks to synthesize.")

-     segments = []
-     for i, chunk in enumerate(chunks):
          try:
-             audio_bytes = client.text_to_speech(chunk, model=model_id)
-         except HubHTTPError as e:
-             raise RuntimeError(f"TTS error on chunk {i}: {e}")
-         part = tempdir / f"seg_{i}.flac"
-         part.write_bytes(audio_bytes)
          try:
-             seg = AudioSegment.from_file(part, format="flac")
          except CouldntDecodeError as e:
-             raise RuntimeError(f"Decode error on chunk {i}: {e}")
-         segments.append(seg)

-     combined = sum(segments, AudioSegment.empty())
-     outpath = tempdir / "podcast.flac"
-     combined.export(outpath, format="flac")
-     return outpath

  # ------------------------------------------------------------------
- # Main pipeline
  # ------------------------------------------------------------------

- def generate_podcast(pdf_file: Optional[gr.File], languages: List[str]):
-     if not pdf_file:
          raise gr.Error("Please upload a PDF file.")
-     if not languages:
-         raise gr.Error("Select at least one language.")
-
-     # Extract and truncate
-     text = extract_pdf_text(pdf_file.name)
-     if not text.strip():
-         raise gr.Error("No text found in PDF.")
-     lecture = truncate_text(text)
-
-     transcripts, audios = [], []
-     with tempfile.TemporaryDirectory() as td:
-         base = Path(td)
-         for name in languages:
-             code = LANG_CODE_BY_NAME[name]
-             # 1️⃣ Dialogue
-             prompt = PROMPT_TEMPLATE.format(lang_name=name, content=lecture)
-             dialogue = llm(prompt).strip()
-             transcripts.append(dialogue)
-             # 2️⃣ Speech
-             tempdir = base / code
-             tempdir.mkdir(parents=True, exist_ok=True)
-             audio_path = synthesize_speech(dialogue, LANG_INFO[code]["tts_model"], tempdir)
-             audios.append(str(audio_path))
-
-     # Return alternating transcript and audio path
-     results: List = []
-     for t, a in zip(transcripts, audios):
-         results.extend([t, a])
-     return results

  # ------------------------------------------------------------------
- # Gradio UI
  # ------------------------------------------------------------------
- languages = [info["name"] for info in LANG_INFO.values()]

  inputs = [
-     gr.File(label="Lecture PDF", file_types=[".pdf"]),
-     gr.CheckboxGroup(languages, value=["English"], label="Languages"),
  ]

- # Two outputs per language: transcript and audio
  outputs = []
- for name in languages:
-     outputs.append(gr.Textbox(label=f"{name} Transcript", interactive=False))
-     outputs.append(gr.Audio(label=f"{name} Podcast", type="filepath"))

  iface = gr.Interface(
      fn=generate_podcast,
      inputs=inputs,
      outputs=outputs,
-     title="Lecture → Podcast Generator",
-     description="Upload a lecture PDF, select languages, get dialogue transcript and audio podcast."
  )

  if __name__ == "__main__":
-     iface.launch()
+ # =============================================================
+ # Hugging Face Space – Lecture → Podcast Generator (Google Gemini & TTS)
+ # =============================================================
+ # • **Text generation** – Google Gemini API
+ # • **Speech synthesis** – Google Cloud Text-to-Speech API
+ # -----------------------------------------------------------------
+
  import os
  import re
  import tempfile
  import textwrap
  from pathlib import Path
+ from typing import List, Dict, Optional, Any

  import gradio as gr
+ from PyPDF2 import PdfReader
  from pydub import AudioSegment
  from pydub.exceptions import CouldntDecodeError

+ # Import Google Cloud libraries
+ try:
+     import google.generativeai as genai
+     from google.cloud import texttospeech
+ except ImportError:
+     raise ImportError(
+         "Please install required Google libraries: "
+         "pip install google-generativeai google-cloud-texttospeech"
+     )

  # ------------------------------------------------------------------
+ # Language metadata for Google TTS (BCP-47 codes).
+ # You may also want to specify particular voices (e.g., "en-US-Wavenet-D");
+ # for simplicity, we let Google pick a standard voice for each language code.
  # ------------------------------------------------------------------
  LANG_INFO: Dict[str, Dict[str, str]] = {
+     "en": {"name": "English", "tts_lang_code": "en-US"},
+     "bn": {"name": "Bangla", "tts_lang_code": "bn-IN"},
+     "zh": {"name": "Chinese (Mandarin)", "tts_lang_code": "cmn-CN"},  # cmn = Mandarin
+     "ur": {"name": "Urdu", "tts_lang_code": "ur-PK"},
+     "ne": {"name": "Nepali", "tts_lang_code": "ne-NP"},
  }
  LANG_CODE_BY_NAME = {info["name"]: code for code, info in LANG_INFO.items()}

+ # ------------------------------------------------------------------
+ # Prompt template (adjust if needed for Gemini's style)
+ # ------------------------------------------------------------------
  PROMPT_TEMPLATE = textwrap.dedent(
      """
      You are producing a lively two-host educational podcast in {lang_name}.
+     Summarize the following lecture content into a dialogue of **approximately 300 words**.
      Make it engaging: hosts ask questions, clarify ideas with analogies, and
+     wrap up with a concise recap. Preserve technical accuracy. Use Markdown for host names (e.g., **Host 1:**).

      ### Lecture Content
      {content}
      """
  )

+ # PDF helpers (unchanged) -------------------------------------------
  def extract_pdf_text(pdf_path: str) -> str:
      try:
          reader = PdfReader(pdf_path)
      except Exception as e:
          raise gr.Error(f"Failed to process PDF: {e}")

+ TOKEN_LIMIT = 8000  # Word limit for input text
+
  def truncate_text(text: str, limit: int = TOKEN_LIMIT) -> str:
      words = text.split()
      if len(words) > limit:
+         gr.Warning(f"Input text was truncated from {len(words)} to {limit} words to fit the LLM context window.")
          return " ".join(words[:limit])
      return text

+ # ------------------------------------------------------------------
+ # TTS helper – chunk long text (Google TTS has a limit of 5000 bytes per request)
+ # ------------------------------------------------------------------
+ CHUNK_CHAR_LIMIT = 1500  # Google TTS limit is 5000 bytes per request; characters are a safer proxy.
+ # UTF-8 averages up to ~3 bytes/char, so 1500 chars is roughly 4500 bytes.

  def _split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
+     sentences_raw = re.split(r"(?<=[.!?])\s+", text.strip())
+     sentences = [s.strip() for s in sentences_raw if s.strip()]
+     if not sentences: return []
+     chunks, current_chunk = [], ""
      for sent in sentences:
+         if current_chunk and (len(current_chunk) + len(sent) + 1 > limit):
+             chunks.append(current_chunk)
+             current_chunk = sent
          else:
+             current_chunk += (" " + sent) if current_chunk else sent
+     if current_chunk: chunks.append(current_chunk)
+     return [chunk for chunk in chunks if chunk.strip()]


+ def synthesize_speech_google(
+     text: str,
+     google_lang_code: str,
+     lang_tmpdir: Path,
+     tts_client: texttospeech.TextToSpeechClient
+ ) -> Path:
+     """Split text, synthesize each chunk with Google TTS, and concatenate the MP3s."""
      chunks = _split_to_chunks(text)
      if not chunks:
+         raise ValueError("Text resulted in no speakable chunks after splitting.")
+
+     audio_segments: List[AudioSegment] = []
+     for idx, chunk in enumerate(chunks):
+         gr.Info(f"Synthesizing audio for chunk {idx + 1}/{len(chunks)} with Google TTS...")
+
+         synthesis_input = texttospeech.SynthesisInput(text=chunk)
+         voice = texttospeech.VoiceSelectionParams(
+             language_code=google_lang_code,
+             # You can specify a voice name, e.g., "en-US-Wavenet-D"
+             # ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL  # Optional
+         )
+         audio_config = texttospeech.AudioConfig(
+             audio_encoding=texttospeech.AudioEncoding.MP3
+         )

          try:
+             response = tts_client.synthesize_speech(
+                 input=synthesis_input, voice=voice, audio_config=audio_config
+             )
+         except Exception as e:
+             raise RuntimeError(f"Google TTS request failed for chunk {idx + 1}: {e}") from e
+
+         part_path = lang_tmpdir / f"part_{idx}.mp3"
+         with open(part_path, "wb") as out_mp3:
+             out_mp3.write(response.audio_content)
+
          try:
+             segment = AudioSegment.from_mp3(part_path)
+             audio_segments.append(segment)
          except CouldntDecodeError as e:
+             raise RuntimeError(f"Failed to decode MP3 audio chunk {idx + 1} from {part_path}. Error: {e}") from e
+
+     if not audio_segments:
+         raise RuntimeError("No audio segments were successfully synthesized or decoded.")

+     combined_audio = sum(audio_segments, AudioSegment.empty())
+     final_path = lang_tmpdir / "podcast_audio.mp3"
+     combined_audio.export(final_path, format="mp3")
+     return final_path

  # ------------------------------------------------------------------
+ # Main pipeline function for Gradio
  # ------------------------------------------------------------------

+ def generate_podcast(
+     gemini_api_key: Optional[str],
+     pdf_file_obj: Optional[gr.File],
+     selected_lang_names: List[str]
+ ) -> List[Optional[Any]]:
+
+     if not gemini_api_key:
+         raise gr.Error("Please enter your Google AI Studio API Key for Gemini.")
+     if not pdf_file_obj:
          raise gr.Error("Please upload a PDF file.")
+     if not selected_lang_names:
+         raise gr.Error("Please select at least one language for the podcast.")
+
+     try:
+         genai.configure(api_key=gemini_api_key)
+     except Exception as e:
+         raise gr.Error(f"Failed to configure the Gemini API. Check your API key. Error: {e}")
+
+     # IMPORTANT: Google Cloud Text-to-Speech client initialization.
+     # The client expects the GOOGLE_APPLICATION_CREDENTIALS environment variable to be set,
+     # pointing to your service account JSON key file.
+     # In Hugging Face Spaces, upload this JSON as a Secret, e.g. named
+     # GOOGLE_CREDS_JSON_CONTENT (paste the content of the file).
+     # Then, at startup or here, write that content to a temporary file and point
+     # GOOGLE_APPLICATION_CREDENTIALS at it. Alternatively, GOOGLE_APPLICATION_CREDENTIALS
+     # can point directly to a key file on disk (less convenient for pasted content).
+
+     # Set GOOGLE_APPLICATION_CREDENTIALS from a Space secret, if present:
+     google_creds_json_content = os.getenv("GOOGLE_CREDS_JSON_CONTENT")
+     temp_creds_file = None
+     if google_creds_json_content:
+         try:
+             fd, temp_creds_path = tempfile.mkstemp(suffix=".json")
+             with os.fdopen(fd, "w") as tmp:
+                 tmp.write(google_creds_json_content)
+             os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = temp_creds_path
+             temp_creds_file = Path(temp_creds_path)
+             gr.Info("Using GOOGLE_CREDS_JSON_CONTENT secret for Text-to-Speech API authentication.")
+         except Exception as e:
+             gr.Warning(f"Could not process GOOGLE_CREDS_JSON_CONTENT secret: {e}. TTS might fail.")
+     elif not os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
+         gr.Warning(
+             "GOOGLE_APPLICATION_CREDENTIALS environment variable not set, and no "
+             "GOOGLE_CREDS_JSON_CONTENT secret found. "
+             "Google Text-to-Speech API calls may fail. "
+             "Please set up authentication for Google Cloud Text-to-Speech."
+         )
+
+     try:
+         tts_client = texttospeech.TextToSpeechClient()
+     except Exception as e:
+         raise gr.Error(f"Failed to initialize the Google Text-to-Speech client. Ensure authentication is set up. Error: {e}")
+
+     selected_codes = [LANG_CODE_BY_NAME[name] for name in selected_lang_names]
+     results_data: Dict[str, Dict[str, Optional[str]]] = {
+         code: {"audio": None, "script_text": None, "script_file": None}
+         for code in LANG_INFO.keys()
+     }
+
+     try:
+         with tempfile.TemporaryDirectory() as td:
+             tmpdir_base = Path(td)
+
+             gr.Info("Extracting text from PDF...")
+             lecture_raw = extract_pdf_text(pdf_file_obj.name)
+             lecture_text = truncate_text(lecture_raw)
+
+             if not lecture_text.strip():
+                 raise gr.Error("Could not extract any text from the PDF, or the PDF content is empty.")
+
+             # Initialize the Gemini model (e.g., 'gemini-1.5-flash' or 'gemini-pro').
+             # Choose a model appropriate for your task and quota.
+             gemini_model = genai.GenerativeModel('gemini-1.5-flash-latest')  # or 'gemini-pro'
+
+             for code in selected_codes:
+                 info = LANG_INFO[code]
+                 lang_name = info["name"]
+                 google_tts_lang = info["tts_lang_code"]
+
+                 gr.Info(f"Processing for {lang_name}...")
+                 lang_tmpdir = tmpdir_base / code
+                 lang_tmpdir.mkdir(parents=True, exist_ok=True)
+
+                 dialogue: Optional[str] = None
+
+                 gr.Info(f"Generating dialogue for {lang_name} with Gemini...")
+                 prompt_for_gemini = PROMPT_TEMPLATE.format(lang_name=lang_name, content=lecture_text)
+                 try:
+                     response = gemini_model.generate_content(prompt_for_gemini)
+                     dialogue_raw = response.text  # Access the text part of the response
+
+                     if not dialogue_raw or not dialogue_raw.strip():
+                         gr.Warning(f"Gemini returned empty dialogue for {lang_name}. Skipping.")
+                         continue
+
+                     dialogue = dialogue_raw
+                     results_data[code]["script_text"] = dialogue
+                     script_file_path = lang_tmpdir / f"podcast_script_{code}.txt"
+                     script_file_path.write_text(dialogue, encoding="utf-8")
+                     results_data[code]["script_file"] = str(script_file_path)
+
+                 except Exception as e:
+                     gr.Error(f"Error generating dialogue with Gemini for {lang_name}: {e}")
+                     continue
+
+                 if dialogue:
+                     gr.Info(f"Synthesizing speech for {lang_name} with Google TTS...")
+                     try:
+                         tts_path = synthesize_speech_google(dialogue, google_tts_lang, lang_tmpdir, tts_client)
+                         results_data[code]["audio"] = str(tts_path)
+                     except ValueError as e:
+                         gr.Warning(f"Could not synthesize speech for {lang_name} (ValueError): {e}")
+                     except RuntimeError as e:
+                         gr.Error(f"Error synthesizing speech for {lang_name} (RuntimeError): {e}")
+                     except Exception as e:
+                         gr.Error(f"Unexpected error during speech synthesis for {lang_name}: {e}")
+
+             final_ordered_results: List[Optional[Any]] = []
+             for code_key in LANG_INFO.keys():
+                 lang_output_data = results_data[code_key]
+                 final_ordered_results.append(lang_output_data["audio"])
+                 final_ordered_results.append(lang_output_data["script_text"])
+                 final_ordered_results.append(lang_output_data["script_file"])
+
+             gr.Info("Podcast generation complete!")
+             return final_ordered_results
+
+     except gr.Error as e:
+         raise e
+     except Exception as e:
+         import traceback
+         print("An unexpected error occurred in generate_podcast:")
+         traceback.print_exc()
+         raise gr.Error(f"An unexpected server error occurred. Details: {str(e)[:100]}...")
+     finally:
+         # Clean up the temporary credentials file if it was created
+         if temp_creds_file and temp_creds_file.exists():
+             try:
+                 temp_creds_file.unlink()
+                 # Optionally unset the env var, though it is specific to this run:
+                 # if os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") == str(temp_creds_file):
+                 #     del os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
+             except Exception as e_clean:
+                 print(f"Warning: Could not clean up temporary credentials file {temp_creds_file}: {e_clean}")
+

  # ------------------------------------------------------------------
+ # Gradio Interface Setup
  # ------------------------------------------------------------------
+ language_names_ordered = [LANG_INFO[code]["name"] for code in LANG_INFO.keys()]

  inputs = [
+     gr.Textbox(
+         label="Enter your Google AI Studio API Key (for Gemini)",
+         type="password",
+         placeholder="Paste your API key here",
+     ),
+     gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
+     gr.CheckboxGroup(
+         choices=language_names_ordered,
+         value=["English"],
+         label="Select podcast language(s) to generate",
+     ),
  ]

  outputs = []
+ for code in LANG_INFO.keys():
+     info = LANG_INFO[code]
+     lang_name = info["name"]
+     outputs.append(gr.Audio(label=f"{lang_name} Podcast (.mp3)", type="filepath"))
+     outputs.append(gr.Markdown(label=f"{lang_name} Script"))
+     outputs.append(gr.File(label=f"Download {lang_name} Script (.txt)", type="filepath"))

  iface = gr.Interface(
      fn=generate_podcast,
      inputs=inputs,
      outputs=outputs,
+     title="Lecture → Podcast & Script (Google Gemini & TTS)",
+     description=(
+         "**IMPORTANT SETUP:**\n"
+         "1. Enter your Google AI Studio API Key for Gemini text generation.\n"
+         "2. For Text-to-Speech: enable the 'Cloud Text-to-Speech API' in your Google Cloud project. "
+         "Create a service account with the 'Cloud Text-to-Speech API User' role and download its JSON key. "
+         "In this Hugging Face Space, go to 'Settings' -> 'Secrets' and add a new secret named `GOOGLE_CREDS_JSON_CONTENT`. "
+         "Paste the *entire content* of your service account JSON key file as the value for this secret.\n\n"
+         "Upload a lecture PDF, choose language(s), and receive an audio podcast "
+         "and its script. Dialogue by Google Gemini, speech by Google Cloud TTS."
+     ),
+     allow_flagging="never",
  )

  if __name__ == "__main__":
+     # Make sure GOOGLE_CREDS_JSON_CONTENT is available as an environment variable,
+     # or that GOOGLE_APPLICATION_CREDENTIALS is set correctly if running locally for testing.
+     # For local testing with a service account key file:
+     # os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path/to/your/service-account-file.json"
+     iface.launch()
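
Before deploying a change like this, it can help to verify both Google credentials outside the Space. The sketch below is not part of the commit: it is a minimal local check that assumes a hypothetical GEMINI_API_KEY environment variable for the Gemini key and a GOOGLE_APPLICATION_CREDENTIALS path for Cloud TTS, and it reuses only the client calls already present in the new app.py.

# local_auth_check.py : minimal sketch, not part of this commit.
# Assumed local settings (both hypothetical for this check):
#   GEMINI_API_KEY                 : your Google AI Studio key (the app takes it via the UI instead)
#   GOOGLE_APPLICATION_CREDENTIALS : path to a service-account JSON key with Cloud TTS access
import os

import google.generativeai as genai
from google.cloud import texttospeech

# 1. Gemini: configure with the key and request a one-line reply.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel("gemini-1.5-flash-latest")
print(model.generate_content("Reply with the single word: ok").text)

# 2. Cloud TTS: the client reads GOOGLE_APPLICATION_CREDENTIALS automatically.
tts_client = texttospeech.TextToSpeechClient()
response = tts_client.synthesize_speech(
    input=texttospeech.SynthesisInput(text="Authentication looks good."),
    voice=texttospeech.VoiceSelectionParams(language_code="en-US"),
    audio_config=texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3),
)
with open("tts_check.mp3", "wb") as f:
    f.write(response.audio_content)
print(f"Wrote tts_check.mp3 ({len(response.audio_content)} bytes)")

If both steps succeed locally with the same service-account key you paste into the GOOGLE_CREDS_JSON_CONTENT secret, the Space's runtime path through generate_podcast should authenticate the same way.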