HaiderAUT committed on
Commit
d4adc2b
·
verified ·
1 Parent(s): 617d576

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -19
app.py CHANGED
@@ -28,7 +28,6 @@ except ImportError:
28
  # ------------------------------------------------------------------
29
  # Globals & templates
30
  # ------------------------------------------------------------------
31
- # Gemini prompt for ~300-word two-host dialogue in English
32
  PROMPT_TEMPLATE = textwrap.dedent(
33
  """
34
  You are producing a lively two-host educational podcast in English.
@@ -41,29 +40,23 @@ PROMPT_TEMPLATE = textwrap.dedent(
41
  """
42
  )
43
 
44
- # TTS model ID for English MMS-TTS
45
  HF_TTS_MODEL = "facebook/mms-tts-eng"
46
- # Safe chunk size for HF text-to-speech
47
  CHUNK_CHAR_LIMIT = 280
48
 
49
- # Initialize HF TTS client (no token required for public models)
50
  tts_client = InferenceClient()
51
 
52
  # ------------------------------------------------------------------
53
  # Helpers
54
  # ------------------------------------------------------------------
55
  def extract_pdf_text(pdf_path: str) -> str:
56
- """Extracts all text from a PDF file."""
57
  reader = PdfReader(pdf_path)
58
  return "\n".join(page.extract_text() or "" for page in reader.pages)
59
 
60
  def truncate_text(text: str, max_words: int = 8000) -> str:
61
- """Truncate to max_words to fit LLM context."""
62
  words = text.split()
63
  return " ".join(words[:max_words])
64
 
65
  def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
66
- """Split text into ≤limit-char chunks at sentence boundaries."""
67
  sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
68
  chunks, current = [], ""
69
  for sent in sentences:
@@ -77,7 +70,6 @@ def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
77
  return chunks
78
 
79
  def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
80
- """Chunk-safe TTS via HF Inference API, concatenating FLAC segments."""
81
  chunks = split_to_chunks(text)
82
  if not chunks:
83
  raise ValueError("No text to synthesize.")
@@ -94,10 +86,9 @@ def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
94
  segments.append(seg)
95
  except CouldntDecodeError as e:
96
  raise RuntimeError(f"Could not decode segment {i+1}: {e}")
97
- # Concatenate
98
- final = sum(segments, AudioSegment.empty())
99
  out_path = out_dir / "podcast_audio.flac"
100
- final.export(out_path, format="flac")
101
  return out_path
102
 
103
  # ------------------------------------------------------------------
@@ -107,31 +98,30 @@ def generate_podcast(
107
  gemini_api_key: Optional[str],
108
  lecture_pdf: Optional[gr.File]
109
  ) -> List[Optional[Any]]:
110
- # Validate inputs
111
  if not gemini_api_key:
112
  raise gr.Error("Enter your Google AI Studio API Key.")
113
  if not lecture_pdf:
114
  raise gr.Error("Upload a lecture PDF file.")
115
- # Configure Gemini
116
  genai.configure(api_key=gemini_api_key)
117
- # Extract & truncate lecture text
118
  raw = extract_pdf_text(lecture_pdf.name)
119
  content = truncate_text(raw)
120
  if not content.strip():
121
  raise gr.Error("Lecture PDF contained no extractable text.")
122
- # Initialize Gemini model
123
  try:
124
  gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
125
  except Exception as e:
126
  raise gr.Error(f"Gemini init failed: {e}")
127
- # Generate script
128
  prompt = PROMPT_TEMPLATE.format(content=content)
129
  try:
130
  resp = gemini_model.generate_content(prompt)
131
  script = resp.text or ""
132
  except Exception as e:
133
  raise gr.Error(f"Gemini generation error: {e}")
134
- # Prepare temp directory
135
  with tempfile.TemporaryDirectory() as td:
136
  tmp = Path(td)
137
  # Save script file
@@ -151,12 +141,16 @@ def generate_podcast(
151
  iface = gr.Interface(
152
  fn=generate_podcast,
153
  inputs=[
154
- gr.Textbox(label="Google Gemini API Key", type="password", placeholder="Paste your key"),
 
 
 
 
155
  gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
156
  ],
157
  outputs=[
158
  gr.Audio(label="English Podcast", type="filepath"),
159
- gr.Markdown(label="English Script"),
160
  gr.File(label="Download English Script (.txt)", type="filepath"),
161
  ],
162
  title="Lecture → English Podcast & Script",
 
28
  # ------------------------------------------------------------------
29
  # Globals & templates
30
  # ------------------------------------------------------------------
 
31
  PROMPT_TEMPLATE = textwrap.dedent(
32
  """
33
  You are producing a lively two-host educational podcast in English.
 
40
  """
41
  )
42
 
 
43
  HF_TTS_MODEL = "facebook/mms-tts-eng"
 
44
  CHUNK_CHAR_LIMIT = 280
45
 
 
46
  tts_client = InferenceClient()
47
 
48
  # ------------------------------------------------------------------
49
  # Helpers
50
  # ------------------------------------------------------------------
51
def extract_pdf_text(pdf_path: str) -> str:
    """Read the PDF at *pdf_path* and return the text of all pages, newline-joined.

    Pages with no extractable text contribute an empty string rather than None.
    """
    reader = PdfReader(pdf_path)
    page_texts = (page.extract_text() or "" for page in reader.pages)
    return "\n".join(page_texts)
54
 
55
def truncate_text(text: str, max_words: int = 8000) -> str:
    """Return *text* limited to its first *max_words* whitespace-separated words.

    Splitting on whitespace also collapses runs of spaces/newlines into
    single spaces in the rejoined result.
    """
    return " ".join(text.split()[:max_words])
58
 
59
  def split_to_chunks(text: str, limit: int = CHUNK_CHAR_LIMIT) -> List[str]:
 
60
  sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
61
  chunks, current = [], ""
62
  for sent in sentences:
 
70
  return chunks
71
 
72
  def synthesize_speech(text: str, model_id: str, out_dir: Path) -> Path:
 
73
  chunks = split_to_chunks(text)
74
  if not chunks:
75
  raise ValueError("No text to synthesize.")
 
86
  segments.append(seg)
87
  except CouldntDecodeError as e:
88
  raise RuntimeError(f"Could not decode segment {i+1}: {e}")
89
+ final_audio = sum(segments, AudioSegment.empty())
 
90
  out_path = out_dir / "podcast_audio.flac"
91
+ final_audio.export(out_path, format="flac")
92
  return out_path
93
 
94
  # ------------------------------------------------------------------
 
98
  gemini_api_key: Optional[str],
99
  lecture_pdf: Optional[gr.File]
100
  ) -> List[Optional[Any]]:
 
101
  if not gemini_api_key:
102
  raise gr.Error("Enter your Google AI Studio API Key.")
103
  if not lecture_pdf:
104
  raise gr.Error("Upload a lecture PDF file.")
105
+
106
  genai.configure(api_key=gemini_api_key)
107
+
108
  raw = extract_pdf_text(lecture_pdf.name)
109
  content = truncate_text(raw)
110
  if not content.strip():
111
  raise gr.Error("Lecture PDF contained no extractable text.")
112
+
113
  try:
114
  gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
115
  except Exception as e:
116
  raise gr.Error(f"Gemini init failed: {e}")
117
+
118
  prompt = PROMPT_TEMPLATE.format(content=content)
119
  try:
120
  resp = gemini_model.generate_content(prompt)
121
  script = resp.text or ""
122
  except Exception as e:
123
  raise gr.Error(f"Gemini generation error: {e}")
124
+
125
  with tempfile.TemporaryDirectory() as td:
126
  tmp = Path(td)
127
  # Save script file
 
141
  iface = gr.Interface(
142
  fn=generate_podcast,
143
  inputs=[
144
+ gr.Textbox(
145
+ label="Google Gemini API Key",
146
+ type="password",
147
+ placeholder="Paste your key here"
148
+ ),
149
  gr.File(label="Upload Lecture PDF", file_types=[".pdf"]),
150
  ],
151
  outputs=[
152
  gr.Audio(label="English Podcast", type="filepath"),
153
+ gr.Markdown(label="English Script"), # renders the script
154
  gr.File(label="Download English Script (.txt)", type="filepath"),
155
  ],
156
  title="Lecture → English Podcast & Script",