mgbam committed on
Commit
54048e7
·
verified ·
1 Parent(s): 1a4fd38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -41
app.py CHANGED
@@ -27,7 +27,7 @@ from pydantic import BaseModel, Field, ValidationError, field_validator, model_v
27
  # Video and audio processing
28
  from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
29
 
30
- # Google Generative AI library and async patch
31
  import google.generativeai as genai
32
  import nest_asyncio
33
  nest_asyncio.apply() # Ensure asyncio works correctly in Streamlit/Jupyter
@@ -38,7 +38,7 @@ logger = logging.getLogger(__name__)
38
 
39
  # --- Constants & Configurations ---
40
  TEXT_MODEL_ID = "models/gemini-1.5-flash" # Alternatively "gemini-1.5-pro"
41
- AUDIO_MODEL_ID = "models/gemini-1.5-flash" # Synchronous generation for audio now
42
  AUDIO_SAMPLING_RATE = 24000
43
  IMAGE_MODEL_ID = "imagen-3" # NOTE: Requires Vertex AI SDK integration in the future
44
  DEFAULT_ASPECT_RATIO = "1:1"
@@ -122,8 +122,9 @@ class ChronoWeaveGenerator:
122
  try:
123
  self.client_text = genai.GenerativeModel(TEXT_MODEL_ID)
124
  logger.info(f"Initialized text model: {TEXT_MODEL_ID}")
 
125
  self.client_audio = genai.GenerativeModel(AUDIO_MODEL_ID)
126
- logger.info(f"Initialized audio model: {AUDIO_MODEL_ID}")
127
  self.client_image = genai.GenerativeModel(IMAGE_MODEL_ID)
128
  logger.info(f"Initialized image model: {IMAGE_MODEL_ID} (Placeholder: Update to Vertex AI SDK)")
129
  except Exception as exc:
@@ -187,51 +188,27 @@ JSON Schema: ```json
187
 
188
  async def generate_audio(self, text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
189
  """
190
- Asynchronously generates audio by wrapping the synchronous generate_content call.
191
- The call is executed using asyncio.to_thread to avoid blocking.
192
- Note: The unsupported 'audio_config' parameter has been removed.
193
  """
194
- task_id = os.path.basename(output_filename).split(".")[0]
195
- logger.info(f"πŸŽ™οΈ [{task_id}] Generating audio for text: '{text[:60]}...'")
196
-
197
  try:
198
- # Define a synchronous function for audio generation.
199
- def sync_generate_audio():
200
- prompt = f"Narrate directly: \"{text}\""
201
- response = self.client_audio.generate_content(
202
- contents=prompt,
203
- generation_config=genai.types.GenerationConfig(
204
- response_mime_type="application/octet-stream",
205
- temperature=0.7
206
- # Removed 'audio_config' to avoid the unexpected keyword argument error.
207
- )
208
- )
209
- return response
210
-
211
- # Execute the synchronous call in a separate thread.
212
- response = await asyncio.to_thread(sync_generate_audio)
213
-
214
- # Process the response. Adjust as necessary based on your API’s actual response structure.
215
- if not response or not hasattr(response, "audio_chunk") or not response.audio_chunk.data:
216
- logger.error(f"❌ [{task_id}] No audio data returned.")
217
- st.error(f"Audio generation failed for {task_id}: No audio data.", icon="πŸ”Š")
218
- return None
219
-
220
- audio_data = response.audio_chunk.data
221
- with wave_file_writer(output_filename) as wf:
222
- wf.writeframes(audio_data)
223
- logger.info(f"βœ… [{task_id}] Audio saved: {os.path.basename(output_filename)} ({len(audio_data)} bytes)")
224
  return output_filename
225
-
226
  except Exception as e:
227
- logger.exception(f"❌ [{task_id}] Audio generation error: {e}")
228
- st.error(f"Audio generation failed for {task_id}: {e}", icon="πŸ”Š")
229
  return None
230
 
231
  async def generate_image_async(self, prompt: str, aspect_ratio: str, task_id: str) -> Optional[Image.Image]:
232
  """
233
  Placeholder for image generation.
234
- Currently logs an error and returns None. Update this function once integrating Vertex AI SDK.
 
235
  """
236
  logger.info(f"πŸ–ΌοΈ [{task_id}] Requesting image for prompt: '{prompt[:70]}...' (Aspect Ratio: {aspect_ratio})")
237
  logger.error(f"❌ [{task_id}] Image generation not implemented. Update required for Vertex AI.")
@@ -254,7 +231,8 @@ JSON Schema: ```json
254
  errors: List[str] = []
255
  task_id = f"T{timeline_id}_S{segment.scene_id}"
256
  image_path = os.path.join(temp_dir, f"{task_id}_image.png")
257
- audio_path = os.path.join(temp_dir, f"{task_id}_audio.wav")
 
258
  video_clip = None
259
 
260
  # Launch image and audio generation concurrently.
@@ -283,7 +261,7 @@ JSON Schema: ```json
283
  if audio_result:
284
  try:
285
  with open(audio_result, "rb") as ap:
286
- st.audio(ap.read(), format="audio/wav")
287
  except Exception as e:
288
  logger.warning(f"⚠️ [{task_id}] Audio preview error: {e}")
289
  else:
 
27
  # Video and audio processing
28
  from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
29
 
30
+ # Google Generative AI library and asyncio patch
31
  import google.generativeai as genai
32
  import nest_asyncio
33
  nest_asyncio.apply() # Ensure asyncio works correctly in Streamlit/Jupyter
 
38
 
39
  # --- Constants & Configurations ---
40
  TEXT_MODEL_ID = "models/gemini-1.5-flash" # Alternatively "gemini-1.5-pro"
41
+ AUDIO_MODEL_ID = "models/gemini-1.5-flash" # Not used for audio generation now
42
  AUDIO_SAMPLING_RATE = 24000
43
  IMAGE_MODEL_ID = "imagen-3" # NOTE: Requires Vertex AI SDK integration in the future
44
  DEFAULT_ASPECT_RATIO = "1:1"
 
122
  try:
123
  self.client_text = genai.GenerativeModel(TEXT_MODEL_ID)
124
  logger.info(f"Initialized text model: {TEXT_MODEL_ID}")
125
+ # Audio generation now uses gTTS instead of the GenerativeModel.
126
  self.client_audio = genai.GenerativeModel(AUDIO_MODEL_ID)
127
+ logger.info(f"Initialized audio model (not used for TTS): {AUDIO_MODEL_ID}")
128
  self.client_image = genai.GenerativeModel(IMAGE_MODEL_ID)
129
  logger.info(f"Initialized image model: {IMAGE_MODEL_ID} (Placeholder: Update to Vertex AI SDK)")
130
  except Exception as exc:
 
188
 
189
  async def generate_audio(self, text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
190
  """
191
+ Generates an audio file from the provided text using gTTS.
192
+ This function runs asynchronously.
 
193
  """
 
 
 
194
  try:
195
+ # Import gTTS; ensure that gTTS is installed (pip install gTTS)
196
+ from gtts import gTTS
197
+ logger.info(f"πŸŽ™οΈ Generating audio for text: '{text[:60]}...'")
198
+ tts = gTTS(text=text, lang="en")
199
+ tts.save(output_filename)
200
+ logger.info(f"βœ… Audio saved: {output_filename}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  return output_filename
 
202
  except Exception as e:
203
+ logger.exception(f"Audio generation error: {e}")
204
+ st.error(f"Audio generation failed: {e}", icon="πŸ”Š")
205
  return None
206
 
207
  async def generate_image_async(self, prompt: str, aspect_ratio: str, task_id: str) -> Optional[Image.Image]:
208
  """
209
  Placeholder for image generation.
210
+ Currently logs an error and returns None.
211
+ Update this function once integrating Vertex AI SDK.
212
  """
213
  logger.info(f"πŸ–ΌοΈ [{task_id}] Requesting image for prompt: '{prompt[:70]}...' (Aspect Ratio: {aspect_ratio})")
214
  logger.error(f"❌ [{task_id}] Image generation not implemented. Update required for Vertex AI.")
 
231
  errors: List[str] = []
232
  task_id = f"T{timeline_id}_S{segment.scene_id}"
233
  image_path = os.path.join(temp_dir, f"{task_id}_image.png")
234
+ # Use .mp3 extension for audio generated via gTTS.
235
+ audio_path = os.path.join(temp_dir, f"{task_id}_audio.mp3")
236
  video_clip = None
237
 
238
  # Launch image and audio generation concurrently.
 
261
  if audio_result:
262
  try:
263
  with open(audio_result, "rb") as ap:
264
+ st.audio(ap.read(), format="audio/mp3")
265
  except Exception as e:
266
  logger.warning(f"⚠️ [{task_id}] Audio preview error: {e}")
267
  else: