mgbam committed on
Commit
54048e7
·
verified ·
1 Parent(s): 1a4fd38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -41
app.py CHANGED
@@ -27,7 +27,7 @@ from pydantic import BaseModel, Field, ValidationError, field_validator, model_v
27
  # Video and audio processing
28
  from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
29
 
30
- # Google Generative AI library and async patch
31
  import google.generativeai as genai
32
  import nest_asyncio
33
  nest_asyncio.apply() # Ensure asyncio works correctly in Streamlit/Jupyter
@@ -38,7 +38,7 @@ logger = logging.getLogger(__name__)
38
 
39
  # --- Constants & Configurations ---
40
  TEXT_MODEL_ID = "models/gemini-1.5-flash" # Alternatively "gemini-1.5-pro"
41
- AUDIO_MODEL_ID = "models/gemini-1.5-flash" # Synchronous generation for audio now
42
  AUDIO_SAMPLING_RATE = 24000
43
  IMAGE_MODEL_ID = "imagen-3" # NOTE: Requires Vertex AI SDK integration in the future
44
  DEFAULT_ASPECT_RATIO = "1:1"
@@ -122,8 +122,9 @@ class ChronoWeaveGenerator:
122
  try:
123
  self.client_text = genai.GenerativeModel(TEXT_MODEL_ID)
124
  logger.info(f"Initialized text model: {TEXT_MODEL_ID}")
 
125
  self.client_audio = genai.GenerativeModel(AUDIO_MODEL_ID)
126
- logger.info(f"Initialized audio model: {AUDIO_MODEL_ID}")
127
  self.client_image = genai.GenerativeModel(IMAGE_MODEL_ID)
128
  logger.info(f"Initialized image model: {IMAGE_MODEL_ID} (Placeholder: Update to Vertex AI SDK)")
129
  except Exception as exc:
@@ -187,51 +188,27 @@ JSON Schema: ```json
187
 
188
  async def generate_audio(self, text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
189
  """
190
- Asynchronously generates audio by wrapping the synchronous generate_content call.
191
- The call is executed using asyncio.to_thread to avoid blocking.
192
- Note: The unsupported 'audio_config' parameter has been removed.
193
  """
194
- task_id = os.path.basename(output_filename).split(".")[0]
195
- logger.info(f"πŸŽ™οΈ [{task_id}] Generating audio for text: '{text[:60]}...'")
196
-
197
  try:
198
- # Define a synchronous function for audio generation.
199
- def sync_generate_audio():
200
- prompt = f"Narrate directly: \"{text}\""
201
- response = self.client_audio.generate_content(
202
- contents=prompt,
203
- generation_config=genai.types.GenerationConfig(
204
- response_mime_type="application/octet-stream",
205
- temperature=0.7
206
- # Removed 'audio_config' to avoid the unexpected keyword argument error.
207
- )
208
- )
209
- return response
210
-
211
- # Execute the synchronous call in a separate thread.
212
- response = await asyncio.to_thread(sync_generate_audio)
213
-
214
- # Process the response. Adjust as necessary based on your API’s actual response structure.
215
- if not response or not hasattr(response, "audio_chunk") or not response.audio_chunk.data:
216
- logger.error(f"❌ [{task_id}] No audio data returned.")
217
- st.error(f"Audio generation failed for {task_id}: No audio data.", icon="πŸ”Š")
218
- return None
219
-
220
- audio_data = response.audio_chunk.data
221
- with wave_file_writer(output_filename) as wf:
222
- wf.writeframes(audio_data)
223
- logger.info(f"βœ… [{task_id}] Audio saved: {os.path.basename(output_filename)} ({len(audio_data)} bytes)")
224
  return output_filename
225
-
226
  except Exception as e:
227
- logger.exception(f"❌ [{task_id}] Audio generation error: {e}")
228
- st.error(f"Audio generation failed for {task_id}: {e}", icon="πŸ”Š")
229
  return None
230
 
231
  async def generate_image_async(self, prompt: str, aspect_ratio: str, task_id: str) -> Optional[Image.Image]:
232
  """
233
  Placeholder for image generation.
234
- Currently logs an error and returns None. Update this function once integrating Vertex AI SDK.
 
235
  """
236
  logger.info(f"πŸ–ΌοΈ [{task_id}] Requesting image for prompt: '{prompt[:70]}...' (Aspect Ratio: {aspect_ratio})")
237
  logger.error(f"❌ [{task_id}] Image generation not implemented. Update required for Vertex AI.")
@@ -254,7 +231,8 @@ JSON Schema: ```json
254
  errors: List[str] = []
255
  task_id = f"T{timeline_id}_S{segment.scene_id}"
256
  image_path = os.path.join(temp_dir, f"{task_id}_image.png")
257
- audio_path = os.path.join(temp_dir, f"{task_id}_audio.wav")
 
258
  video_clip = None
259
 
260
  # Launch image and audio generation concurrently.
@@ -283,7 +261,7 @@ JSON Schema: ```json
283
  if audio_result:
284
  try:
285
  with open(audio_result, "rb") as ap:
286
- st.audio(ap.read(), format="audio/wav")
287
  except Exception as e:
288
  logger.warning(f"⚠️ [{task_id}] Audio preview error: {e}")
289
  else:
 
27
  # Video and audio processing
28
  from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
29
 
30
+ # Google Generative AI library and asyncio patch
31
  import google.generativeai as genai
32
  import nest_asyncio
33
  nest_asyncio.apply() # Ensure asyncio works correctly in Streamlit/Jupyter
 
38
 
39
  # --- Constants & Configurations ---
40
  TEXT_MODEL_ID = "models/gemini-1.5-flash" # Alternatively "gemini-1.5-pro"
41
+ AUDIO_MODEL_ID = "models/gemini-1.5-flash" # Not used for audio generation now
42
  AUDIO_SAMPLING_RATE = 24000
43
  IMAGE_MODEL_ID = "imagen-3" # NOTE: Requires Vertex AI SDK integration in the future
44
  DEFAULT_ASPECT_RATIO = "1:1"
 
122
  try:
123
  self.client_text = genai.GenerativeModel(TEXT_MODEL_ID)
124
  logger.info(f"Initialized text model: {TEXT_MODEL_ID}")
125
+ # Audio generation now uses gTTS instead of the GenerativeModel.
126
  self.client_audio = genai.GenerativeModel(AUDIO_MODEL_ID)
127
+ logger.info(f"Initialized audio model (not used for TTS): {AUDIO_MODEL_ID}")
128
  self.client_image = genai.GenerativeModel(IMAGE_MODEL_ID)
129
  logger.info(f"Initialized image model: {IMAGE_MODEL_ID} (Placeholder: Update to Vertex AI SDK)")
130
  except Exception as exc:
 
188
 
189
  async def generate_audio(self, text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
190
  """
191
+ Generates an audio file from the provided text using gTTS.
192
+ This function runs asynchronously.
 
193
  """
 
 
 
194
  try:
195
+ # Import gTTS; ensure that gTTS is installed (pip install gTTS)
196
+ from gtts import gTTS
197
+ logger.info(f"πŸŽ™οΈ Generating audio for text: '{text[:60]}...'")
198
+ tts = gTTS(text=text, lang="en")
199
+ tts.save(output_filename)
200
+ logger.info(f"βœ… Audio saved: {output_filename}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  return output_filename
 
202
  except Exception as e:
203
+ logger.exception(f"Audio generation error: {e}")
204
+ st.error(f"Audio generation failed: {e}", icon="πŸ”Š")
205
  return None
206
 
207
  async def generate_image_async(self, prompt: str, aspect_ratio: str, task_id: str) -> Optional[Image.Image]:
208
  """
209
  Placeholder for image generation.
210
+ Currently logs an error and returns None.
211
+ Update this function once integrating Vertex AI SDK.
212
  """
213
  logger.info(f"πŸ–ΌοΈ [{task_id}] Requesting image for prompt: '{prompt[:70]}...' (Aspect Ratio: {aspect_ratio})")
214
  logger.error(f"❌ [{task_id}] Image generation not implemented. Update required for Vertex AI.")
 
231
  errors: List[str] = []
232
  task_id = f"T{timeline_id}_S{segment.scene_id}"
233
  image_path = os.path.join(temp_dir, f"{task_id}_image.png")
234
+ # Use .mp3 extension for audio generated via gTTS.
235
+ audio_path = os.path.join(temp_dir, f"{task_id}_audio.mp3")
236
  video_clip = None
237
 
238
  # Launch image and audio generation concurrently.
 
261
  if audio_result:
262
  try:
263
  with open(audio_result, "rb") as ap:
264
+ st.audio(ap.read(), format="audio/mp3")
265
  except Exception as e:
266
  logger.warning(f"⚠️ [{task_id}] Audio preview error: {e}")
267
  else: