Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -27,7 +27,7 @@ from pydantic import BaseModel, Field, ValidationError, field_validator, model_v
|
|
27 |
# Video and audio processing
|
28 |
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
|
29 |
|
30 |
-
# Google Generative AI library and
|
31 |
import google.generativeai as genai
|
32 |
import nest_asyncio
|
33 |
nest_asyncio.apply() # Ensure asyncio works correctly in Streamlit/Jupyter
|
@@ -38,7 +38,7 @@ logger = logging.getLogger(__name__)
|
|
38 |
|
39 |
# --- Constants & Configurations ---
|
40 |
TEXT_MODEL_ID = "models/gemini-1.5-flash" # Alternatively "gemini-1.5-pro"
|
41 |
-
AUDIO_MODEL_ID = "models/gemini-1.5-flash" #
|
42 |
AUDIO_SAMPLING_RATE = 24000
|
43 |
IMAGE_MODEL_ID = "imagen-3" # NOTE: Requires Vertex AI SDK integration in the future
|
44 |
DEFAULT_ASPECT_RATIO = "1:1"
|
@@ -122,8 +122,9 @@ class ChronoWeaveGenerator:
|
|
122 |
try:
|
123 |
self.client_text = genai.GenerativeModel(TEXT_MODEL_ID)
|
124 |
logger.info(f"Initialized text model: {TEXT_MODEL_ID}")
|
|
|
125 |
self.client_audio = genai.GenerativeModel(AUDIO_MODEL_ID)
|
126 |
-
logger.info(f"Initialized audio model: {AUDIO_MODEL_ID}")
|
127 |
self.client_image = genai.GenerativeModel(IMAGE_MODEL_ID)
|
128 |
logger.info(f"Initialized image model: {IMAGE_MODEL_ID} (Placeholder: Update to Vertex AI SDK)")
|
129 |
except Exception as exc:
|
@@ -187,51 +188,27 @@ JSON Schema: ```json
|
|
187 |
|
188 |
async def generate_audio(self, text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
|
189 |
"""
|
190 |
-
|
191 |
-
|
192 |
-
Note: The unsupported 'audio_config' parameter has been removed.
|
193 |
"""
|
194 |
-
task_id = os.path.basename(output_filename).split(".")[0]
|
195 |
-
logger.info(f"🎙️ [{task_id}] Generating audio for text: '{text[:60]}...'")
|
196 |
-
|
197 |
try:
|
198 |
-
#
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
response_mime_type="application/octet-stream",
|
205 |
-
temperature=0.7
|
206 |
-
# Removed 'audio_config' to avoid the unexpected keyword argument error.
|
207 |
-
)
|
208 |
-
)
|
209 |
-
return response
|
210 |
-
|
211 |
-
# Execute the synchronous call in a separate thread.
|
212 |
-
response = await asyncio.to_thread(sync_generate_audio)
|
213 |
-
|
214 |
-
# Process the response. Adjust as necessary based on your API’s actual response structure.
|
215 |
-
if not response or not hasattr(response, "audio_chunk") or not response.audio_chunk.data:
|
216 |
-
logger.error(f"❌ [{task_id}] No audio data returned.")
|
217 |
-
st.error(f"Audio generation failed for {task_id}: No audio data.", icon="🔊")
|
218 |
-
return None
|
219 |
-
|
220 |
-
audio_data = response.audio_chunk.data
|
221 |
-
with wave_file_writer(output_filename) as wf:
|
222 |
-
wf.writeframes(audio_data)
|
223 |
-
logger.info(f"✅ [{task_id}] Audio saved: {os.path.basename(output_filename)} ({len(audio_data)} bytes)")
|
224 |
return output_filename
|
225 |
-
|
226 |
except Exception as e:
|
227 |
-
logger.exception(f"
|
228 |
-
st.error(f"Audio generation failed
|
229 |
return None
|
230 |
|
231 |
async def generate_image_async(self, prompt: str, aspect_ratio: str, task_id: str) -> Optional[Image.Image]:
|
232 |
"""
|
233 |
Placeholder for image generation.
|
234 |
-
Currently logs an error and returns None.
|
|
|
235 |
"""
|
236 |
logger.info(f"🖼️ [{task_id}] Requesting image for prompt: '{prompt[:70]}...' (Aspect Ratio: {aspect_ratio})")
|
237 |
logger.error(f"❌ [{task_id}] Image generation not implemented. Update required for Vertex AI.")
|
@@ -254,7 +231,8 @@ JSON Schema: ```json
|
|
254 |
errors: List[str] = []
|
255 |
task_id = f"T{timeline_id}_S{segment.scene_id}"
|
256 |
image_path = os.path.join(temp_dir, f"{task_id}_image.png")
|
257 |
-
|
|
|
258 |
video_clip = None
|
259 |
|
260 |
# Launch image and audio generation concurrently.
|
@@ -283,7 +261,7 @@ JSON Schema: ```json
|
|
283 |
if audio_result:
|
284 |
try:
|
285 |
with open(audio_result, "rb") as ap:
|
286 |
-
st.audio(ap.read(), format="audio/
|
287 |
except Exception as e:
|
288 |
logger.warning(f"⚠️ [{task_id}] Audio preview error: {e}")
|
289 |
else:
|
|
|
27 |
# Video and audio processing
|
28 |
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
|
29 |
|
30 |
+
# Google Generative AI library and asyncio patch
|
31 |
import google.generativeai as genai
|
32 |
import nest_asyncio
|
33 |
nest_asyncio.apply() # Ensure asyncio works correctly in Streamlit/Jupyter
|
|
|
38 |
|
39 |
# --- Constants & Configurations ---
|
40 |
TEXT_MODEL_ID = "models/gemini-1.5-flash" # Alternatively "gemini-1.5-pro"
|
41 |
+
AUDIO_MODEL_ID = "models/gemini-1.5-flash" # Not used for audio generation now
|
42 |
AUDIO_SAMPLING_RATE = 24000
|
43 |
IMAGE_MODEL_ID = "imagen-3" # NOTE: Requires Vertex AI SDK integration in the future
|
44 |
DEFAULT_ASPECT_RATIO = "1:1"
|
|
|
122 |
try:
|
123 |
self.client_text = genai.GenerativeModel(TEXT_MODEL_ID)
|
124 |
logger.info(f"Initialized text model: {TEXT_MODEL_ID}")
|
125 |
+
# Audio generation now uses gTTS instead of the GenerativeModel.
|
126 |
self.client_audio = genai.GenerativeModel(AUDIO_MODEL_ID)
|
127 |
+
logger.info(f"Initialized audio model (not used for TTS): {AUDIO_MODEL_ID}")
|
128 |
self.client_image = genai.GenerativeModel(IMAGE_MODEL_ID)
|
129 |
logger.info(f"Initialized image model: {IMAGE_MODEL_ID} (Placeholder: Update to Vertex AI SDK)")
|
130 |
except Exception as exc:
|
|
|
188 |
|
189 |
async def generate_audio(self, text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
|
190 |
"""
|
191 |
+
Generates an audio file from the provided text using gTTS.
|
192 |
+
This function runs asynchronously.
|
|
|
193 |
"""
|
|
|
|
|
|
|
194 |
try:
|
195 |
+
# Import gTTS; ensure that gTTS is installed (pip install gTTS)
|
196 |
+
from gtts import gTTS
|
197 |
+
logger.info(f"🎙️ Generating audio for text: '{text[:60]}...'")
|
198 |
+
tts = gTTS(text=text, lang="en")
|
199 |
+
tts.save(output_filename)
|
200 |
+
logger.info(f"✅ Audio saved: {output_filename}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
return output_filename
|
|
|
202 |
except Exception as e:
|
203 |
+
logger.exception(f"Audio generation error: {e}")
|
204 |
+
st.error(f"Audio generation failed: {e}", icon="🔊")
|
205 |
return None
|
206 |
|
207 |
async def generate_image_async(self, prompt: str, aspect_ratio: str, task_id: str) -> Optional[Image.Image]:
|
208 |
"""
|
209 |
Placeholder for image generation.
|
210 |
+
Currently logs an error and returns None.
|
211 |
+
Update this function once integrating Vertex AI SDK.
|
212 |
"""
|
213 |
logger.info(f"🖼️ [{task_id}] Requesting image for prompt: '{prompt[:70]}...' (Aspect Ratio: {aspect_ratio})")
|
214 |
logger.error(f"❌ [{task_id}] Image generation not implemented. Update required for Vertex AI.")
|
|
|
231 |
errors: List[str] = []
|
232 |
task_id = f"T{timeline_id}_S{segment.scene_id}"
|
233 |
image_path = os.path.join(temp_dir, f"{task_id}_image.png")
|
234 |
+
# Use .mp3 extension for audio generated via gTTS.
|
235 |
+
audio_path = os.path.join(temp_dir, f"{task_id}_audio.mp3")
|
236 |
video_clip = None
|
237 |
|
238 |
# Launch image and audio generation concurrently.
|
|
|
261 |
if audio_result:
|
262 |
try:
|
263 |
with open(audio_result, "rb") as ap:
|
264 |
+
st.audio(ap.read(), format="audio/mp3")
|
265 |
except Exception as e:
|
266 |
logger.warning(f"⚠️ [{task_id}] Audio preview error: {e}")
|
267 |
else:
|