seawolf2357 committed on
Commit
df2ef41
·
verified ·
1 Parent(s): af5108f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -6
app.py CHANGED
@@ -121,6 +121,11 @@ def interpolate_frames(video_path, target_fps=30):
121
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
122
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
123
 
 
 
 
 
 
124
  print(f"Original FPS: {original_fps}, Target FPS: {target_fps}")
125
 
126
  # If target FPS is not higher, return original
@@ -204,6 +209,25 @@ except Exception as e:
204
  # Invert the emo_map for easy lookup from the dropdown value
205
  emo_name_to_id = {v: k for k, v in emo_map.items()}
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  # --- Core Generation Function ---
208
  @spaces.GPU(duration=180) # Increased duration for smoothing and interpolation
209
  def generate_motion(source_image_path, driving_audio_path, emotion_name,
@@ -227,6 +251,15 @@ def generate_motion(source_image_path, driving_audio_path, emotion_name,
227
  raise gr.Error("Please upload a source image.")
228
  if driving_audio_path is None:
229
  raise gr.Error("Please upload a driving audio file.")
 
 
 
 
 
 
 
 
 
230
 
231
  start_time = time.time()
232
 
@@ -341,7 +374,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
341
 
342
  with gr.Row():
343
  driving_audio = gr.Audio(
344
- label="Driving Audio",
345
  type="filepath",
346
  value="src/examples/driving_audios/5.wav"
347
  )
@@ -352,7 +385,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
352
  emotion_dropdown = gr.Dropdown(
353
  label="Emotion",
354
  choices=list(emo_map.values()),
355
- value="Neutral",
356
  info="Select an emotion for more natural facial expressions"
357
  )
358
 
@@ -371,7 +404,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
371
  with gr.Row():
372
  smooth_checkbox = gr.Checkbox(
373
  label="Enable Smoothing (Experimental)",
374
- value=True, # Changed to False due to CUDA issues
375
  info="May cause errors on some systems. If errors occur, disable this option."
376
  )
377
 
@@ -400,8 +433,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
400
  <b>Tips for best results:</b><br>
401
  • Use high-quality front-facing images<br>
402
  • Clear audio without background noise<br>
403
- Enable smoothing for natural motion<br>
404
- • Adjust CFG scale if motion seems stiff
 
405
  </p>
406
  </div>
407
  """
@@ -415,10 +449,16 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
415
  Users are solely liable for their actions while using this generative model.
416
 
417
  ### 🚀 **Enhancement Features**
418
- - **Frame Smoothing**: Reduces jitter and improves transition between frames
419
  - **Frame Interpolation**: Increases FPS for smoother motion
420
  - **Optimized Audio Processing**: Better lip-sync with 24kHz sampling
421
  - **Fine-tuned CFG Scale**: Better control over motion naturalness
 
 
 
 
 
 
422
  """
423
  )
424
 
 
121
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
122
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
123
 
124
+ # Fix for FPS detection issue
125
+ if original_fps == 0 or original_fps is None:
126
+ print("Warning: Could not detect original FPS. Assuming 25 FPS.")
127
+ original_fps = 25.0
128
+
129
  print(f"Original FPS: {original_fps}, Target FPS: {target_fps}")
130
 
131
  # If target FPS is not higher, return original
 
209
  # Invert the emo_map for easy lookup from the dropdown value
210
  emo_name_to_id = {v: k for k, v in emo_map.items()}
211
 
212
+ # --- Audio Length Check Function ---
213
+ def check_audio_length(audio_path):
214
+ """
215
+ Check the length of an audio file and warn if it's too long.
216
+
217
+ Args:
218
+ audio_path: Path to the audio file
219
+
220
+ Returns:
221
+ Duration in seconds
222
+ """
223
+ try:
224
+ audio = AudioSegment.from_file(audio_path)
225
+ duration_seconds = len(audio) / 1000.0
226
+ return duration_seconds
227
+ except Exception as e:
228
+ print(f"Error checking audio length: {e}")
229
+ return None
230
+
231
  # --- Core Generation Function ---
232
  @spaces.GPU(duration=180) # Increased duration for smoothing and interpolation
233
  def generate_motion(source_image_path, driving_audio_path, emotion_name,
 
251
  raise gr.Error("Please upload a source image.")
252
  if driving_audio_path is None:
253
  raise gr.Error("Please upload a driving audio file.")
254
+
255
+ # Check audio length
256
+ audio_duration = check_audio_length(driving_audio_path)
257
+ if audio_duration:
258
+ print(f"Audio duration: {audio_duration:.1f} seconds")
259
+ if audio_duration > 60:
260
+ gr.Warning(f"⚠️ Audio is {audio_duration:.1f} seconds long. MoDA works best with audio under 60 seconds. Processing may be slow and quality may degrade.")
261
+ if audio_duration > 180:
262
+ raise gr.Error("Audio is too long. Please use audio files under 3 minutes (180 seconds) for best results.")
263
 
264
  start_time = time.time()
265
 
 
374
 
375
  with gr.Row():
376
  driving_audio = gr.Audio(
377
+ label="Driving Audio (Recommended: < 60 seconds)",
378
  type="filepath",
379
  value="src/examples/driving_audios/5.wav"
380
  )
 
385
  emotion_dropdown = gr.Dropdown(
386
  label="Emotion",
387
  choices=list(emo_map.values()),
388
+ value="None",
389
  info="Select an emotion for more natural facial expressions"
390
  )
391
 
 
404
  with gr.Row():
405
  smooth_checkbox = gr.Checkbox(
406
  label="Enable Smoothing (Experimental)",
407
+ value=False, # Changed to False due to CUDA issues
408
  info="May cause errors on some systems. If errors occur, disable this option."
409
  )
410
 
 
433
  <b>Tips for best results:</b><br>
434
  • Use high-quality front-facing images<br>
435
  • Clear audio without background noise<br>
436
+ <b>Keep audio under 60 seconds</b><br>
437
+ • Adjust CFG scale if motion seems stiff<br>
438
+ • For longer audio, split into segments
439
  </p>
440
  </div>
441
  """
 
449
  Users are solely liable for their actions while using this generative model.
450
 
451
  ### 🚀 **Enhancement Features**
452
+ - **Frame Smoothing**: Reduces jitter and improves transition between frames (currently experimental)
453
  - **Frame Interpolation**: Increases FPS for smoother motion
454
  - **Optimized Audio Processing**: Better lip-sync with 24kHz sampling
455
  - **Fine-tuned CFG Scale**: Better control over motion naturalness
456
+
457
+ ### ⏱️ **Audio Length Limitations**
458
+ - **Optimal**: Under 30 seconds for best quality and speed
459
+ - **Recommended**: Under 60 seconds
460
+ - **Maximum**: 180 seconds (3 minutes) - very slow processing
461
+ - For longer content, consider splitting audio into segments
462
  """
463
  )
464