jsonop

Running

App Files Files Community

sheikhed committed on Oct 14, 2024

Commit

8882e69

verified ·

1 Parent(s): c9696b8

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -28

app.py CHANGED Viewed

@@ -29,7 +29,7 @@ def get_voices():
         ("shimmer", "shimmer")
     ]
-def text_to_speech(voice, text):
     url = "https://api.openai.com/v1/audio/speech"
     headers = {
@@ -47,12 +47,17 @@ def text_to_speech(voice, text):
     if response.status_code != 200:
         return None
-    return response.content
-def upload_file(file_content, file_name):
-    files = {'fileToUpload': (file_name, file_content)}
-    data = {'reqtype': 'fileupload'}
-    response = requests.post(UPLOAD_URL, files=files, data=data)
     if response.status_code == 200:
         return response.text.strip()
@@ -74,7 +79,7 @@ def lipsync_api_call(video_url, audio_url):
         "synergizerStrength": 1
     }
-    response = requests.post(API_URL, headers=headers, json=data)
     return response.json()
 def check_job_status(job_id):
@@ -94,24 +99,20 @@ def check_job_status(job_id):
     return None
 def get_media_duration(file_path):
     cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return float(result.stdout.strip())
-def combine_audio_video(video_path, audio_content, output_path):
-    # Save audio content to a temporary file
-    temp_audio_path = f'temp_audio_{uuid.uuid4()}.mp3'
-    with open(temp_audio_path, 'wb') as audio_file:
-        audio_file.write(audio_content)
     # Get durations of both video and audio
     video_duration = get_media_duration(video_path)
-    audio_duration = get_media_duration(temp_audio_path)
     if video_duration > audio_duration:
         # Trim video to match the audio length
         cmd = [
-            'ffmpeg', '-i', video_path, '-i', temp_audio_path,
             '-t', str(audio_duration),  # Trim video to audio duration
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
@@ -121,7 +122,7 @@ def combine_audio_video(video_path, audio_content, output_path):
         # Loop video if it's shorter than audio
         loop_count = int(audio_duration // video_duration) + 1  # Calculate how many times to loop
         cmd = [
-            'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', temp_audio_path,
             '-t', str(audio_duration),  # Match the duration of the final video with the audio
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
@@ -130,23 +131,22 @@ def combine_audio_video(video_path, audio_content, output_path):
     subprocess.run(cmd, check=True)
-    # Clean up temporary audio file
-    os.remove(temp_audio_path)
 def process_video(voice, video_url, text, progress=gr.Progress()):
     session_id = str(uuid.uuid4())  # Generate a unique session ID
     progress(0, desc="Generating speech...")
-    audio_content = text_to_speech(voice, text)
-    if not audio_content:
         return None, "Failed to generate speech audio."
-    progress(0.2, desc="Uploading audio...")
-    audio_url = upload_file(audio_content, f"audio_{session_id}.mp3")
-    if not audio_url:
-        return None, "Failed to upload audio file."
     try:
         progress(0.4, desc="Initiating lipsync...")
         job_data = lipsync_api_call(video_url, audio_url)
@@ -179,13 +179,15 @@ def process_video(voice, video_url, text, progress=gr.Progress()):
                 f.write(video_response.content)
             output_path = f"output_{session_id}.mp4"
-            combine_audio_video(video_path, audio_content, output_path)
             progress(1.0, desc="Complete!")
             return output_path, f"Used fallback method. Original error: {str(e)}"
         except Exception as fallback_error:
             return None, f"All methods failed. Error: {str(fallback_error)}"
     finally:
         # Cleanup
         if os.path.exists(f"temp_video_{session_id}.mp4"):
             os.remove(f"temp_video_{session_id}.mp4")

         ("shimmer", "shimmer")
     ]
+def text_to_speech(voice, text, session_id):
     url = "https://api.openai.com/v1/audio/speech"
     headers = {
     if response.status_code != 200:
         return None
+    # Save temporary audio file with session ID
+    audio_file_path = f'temp_voice_{session_id}.mp3'
+    with open(audio_file_path, 'wb') as audio_file:
+        audio_file.write(response.content)
+    return audio_file_path
+def upload_file(file_path):
+    with open(file_path, 'rb') as file:
+        files = {'fileToUpload': (os.path.basename(file_path), file)}
+        data = {'reqtype': 'fileupload'}
+        response = requests.post(UPLOAD_URL, files=files, data=data)
     if response.status_code == 200:
         return response.text.strip()
         "synergizerStrength": 1
     }
+    response = requests.post(API_URL, headers=headers, data=json.dumps(data))
     return response.json()
 def check_job_status(job_id):
     return None
 def get_media_duration(file_path):
+    # Fetch media duration using ffprobe
     cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return float(result.stdout.strip())
+def combine_audio_video(video_path, audio_path, output_path):
     # Get durations of both video and audio
     video_duration = get_media_duration(video_path)
+    audio_duration = get_media_duration(audio_path)
     if video_duration > audio_duration:
         # Trim video to match the audio length
         cmd = [
+            'ffmpeg', '-i', video_path, '-i', audio_path,
             '-t', str(audio_duration),  # Trim video to audio duration
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
         # Loop video if it's shorter than audio
         loop_count = int(audio_duration // video_duration) + 1  # Calculate how many times to loop
         cmd = [
+            'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
             '-t', str(audio_duration),  # Match the duration of the final video with the audio
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
     subprocess.run(cmd, check=True)
 def process_video(voice, video_url, text, progress=gr.Progress()):
     session_id = str(uuid.uuid4())  # Generate a unique session ID
     progress(0, desc="Generating speech...")
+    audio_path = text_to_speech(voice, text, session_id)
+    if not audio_path:
         return None, "Failed to generate speech audio."
+    progress(0.2, desc="Processing video...")
     try:
+        progress(0.3, desc="Uploading audio...")
+        audio_url = upload_file(audio_path)
+        if not audio_url:
+            raise Exception("Failed to upload audio file")
         progress(0.4, desc="Initiating lipsync...")
         job_data = lipsync_api_call(video_url, audio_url)
                 f.write(video_response.content)
             output_path = f"output_{session_id}.mp4"
+            combine_audio_video(video_path, audio_path, output_path)
             progress(1.0, desc="Complete!")
             return output_path, f"Used fallback method. Original error: {str(e)}"
         except Exception as fallback_error:
             return None, f"All methods failed. Error: {str(fallback_error)}"
     finally:
         # Cleanup
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
         if os.path.exists(f"temp_video_{session_id}.mp4"):
             os.remove(f"temp_video_{session_id}.mp4")