jsonop

Running

App Files Files Community

sheikhed committed on Oct 15, 2024

Commit

a3e352f

verified ·

1 Parent(s): b2b31f2

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -68

app.py CHANGED Viewed

@@ -1,12 +1,11 @@
-import os
 import requests
 import json
 import time
 import subprocess
 import gradio as gr
 import uuid
 from dotenv import load_dotenv
-from urllib.parse import urlparse
 # Load environment variables
 load_dotenv()
@@ -32,24 +31,24 @@ def get_voices():
 def text_to_speech(voice, text, session_id):
     url = "https://api.openai.com/v1/audio/speech"
     headers = {
         "Authorization": f"Bearer {OPENAI_API_KEY}",
         "Content-Type": "application/json"
     }
     data = {
         "model": "tts-1",
         "input": text,
         "voice": voice
     }
     response = requests.post(url, json=data, headers=headers)
     if response.status_code != 200:
         return None
     # Save temporary audio file with session ID
-    audio_file_path = f'temp_voice_{session_id}.mp3'
     with open(audio_file_path, 'wb') as audio_file:
         audio_file.write(response.content)
     return audio_file_path
@@ -59,7 +58,7 @@ def upload_file(file_path):
         files = {'fileToUpload': (os.path.basename(file_path), file)}
         data = {'reqtype': 'fileupload'}
         response = requests.post(UPLOAD_URL, files=files, data=data)
     if response.status_code == 200:
         return response.text.strip()
     return None
@@ -69,7 +68,7 @@ def lipsync_api_call(video_url, audio_url):
         "Content-Type": "application/json",
         "x-api-key": B_KEY
     }
     data = {
         "audioUrl": audio_url,
         "videoUrl": video_url,
@@ -79,23 +78,23 @@ def lipsync_api_call(video_url, audio_url):
         "pads": [0, 5, 0, 0],
         "synergizerStrength": 1
     }
     response = requests.post(API_URL, headers=headers, data=json.dumps(data))
     return response.json()
 def check_job_status(job_id):
     """Poll the lipsync job until it finishes, fails, or polling is exhausted.

     Args:
         job_id: Identifier appended to ``API_URL`` to address the job.

     Returns:
         The value of the job's ``videoUrl`` field once its status is
         ``"COMPLETED"``; ``None`` if the job reports ``"FAILED"``, if the
         completed payload carries no ``videoUrl``, or if the job is still
         unfinished after ``max_attempts`` polls.

     Raises:
         requests.RequestException: if a poll request fails or times out.
     """
     headers = {"x-api-key": B_KEY}
     max_attempts = 30  # Limit the number of attempts
     poll_interval = 10  # Seconds to wait between polls
     for attempt in range(max_attempts):
         # A timeout keeps a stalled connection from hanging the whole request.
         response = requests.get(f"{API_URL}/{job_id}", headers=headers, timeout=30)
         try:
             data = response.json()
         except ValueError:
             # Non-JSON body (e.g. a gateway error page): treat as "not ready yet".
             data = {}
         # Error payloads may omit "status"; read it without raising KeyError.
         status = data.get("status") if isinstance(data, dict) else None
         if status == "COMPLETED":
             return data.get("videoUrl")
         if status == "FAILED":
             return None
         # Skip the sleep after the final poll; there is nothing left to wait for.
         if attempt < max_attempts - 1:
             time.sleep(poll_interval)
     return None
@@ -132,95 +131,88 @@ def combine_audio_video(video_path, audio_path, output_path):
     subprocess.run(cmd, check=True)
-def is_image_url(url):
-    parsed = urlparse(url)
-    path = parsed.path.lower()
-    return path.endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
-def create_video_from_image(image_url, output_path, duration=10):
     # Download the image
     response = requests.get(image_url)
-    if response.status_code != 200:
-        raise Exception("Failed to download the image")
-    temp_image_path = f"temp_image_{uuid.uuid4()}.jpg"
-    with open(temp_image_path, 'wb') as f:
         f.write(response.content)
     # Create a 10-second video from the image
     cmd = [
-        'ffmpeg', '-loop', '1', '-i', temp_image_path,
-        '-c:v', 'libx264', '-t', str(duration), '-pix_fmt', 'yuv420p',
-        '-vf', 'scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2',
-        '-y', output_path
     ]
     subprocess.run(cmd, check=True)
     # Clean up the temporary image file
-    os.remove(temp_image_path)
-    return output_path
-def process_video(voice, media_url, text, progress=gr.Progress()):
-    session_id = str(uuid.uuid4())
     progress(0, desc="Generating speech...")
     audio_path = text_to_speech(voice, text, session_id)
     if not audio_path:
         return None, "Failed to generate speech audio."
     progress(0.2, desc="Processing media...")
     try:
-        if is_image_url(media_url):
             progress(0.3, desc="Converting image to video...")
-            video_path = f"temp_video_{session_id}.mp4"
-            create_video_from_image(media_url, video_path)
-            progress(0.4, desc="Uploading converted video...")
             video_url = upload_file(video_path)
-            if not video_url:
-                raise Exception("Failed to upload converted video")
         else:
-            video_url = media_url
-        progress(0.5, desc="Uploading audio...")
         audio_url = upload_file(audio_path)
-        if not audio_url:
-            raise Exception("Failed to upload audio file")
-        progress(0.6, desc="Initiating lipsync...")
         job_data = lipsync_api_call(video_url, audio_url)
         if "error" in job_data or "message" in job_data:
             raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))
         job_id = job_data["id"]
-        progress(0.7, desc="Processing lipsync...")
         result_url = check_job_status(job_id)
         if result_url:
             progress(0.9, desc="Downloading result...")
             response = requests.get(result_url)
-            output_path = f"output_{session_id}.mp4"
             with open(output_path, "wb") as f:
                 f.write(response.content)
             progress(1.0, desc="Complete!")
             return output_path, "Lipsync completed successfully!"
         else:
             raise Exception("Lipsync processing failed or timed out")
     except Exception as e:
         progress(0.8, desc="Falling back to simple combination...")
         try:
             if 'video_path' not in locals():
                 # Download the video from the URL if it wasn't created from an image
                 video_response = requests.get(video_url)
-                video_path = f"temp_video_{session_id}.mp4"
                 with open(video_path, "wb") as f:
                     f.write(video_response.content)
-            output_path = f"output_{session_id}.mp4"
             combine_audio_video(video_path, audio_path, output_path)
             progress(1.0, desc="Complete!")
             return output_path, f"Used fallback method. Original error: {str(e)}"
@@ -230,36 +222,36 @@ def process_video(voice, media_url, text, progress=gr.Progress()):
         # Cleanup
         if os.path.exists(audio_path):
             os.remove(audio_path)
-        if os.path.exists(f"temp_video_{session_id}.mp4"):
-            os.remove(f"temp_video_{session_id}.mp4")
 def create_interface():
     voices = get_voices()
     with gr.Blocks() as app:
         gr.Markdown("# Lipsync Video Generator")
         with gr.Row():
             with gr.Column():
                 voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
-                media_url_input = gr.Textbox(label="Enter Video or Image URL")
                 text_input = gr.Textbox(label="Enter text", lines=3)
                 generate_btn = gr.Button("Generate Video")
             with gr.Column():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
-        def on_generate(voice_name, media_url, text):
             voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
             if not voice_id:
                 return None, "Invalid voice selected."
-            return process_video(voice_id, media_url, text)
         generate_btn.click(
             fn=on_generate,
-            inputs=[voice_dropdown, media_url_input, text_input],
             outputs=[video_output, status_output]
         )
     return app
 if __name__ == "__main__":

 import requests
 import json
 import time
 import subprocess
 import gradio as gr
 import uuid
+import os
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 def text_to_speech(voice, text, session_id):
     url = "https://api.openai.com/v1/audio/speech"
     headers = {
         "Authorization": f"Bearer {OPENAI_API_KEY}",
         "Content-Type": "application/json"
     }
     data = {
         "model": "tts-1",
         "input": text,
         "voice": voice
     }
     response = requests.post(url, json=data, headers=headers)
     if response.status_code != 200:
         return None
     # Save temporary audio file with session ID
+    audio_file_path = f'tempvoice{session_id}.mp3'
     with open(audio_file_path, 'wb') as audio_file:
         audio_file.write(response.content)
     return audio_file_path
         files = {'fileToUpload': (os.path.basename(file_path), file)}
         data = {'reqtype': 'fileupload'}
         response = requests.post(UPLOAD_URL, files=files, data=data)
     if response.status_code == 200:
         return response.text.strip()
     return None
         "Content-Type": "application/json",
         "x-api-key": B_KEY
     }
     data = {
         "audioUrl": audio_url,
         "videoUrl": video_url,
         "pads": [0, 5, 0, 0],
         "synergizerStrength": 1
     }
     response = requests.post(API_URL, headers=headers, data=json.dumps(data))
     return response.json()
 def check_job_status(job_id):
     """Poll the lipsync job until it finishes, fails, or polling is exhausted.

     Args:
         job_id: Identifier appended to ``API_URL`` to address the job.

     Returns:
         The value of the job's ``videoUrl`` field once its status is
         ``"COMPLETED"``; ``None`` if the job reports ``"FAILED"``, if the
         completed payload carries no ``videoUrl``, or if the job is still
         unfinished after ``max_attempts`` polls.

     Raises:
         requests.RequestException: if a poll request fails or times out.
     """
     headers = {"x-api-key": B_KEY}
     max_attempts = 30  # Limit the number of attempts
     poll_interval = 10  # Seconds to wait between polls
     for attempt in range(max_attempts):
         # A timeout keeps a stalled connection from hanging the whole request.
         response = requests.get(f"{API_URL}/{job_id}", headers=headers, timeout=30)
         try:
             data = response.json()
         except ValueError:
             # Non-JSON body (e.g. a gateway error page): treat as "not ready yet".
             data = {}
         # Error payloads may omit "status"; read it without raising KeyError.
         status = data.get("status") if isinstance(data, dict) else None
         if status == "COMPLETED":
             return data.get("videoUrl")
         if status == "FAILED":
             return None
         # Skip the sleep after the final poll; there is nothing left to wait for.
         if attempt < max_attempts - 1:
             time.sleep(poll_interval)
     return None
     subprocess.run(cmd, check=True)
+def create_video_from_image(image_url, session_id):
     # Download the image
     response = requests.get(image_url)
+    image_path = f"tempimage{session_id}.jpg"
+    with open(image_path, "wb") as f:
         f.write(response.content)
     # Create a 10-second video from the image
+    video_path = f"tempvideo{session_id}.mp4"
     cmd = [
+        'ffmpeg', '-loop', '1', '-i', image_path,
+        '-c:v', 'libx264', '-t', '10', '-pix_fmt', 'yuv420p',
+        '-vf', 'scale=1280:720', # Adjust resolution as needed
+        video_path
     ]
     subprocess.run(cmd, check=True)
     # Clean up the temporary image file
+    os.remove(image_path)
+    return video_path
+def process_video(voice, url, text, progress=gr.Progress()):
+    session_id = str(uuid.uuid4())  # Generate a unique session ID
     progress(0, desc="Generating speech...")
     audio_path = text_to_speech(voice, text, session_id)
     if not audio_path:
         return None, "Failed to generate speech audio."
     progress(0.2, desc="Processing media...")
     try:
+        # Check if the URL is an image
+        response = requests.head(url)
+        content_type = response.headers.get('Content-Type', '')
+        if content_type.startswith('image'):
             progress(0.3, desc="Converting image to video...")
+            video_path = create_video_from_image(url, session_id)
             video_url = upload_file(video_path)
         else:
+            video_url = url
+        progress(0.4, desc="Uploading audio...")
         audio_url = upload_file(audio_path)
+        if not audio_url or not video_url:
+            raise Exception("Failed to upload audio or video file")
+        progress(0.5, desc="Initiating lipsync...")
         job_data = lipsync_api_call(video_url, audio_url)
         if "error" in job_data or "message" in job_data:
             raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))
         job_id = job_data["id"]
+        progress(0.6, desc="Processing lipsync...")
         result_url = check_job_status(job_id)
         if result_url:
             progress(0.9, desc="Downloading result...")
             response = requests.get(result_url)
+            output_path = f"output{session_id}.mp4"
             with open(output_path, "wb") as f:
                 f.write(response.content)
             progress(1.0, desc="Complete!")
             return output_path, "Lipsync completed successfully!"
         else:
             raise Exception("Lipsync processing failed or timed out")
     except Exception as e:
         progress(0.8, desc="Falling back to simple combination...")
         try:
             if 'video_path' not in locals():
                 # Download the video from the URL if it wasn't created from an image
                 video_response = requests.get(video_url)
+                video_path = f"tempvideo{session_id}.mp4"
                 with open(video_path, "wb") as f:
                     f.write(video_response.content)
+            output_path = f"output{session_id}.mp4"
             combine_audio_video(video_path, audio_path, output_path)
             progress(1.0, desc="Complete!")
             return output_path, f"Used fallback method. Original error: {str(e)}"
         # Cleanup
         if os.path.exists(audio_path):
             os.remove(audio_path)
+        if os.path.exists(f"tempvideo{session_id}.mp4"):
+            os.remove(f"tempvideo{session_id}.mp4")
 def create_interface():
     voices = get_voices()
     with gr.Blocks() as app:
         gr.Markdown("# Lipsync Video Generator")
         with gr.Row():
             with gr.Column():
                 voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
+                url_input = gr.Textbox(label="Enter Video or Image URL")
                 text_input = gr.Textbox(label="Enter text", lines=3)
                 generate_btn = gr.Button("Generate Video")
             with gr.Column():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
+        def on_generate(voice_name, url, text):
             voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
             if not voice_id:
                 return None, "Invalid voice selected."
+            return process_video(voice_id, url, text)
         generate_btn.click(
             fn=on_generate,
+            inputs=[voice_dropdown, url_input, text_input],
             outputs=[video_output, status_output]
         )
     return app
 if __name__ == "__main__":