jsonop

Running

App Files Community

sheikhed committed on Oct 11, 2024

Commit

d7e99cb

verified ·

1 Parent(s): f959be9

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -16

app.py CHANGED Viewed

@@ -30,9 +30,6 @@ def get_voices():
         return []
     return [(voice['name'], voice['voice_id']) for voice in response.json().get('voices', [])]
-def get_video_models():
-    return [f for f in os.listdir("models") if f.endswith((".mp4", ".avi", ".mov"))]
 def text_to_speech(voice_id, text, session_id):
     url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
@@ -139,7 +136,7 @@ def combine_audio_video(video_path, audio_path, output_path):
     subprocess.run(cmd, check=True)
-def process_video(voice, model, text, progress=gr.Progress()):
     session_id = str(uuid.uuid4())  # Generate a unique session ID
     progress(0, desc="Generating speech...")
     audio_path = text_to_speech(voice, text, session_id)
@@ -147,15 +144,13 @@ def process_video(voice, model, text, progress=gr.Progress()):
         return None, "Failed to generate speech audio."
     progress(0.2, desc="Processing video...")
-    video_path = os.path.join("models", model)
     try:
-        progress(0.3, desc="Uploading files...")
-        video_url = upload_file(video_path)
         audio_url = upload_file(audio_path)
-        if not video_url or not audio_url:
-            raise Exception("Failed to upload files")
         progress(0.4, desc="Initiating lipsync...")
         job_data = lipsync_api_call(video_url, audio_url)
@@ -182,6 +177,12 @@ def process_video(voice, model, text, progress=gr.Progress()):
     except Exception as e:
         progress(0.8, desc="Falling back to simple combination...")
         try:
             output_path = f"output_{session_id}.mp4"
             combine_audio_video(video_path, audio_path, output_path)
             progress(1.0, desc="Complete!")
@@ -192,32 +193,33 @@ def process_video(voice, model, text, progress=gr.Progress()):
         # Cleanup
         if os.path.exists(audio_path):
             os.remove(audio_path)
 def create_interface():
     voices = get_voices()
-    models = get_video_models()
     with gr.Blocks() as app:
         gr.Markdown("# JSON Train")
         with gr.Row():
             with gr.Column():
-                voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select", value=voices[0][0] if voices else None)
-                model_dropdown = gr.Dropdown(choices=models, label="Select", value=models[0] if models else None)
                 text_input = gr.Textbox(label="Enter text", lines=3)
                 generate_btn = gr.Button("Generate Video")
             with gr.Column():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
-        def on_generate(voice_name, model_name, text):
             voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
             if not voice_id:
                 return None, "Invalid voice selected."
-            return process_video(voice_id, model_name, text)
         generate_btn.click(
             fn=on_generate,
-            inputs=[voice_dropdown, model_dropdown, text_input],
             outputs=[video_output, status_output]
         )
@@ -225,4 +227,4 @@ def create_interface():
 if __name__ == "__main__":
     app = create_interface()
-    app.launch()

         return []
     return [(voice['name'], voice['voice_id']) for voice in response.json().get('voices', [])]
 def text_to_speech(voice_id, text, session_id):
     url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
     subprocess.run(cmd, check=True)
+def process_video(voice, video_url, text, progress=gr.Progress()):
     session_id = str(uuid.uuid4())  # Generate a unique session ID
     progress(0, desc="Generating speech...")
     audio_path = text_to_speech(voice, text, session_id)
         return None, "Failed to generate speech audio."
     progress(0.2, desc="Processing video...")
     try:
+        progress(0.3, desc="Uploading audio...")
         audio_url = upload_file(audio_path)
+        if not audio_url:
+            raise Exception("Failed to upload audio file")
         progress(0.4, desc="Initiating lipsync...")
         job_data = lipsync_api_call(video_url, audio_url)
     except Exception as e:
         progress(0.8, desc="Falling back to simple combination...")
         try:
+            # Download the video from the URL
+            video_response = requests.get(video_url)
+            video_path = f"temp_video_{session_id}.mp4"
+            with open(video_path, "wb") as f:
+                f.write(video_response.content)
             output_path = f"output_{session_id}.mp4"
             combine_audio_video(video_path, audio_path, output_path)
             progress(1.0, desc="Complete!")
         # Cleanup
         if os.path.exists(audio_path):
             os.remove(audio_path)
+        if os.path.exists(f"temp_video_{session_id}.mp4"):
+            os.remove(f"temp_video_{session_id}.mp4")
 def create_interface():
     voices = get_voices()
     with gr.Blocks() as app:
         gr.Markdown("# JSON Train")
         with gr.Row():
             with gr.Column():
+                voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
+                video_url_input = gr.Textbox(label="Enter Video URL")
                 text_input = gr.Textbox(label="Enter text", lines=3)
                 generate_btn = gr.Button("Generate Video")
             with gr.Column():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
+        def on_generate(voice_name, video_url, text):
             voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
             if not voice_id:
                 return None, "Invalid voice selected."
+            return process_video(voice_id, video_url, text)
         generate_btn.click(
             fn=on_generate,
+            inputs=[voice_dropdown, video_url_input, text_input],
             outputs=[video_output, status_output]
         )
 if __name__ == "__main__":
     app = create_interface()
+    app.launch()