jsonop

Running

App Files Files Community

sheikhed committed on Oct 14, 2024

Commit

a23ccb7

verified ·

1 Parent(s): 07d1639

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -78

app.py CHANGED Viewed

@@ -11,16 +11,14 @@ from dotenv import load_dotenv
 load_dotenv()
 # API Keys
-A_KEY = os.getenv("A_KEY")  # ElevenLabs API key
-B_KEY = os.getenv("B_KEY")  # Lipsync API key
-OPENAI_KEY = os.getenv("OPENAI_KEY")  # OpenAI API key
 # URLs
 API_URL = os.getenv("API_URL")
 UPLOAD_URL = os.getenv("UPLOAD_URL")
-OPENAI_API_URL = "https://api.openai.com/v1/audio/speech"
-def get_elevenlabs_voices():
     url = "https://api.elevenlabs.io/v1/voices"
     headers = {
         "Accept": "application/json",
@@ -32,18 +30,7 @@ def get_elevenlabs_voices():
         return []
     return [(voice['name'], voice['voice_id']) for voice in response.json().get('voices', [])]
-def get_openai_voices():
-    # OpenAI voices are predefined
-    return [
-        ("alloy", "alloy"),
-        ("echo", "echo"),
-        ("fable", "fable"),
-        ("onyx", "onyx"),
-        ("nova", "nova"),
-        ("shimmer", "shimmer")
-    ]
-def text_to_speech_elevenlabs(voice_id, text, session_id):
     url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
     headers = {
@@ -65,27 +52,7 @@ def text_to_speech_elevenlabs(voice_id, text, session_id):
     if response.status_code != 200:
         return None
-    audio_file_path = f'temp_voice_{session_id}.mp3'
-    with open(audio_file_path, 'wb') as audio_file:
-        audio_file.write(response.content)
-    return audio_file_path
-def text_to_speech_openai(voice, text, session_id):
-    headers = {
-        "Authorization": f"Bearer {OPENAI_KEY}",
-        "Content-Type": "application/json"
-    }
-    data = {
-        "model": "tts-1",
-        "input": text,
-        "voice": voice
-    }
-    response = requests.post(OPENAI_API_URL, headers=headers, json=data)
-    if response.status_code != 200:
-        return None
     audio_file_path = f'temp_voice_{session_id}.mp3'
     with open(audio_file_path, 'wb') as audio_file:
         audio_file.write(response.content)
@@ -122,7 +89,7 @@ def lipsync_api_call(video_url, audio_url):
 def check_job_status(job_id):
     headers = {"x-api-key": B_KEY}
-    max_attempts = 30
     for _ in range(max_attempts):
         response = requests.get(f"{API_URL}/{job_id}", headers=headers)
@@ -137,27 +104,31 @@ def check_job_status(job_id):
     return None
 def get_media_duration(file_path):
     cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return float(result.stdout.strip())
 def combine_audio_video(video_path, audio_path, output_path):
     video_duration = get_media_duration(video_path)
     audio_duration = get_media_duration(audio_path)
     if video_duration > audio_duration:
         cmd = [
             'ffmpeg', '-i', video_path, '-i', audio_path,
-            '-t', str(audio_duration),
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-y', output_path
         ]
     else:
-        loop_count = int(audio_duration // video_duration) + 1
         cmd = [
             'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
-            '-t', str(audio_duration),
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-shortest', '-y', output_path
@@ -165,15 +136,10 @@ def combine_audio_video(video_path, audio_path, output_path):
     subprocess.run(cmd, check=True)
-def process_video(provider, voice, video_url, text, progress=gr.Progress()):
-    session_id = str(uuid.uuid4())
     progress(0, desc="Generating speech...")
-    if provider == "ElevenLabs":
-        audio_path = text_to_speech_elevenlabs(voice, text, session_id)
-    else:  # OpenAI
-        audio_path = text_to_speech_openai(voice, text, session_id)
     if not audio_path:
         return None, "Failed to generate speech audio."
@@ -211,6 +177,7 @@ def process_video(provider, voice, video_url, text, progress=gr.Progress()):
     except Exception as e:
         progress(0.8, desc="Falling back to simple combination...")
         try:
             video_response = requests.get(video_url)
             video_path = f"temp_video_{session_id}.mp4"
             with open(video_path, "wb") as f:
@@ -223,21 +190,20 @@ def process_video(provider, voice, video_url, text, progress=gr.Progress()):
         except Exception as fallback_error:
             return None, f"All methods failed. Error: {str(fallback_error)}"
     finally:
         if os.path.exists(audio_path):
             os.remove(audio_path)
         if os.path.exists(f"temp_video_{session_id}.mp4"):
             os.remove(f"temp_video_{session_id}.mp4")
 def create_interface():
-    elevenlabs_voices = get_elevenlabs_voices()
-    openai_voices = get_openai_voices()
     with gr.Blocks() as app:
-        gr.Markdown("# Voice Synthesis Application")
         with gr.Row():
             with gr.Column():
-                provider_dropdown = gr.Dropdown(choices=["ElevenLabs", "OpenAI"], label="Select Provider", value="ElevenLabs")
-                voice_dropdown = gr.Dropdown(choices=[v[0] for v in elevenlabs_voices], label="Select Voice", value=elevenlabs_voices[0][0] if elevenlabs_voices else None)
                 video_url_input = gr.Textbox(label="Enter Video URL")
                 text_input = gr.Textbox(label="Enter text", lines=3)
                 generate_btn = gr.Button("Generate Video")
@@ -245,32 +211,15 @@ def create_interface():
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
-        def update_voices(provider):
-            if provider == "ElevenLabs":
-                return gr.Dropdown(choices=[v[0] for v in elevenlabs_voices], value=elevenlabs_voices[0][0] if elevenlabs_voices else None)
-            else:  # OpenAI
-                return gr.Dropdown(choices=[v[0] for v in openai_voices], value=openai_voices[0][0])
-        provider_dropdown.change(fn=update_voices, inputs=[provider_dropdown], outputs=[voice_dropdown])
-        def on_generate(provider, voice_name, video_url, text):
-            try:
-                if provider == "ElevenLabs":
-                    voices = elevenlabs_voices
-                else:  # OpenAI
-                    voices = openai_voices
-                voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
-                if not voice_id:
-                    raise ValueError(f"Invalid voice selected for {provider}: {voice_name}")
-                return process_video(provider, voice_id, video_url, text)
-            except Exception as e:
-                return None, f"Error: {str(e)}"
         generate_btn.click(
             fn=on_generate,
-            inputs=[provider_dropdown, voice_dropdown, video_url_input, text_input],
             outputs=[video_output, status_output]
         )

 load_dotenv()
 # API Keys
+A_KEY = os.getenv("A_KEY")
+B_KEY = os.getenv("B_KEY")
 # URLs
 API_URL = os.getenv("API_URL")
 UPLOAD_URL = os.getenv("UPLOAD_URL")
+def get_voices():
     url = "https://api.elevenlabs.io/v1/voices"
     headers = {
         "Accept": "application/json",
         return []
     return [(voice['name'], voice['voice_id']) for voice in response.json().get('voices', [])]
+def text_to_speech(voice_id, text, session_id):
     url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
     headers = {
     if response.status_code != 200:
         return None
+    # Save temporary audio file with session ID
     audio_file_path = f'temp_voice_{session_id}.mp3'
     with open(audio_file_path, 'wb') as audio_file:
         audio_file.write(response.content)
 def check_job_status(job_id):
     headers = {"x-api-key": B_KEY}
+    max_attempts = 30  # Limit the number of attempts
     for _ in range(max_attempts):
         response = requests.get(f"{API_URL}/{job_id}", headers=headers)
     return None
 def get_media_duration(file_path):
+    # Fetch media duration using ffprobe
     cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return float(result.stdout.strip())
 def combine_audio_video(video_path, audio_path, output_path):
+    # Get durations of both video and audio
     video_duration = get_media_duration(video_path)
     audio_duration = get_media_duration(audio_path)
     if video_duration > audio_duration:
+        # Trim video to match the audio length
         cmd = [
             'ffmpeg', '-i', video_path, '-i', audio_path,
+            '-t', str(audio_duration),  # Trim video to audio duration
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-y', output_path
         ]
     else:
+        # Loop video if it's shorter than audio
+        loop_count = int(audio_duration // video_duration) + 1  # Calculate how many times to loop
         cmd = [
             'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
+            '-t', str(audio_duration),  # Match the duration of the final video with the audio
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-shortest', '-y', output_path
     subprocess.run(cmd, check=True)
+def process_video(voice, video_url, text, progress=gr.Progress()):
+    session_id = str(uuid.uuid4())  # Generate a unique session ID
     progress(0, desc="Generating speech...")
+    audio_path = text_to_speech(voice, text, session_id)
     if not audio_path:
         return None, "Failed to generate speech audio."
     except Exception as e:
         progress(0.8, desc="Falling back to simple combination...")
         try:
+            # Download the video from the URL
             video_response = requests.get(video_url)
             video_path = f"temp_video_{session_id}.mp4"
             with open(video_path, "wb") as f:
         except Exception as fallback_error:
             return None, f"All methods failed. Error: {str(fallback_error)}"
     finally:
+        # Cleanup
         if os.path.exists(audio_path):
             os.remove(audio_path)
         if os.path.exists(f"temp_video_{session_id}.mp4"):
             os.remove(f"temp_video_{session_id}.mp4")
 def create_interface():
+    voices = get_voices()
     with gr.Blocks() as app:
+        gr.Markdown("# JSON Train")
         with gr.Row():
             with gr.Column():
+                voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
                 video_url_input = gr.Textbox(label="Enter Video URL")
                 text_input = gr.Textbox(label="Enter text", lines=3)
                 generate_btn = gr.Button("Generate Video")
                 video_output = gr.Video(label="Generated Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
+        def on_generate(voice_name, video_url, text):
+            voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
+            if not voice_id:
+                return None, "Invalid voice selected."
+            return process_video(voice_id, video_url, text)
         generate_btn.click(
             fn=on_generate,
+            inputs=[voice_dropdown, video_url_input, text_input],
             outputs=[video_output, status_output]
         )