jsonvidedgde

Sleeping

App Files Community

sheikhed committed on Oct 11, 2024

Commit

8a677c2

verified ·

1 Parent(s): 715a36b

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -28

app.py CHANGED Viewed

@@ -5,8 +5,9 @@ import time
 import subprocess
 import gradio as gr
 import uuid
 from dotenv import load_dotenv
-from edge_tts import Voices, speak
 # Load environment variables
 load_dotenv()
@@ -14,23 +15,21 @@ load_dotenv()
 # API Key
 B_KEY = os.getenv("B_KEY")
-# URLs
 API_URL = os.getenv("API_URL")
 UPLOAD_URL = os.getenv("UPLOAD_URL")
-def get_voices():
-    """Fetches and returns a list of available voices from Edge TTS."""
-    voices = Voices().get_voices()
-    return [(f"{v['Name']} ({v['Locale']})", v['ShortName']) for v in voices]
-async def text_to_speech(voice_name, text, session_id):
-    """Uses edge_tts to generate speech and saves it to a file."""
     audio_file_path = f'temp_voice_{session_id}.mp3'
-    await speak(text, voice_name, audio_file_path)
     return audio_file_path
 def upload_file(file_path):
-    """Uploads a file to the specified URL."""
     with open(file_path, 'rb') as file:
         files = {'fileToUpload': (os.path.basename(file_path), file)}
         data = {'reqtype': 'fileupload'}
@@ -41,7 +40,6 @@ def upload_file(file_path):
     return None
 def lipsync_api_call(video_url, audio_url):
-    """Makes an API call to perform lipsync."""
     headers = {
         "Content-Type": "application/json",
         "x-api-key": B_KEY
@@ -61,9 +59,8 @@ def lipsync_api_call(video_url, audio_url):
     return response.json()
 def check_job_status(job_id):
-    """Checks the status of a lipsync job."""
     headers = {"x-api-key": B_KEY}
-    max_attempts = 30
     for _ in range(max_attempts):
         response = requests.get(f"{API_URL}/{job_id}", headers=headers)
@@ -78,29 +75,31 @@ def check_job_status(job_id):
     return None
 def get_media_duration(file_path):
-    """Gets the duration of a media file using ffprobe."""
     cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return float(result.stdout.strip())
 def combine_audio_video(video_path, audio_path, output_path):
-    """Combines audio and video files, handling duration differences."""
     video_duration = get_media_duration(video_path)
     audio_duration = get_media_duration(audio_path)
     if video_duration > audio_duration:
         cmd = [
             'ffmpeg', '-i', video_path, '-i', audio_path,
-            '-t', str(audio_duration),
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-y', output_path
         ]
     else:
-        loop_count = int(audio_duration // video_duration) + 1
         cmd = [
             'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
-            '-t', str(audio_duration),
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-shortest', '-y', output_path
@@ -108,11 +107,10 @@ def combine_audio_video(video_path, audio_path, output_path):
     subprocess.run(cmd, check=True)
-async def process_video(voice_name, video_url, text, progress=gr.Progress()):
-    """Main function to process the video: generate speech, lipsync, and handle results."""
-    session_id = str(uuid.uuid4())
     progress(0, desc="Generating speech...")
-    audio_path = await text_to_speech(voice_name, text, session_id)
     if not audio_path:
         return None, "Failed to generate speech audio."
@@ -150,6 +148,7 @@ async def process_video(voice_name, video_url, text, progress=gr.Progress()):
     except Exception as e:
         progress(0.8, desc="Falling back to simple combination...")
         try:
             video_response = requests.get(video_url)
             video_path = f"temp_video_{session_id}.mp4"
             with open(video_path, "wb") as f:
@@ -162,20 +161,18 @@ async def process_video(voice_name, video_url, text, progress=gr.Progress()):
         except Exception as fallback_error:
             return None, f"All methods failed. Error: {str(fallback_error)}"
     finally:
         if os.path.exists(audio_path):
             os.remove(audio_path)
         if os.path.exists(f"temp_video_{session_id}.mp4"):
             os.remove(f"temp_video_{session_id}.mp4")
 def create_interface():
-    """Creates the Gradio interface for the application."""
-    voices = get_voices()
     with gr.Blocks() as app:
-        gr.Markdown("# Lipsync Video Generator")
         with gr.Row():
             with gr.Column():
-                voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
                 video_url_input = gr.Textbox(label="Enter Video URL")
                 text_input = gr.Textbox(label="Enter text", lines=3)
                 generate_btn = gr.Button("Generate Video")
@@ -184,7 +181,17 @@ def create_interface():
                 status_output = gr.Textbox(label="Status", interactive=False)
         async def on_generate(voice_name, video_url, text):
-            return await process_video(voice_name, video_url, text)
         generate_btn.click(
             fn=on_generate,

 import subprocess
 import gradio as gr
 import uuid
+import asyncio
+import edge_tts
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 # API Key
 B_KEY = os.getenv("B_KEY")
+# URL
 API_URL = os.getenv("API_URL")
 UPLOAD_URL = os.getenv("UPLOAD_URL")
+async def get_voices():
+    voices = await edge_tts.list_voices()
+    return [(voice.name, voice.voice) for voice in voices]
+async def text_to_speech(voice_id, text, session_id):
+    communicate = edge_tts.Communicate(text, voice_id)
     audio_file_path = f'temp_voice_{session_id}.mp3'
+    await communicate.save(audio_file_path)
     return audio_file_path
 def upload_file(file_path):
     with open(file_path, 'rb') as file:
         files = {'fileToUpload': (os.path.basename(file_path), file)}
         data = {'reqtype': 'fileupload'}
     return None
 def lipsync_api_call(video_url, audio_url):
     headers = {
         "Content-Type": "application/json",
         "x-api-key": B_KEY
     return response.json()
 def check_job_status(job_id):
     headers = {"x-api-key": B_KEY}
+    max_attempts = 30  # Limit the number of attempts
     for _ in range(max_attempts):
         response = requests.get(f"{API_URL}/{job_id}", headers=headers)
     return None
 def get_media_duration(file_path):
+    # Fetch media duration using ffprobe
     cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
     result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return float(result.stdout.strip())
 def combine_audio_video(video_path, audio_path, output_path):
+    # Get durations of both video and audio
     video_duration = get_media_duration(video_path)
     audio_duration = get_media_duration(audio_path)
     if video_duration > audio_duration:
+        # Trim video to match the audio length
         cmd = [
             'ffmpeg', '-i', video_path, '-i', audio_path,
+            '-t', str(audio_duration),  # Trim video to audio duration
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-y', output_path
         ]
     else:
+        # Loop video if it's shorter than audio
+        loop_count = int(audio_duration // video_duration) + 1  # Calculate how many times to loop
         cmd = [
             'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
+            '-t', str(audio_duration),  # Match the duration of the final video with the audio
             '-map', '0:v', '-map', '1:a',
             '-c:v', 'copy', '-c:a', 'aac',
             '-shortest', '-y', output_path
     subprocess.run(cmd, check=True)
+async def process_video(voice, video_url, text, progress=gr.Progress()):
+    session_id = str(uuid.uuid4())  # Generate a unique session ID
     progress(0, desc="Generating speech...")
+    audio_path = await text_to_speech(voice, text, session_id)
     if not audio_path:
         return None, "Failed to generate speech audio."
     except Exception as e:
         progress(0.8, desc="Falling back to simple combination...")
         try:
+            # Download the video from the URL
             video_response = requests.get(video_url)
             video_path = f"temp_video_{session_id}.mp4"
             with open(video_path, "wb") as f:
         except Exception as fallback_error:
             return None, f"All methods failed. Error: {str(fallback_error)}"
     finally:
+        # Cleanup
         if os.path.exists(audio_path):
             os.remove(audio_path)
         if os.path.exists(f"temp_video_{session_id}.mp4"):
             os.remove(f"temp_video_{session_id}.mp4")
 def create_interface():
     with gr.Blocks() as app:
+        gr.Markdown("# JSON Train")
         with gr.Row():
             with gr.Column():
+                voice_dropdown = gr.Dropdown(label="Select Voice")
                 video_url_input = gr.Textbox(label="Enter Video URL")
                 text_input = gr.Textbox(label="Enter text", lines=3)
                 generate_btn = gr.Button("Generate Video")
                 status_output = gr.Textbox(label="Status", interactive=False)
         async def on_generate(voice_name, video_url, text):
+            voices = await get_voices()
+            voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
+            if not voice_id:
+                return None, "Invalid voice selected."
+            return await process_video(voice_id, video_url, text)
+        async def populate_voices():
+            voices = await get_voices()
+            return gr.Dropdown.update(choices=[v[0] for v in voices], value=voices[0][0] if voices else None)
+        app.load(populate_voices, outputs=[voice_dropdown])
         generate_btn.click(
             fn=on_generate,