Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,8 +5,9 @@ import time
|
|
5 |
import subprocess
|
6 |
import gradio as gr
|
7 |
import uuid
|
|
|
|
|
8 |
from dotenv import load_dotenv
|
9 |
-
from edge_tts import Voices, speak
|
10 |
|
11 |
# Load environment variables
|
12 |
load_dotenv()
|
@@ -14,23 +15,21 @@ load_dotenv()
|
|
14 |
# API Key
|
15 |
B_KEY = os.getenv("B_KEY")
|
16 |
|
17 |
-
#
|
18 |
API_URL = os.getenv("API_URL")
|
19 |
UPLOAD_URL = os.getenv("UPLOAD_URL")
|
20 |
|
21 |
-
def get_voices():
|
22 |
-
|
23 |
-
|
24 |
-
return [(f"{v['Name']} ({v['Locale']})", v['ShortName']) for v in voices]
|
25 |
|
26 |
-
async def text_to_speech(
|
27 |
-
|
28 |
audio_file_path = f'temp_voice_{session_id}.mp3'
|
29 |
-
await
|
30 |
return audio_file_path
|
31 |
|
32 |
def upload_file(file_path):
|
33 |
-
"""Uploads a file to the specified URL."""
|
34 |
with open(file_path, 'rb') as file:
|
35 |
files = {'fileToUpload': (os.path.basename(file_path), file)}
|
36 |
data = {'reqtype': 'fileupload'}
|
@@ -41,7 +40,6 @@ def upload_file(file_path):
|
|
41 |
return None
|
42 |
|
43 |
def lipsync_api_call(video_url, audio_url):
|
44 |
-
"""Makes an API call to perform lipsync."""
|
45 |
headers = {
|
46 |
"Content-Type": "application/json",
|
47 |
"x-api-key": B_KEY
|
@@ -61,9 +59,8 @@ def lipsync_api_call(video_url, audio_url):
|
|
61 |
return response.json()
|
62 |
|
63 |
def check_job_status(job_id):
|
64 |
-
"""Checks the status of a lipsync job."""
|
65 |
headers = {"x-api-key": B_KEY}
|
66 |
-
max_attempts = 30
|
67 |
|
68 |
for _ in range(max_attempts):
|
69 |
response = requests.get(f"{API_URL}/{job_id}", headers=headers)
|
@@ -78,29 +75,31 @@ def check_job_status(job_id):
|
|
78 |
return None
|
79 |
|
80 |
def get_media_duration(file_path):
|
81 |
-
|
82 |
cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
|
83 |
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
84 |
return float(result.stdout.strip())
|
85 |
|
86 |
def combine_audio_video(video_path, audio_path, output_path):
|
87 |
-
|
88 |
video_duration = get_media_duration(video_path)
|
89 |
audio_duration = get_media_duration(audio_path)
|
90 |
|
91 |
if video_duration > audio_duration:
|
|
|
92 |
cmd = [
|
93 |
'ffmpeg', '-i', video_path, '-i', audio_path,
|
94 |
-
'-t', str(audio_duration),
|
95 |
'-map', '0:v', '-map', '1:a',
|
96 |
'-c:v', 'copy', '-c:a', 'aac',
|
97 |
'-y', output_path
|
98 |
]
|
99 |
else:
|
100 |
-
|
|
|
101 |
cmd = [
|
102 |
'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
|
103 |
-
'-t', str(audio_duration),
|
104 |
'-map', '0:v', '-map', '1:a',
|
105 |
'-c:v', 'copy', '-c:a', 'aac',
|
106 |
'-shortest', '-y', output_path
|
@@ -108,11 +107,10 @@ def combine_audio_video(video_path, audio_path, output_path):
|
|
108 |
|
109 |
subprocess.run(cmd, check=True)
|
110 |
|
111 |
-
async def process_video(
|
112 |
-
|
113 |
-
session_id = str(uuid.uuid4())
|
114 |
progress(0, desc="Generating speech...")
|
115 |
-
audio_path = await text_to_speech(
|
116 |
if not audio_path:
|
117 |
return None, "Failed to generate speech audio."
|
118 |
|
@@ -150,6 +148,7 @@ async def process_video(voice_name, video_url, text, progress=gr.Progress()):
|
|
150 |
except Exception as e:
|
151 |
progress(0.8, desc="Falling back to simple combination...")
|
152 |
try:
|
|
|
153 |
video_response = requests.get(video_url)
|
154 |
video_path = f"temp_video_{session_id}.mp4"
|
155 |
with open(video_path, "wb") as f:
|
@@ -162,20 +161,18 @@ async def process_video(voice_name, video_url, text, progress=gr.Progress()):
|
|
162 |
except Exception as fallback_error:
|
163 |
return None, f"All methods failed. Error: {str(fallback_error)}"
|
164 |
finally:
|
|
|
165 |
if os.path.exists(audio_path):
|
166 |
os.remove(audio_path)
|
167 |
if os.path.exists(f"temp_video_{session_id}.mp4"):
|
168 |
os.remove(f"temp_video_{session_id}.mp4")
|
169 |
|
170 |
def create_interface():
|
171 |
-
"""Creates the Gradio interface for the application."""
|
172 |
-
voices = get_voices()
|
173 |
-
|
174 |
with gr.Blocks() as app:
|
175 |
-
gr.Markdown("#
|
176 |
with gr.Row():
|
177 |
with gr.Column():
|
178 |
-
voice_dropdown = gr.Dropdown(
|
179 |
video_url_input = gr.Textbox(label="Enter Video URL")
|
180 |
text_input = gr.Textbox(label="Enter text", lines=3)
|
181 |
generate_btn = gr.Button("Generate Video")
|
@@ -184,7 +181,17 @@ def create_interface():
|
|
184 |
status_output = gr.Textbox(label="Status", interactive=False)
|
185 |
|
186 |
async def on_generate(voice_name, video_url, text):
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
generate_btn.click(
|
190 |
fn=on_generate,
|
|
|
5 |
import subprocess
|
6 |
import gradio as gr
|
7 |
import uuid
|
8 |
+
import asyncio
|
9 |
+
import edge_tts
|
10 |
from dotenv import load_dotenv
|
|
|
11 |
|
12 |
# Load environment variables
|
13 |
load_dotenv()
|
|
|
15 |
# API Key
|
16 |
B_KEY = os.getenv("B_KEY")
|
17 |
|
18 |
+
# URL
|
19 |
API_URL = os.getenv("API_URL")
|
20 |
UPLOAD_URL = os.getenv("UPLOAD_URL")
|
21 |
|
22 |
+
async def get_voices():
    """Fetch the voices offered by the Edge TTS service.

    Returns:
        A list of ``(name, short_name)`` tuples. The first element is the
        label shown to the user; the second is the voice identifier that is
        passed on to ``edge_tts.Communicate``.
    """
    voices = await edge_tts.list_voices()
    # list_voices() yields plain dicts (keys such as "Name" and "ShortName"),
    # so attribute access like ``voice.name`` would raise AttributeError.
    return [(voice["Name"], voice["ShortName"]) for voice in voices]
|
|
|
25 |
|
26 |
+
async def text_to_speech(voice_id, text, session_id):
    """Synthesize ``text`` with the given voice and save it as an MP3 file.

    The audio is written to ``temp_voice_<session_id>.mp3`` in the current
    working directory, and that path is returned.
    """
    tts = edge_tts.Communicate(text, voice_id)
    output_path = f'temp_voice_{session_id}.mp3'
    await tts.save(output_path)
    return output_path
|
31 |
|
32 |
def upload_file(file_path):
|
|
|
33 |
with open(file_path, 'rb') as file:
|
34 |
files = {'fileToUpload': (os.path.basename(file_path), file)}
|
35 |
data = {'reqtype': 'fileupload'}
|
|
|
40 |
return None
|
41 |
|
42 |
def lipsync_api_call(video_url, audio_url):
|
|
|
43 |
headers = {
|
44 |
"Content-Type": "application/json",
|
45 |
"x-api-key": B_KEY
|
|
|
59 |
return response.json()
|
60 |
|
61 |
def check_job_status(job_id):
|
|
|
62 |
headers = {"x-api-key": B_KEY}
|
63 |
+
max_attempts = 30 # Limit the number of attempts
|
64 |
|
65 |
for _ in range(max_attempts):
|
66 |
response = requests.get(f"{API_URL}/{job_id}", headers=headers)
|
|
|
75 |
return None
|
76 |
|
77 |
def get_media_duration(file_path):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Args:
        file_path: Path of the audio or video file to inspect.

    Returns:
        The container-level duration as a float.

    Raises:
        ValueError: If ffprobe exits with an error, prints nothing, or prints
            a value that cannot be parsed as a number.
        FileNotFoundError: If the ``ffprobe`` executable cannot be launched.
    """
    cmd = [
        'ffprobe', '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        file_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    raw_duration = result.stdout.strip()
    # Without this check a failed probe only surfaces as float(b'') raising a
    # message-less ValueError; keep the exception type but report ffprobe's
    # own diagnostics so the failure can be understood.
    if result.returncode != 0 or not raw_duration:
        detail = result.stderr.decode(errors='replace').strip() or 'no output'
        raise ValueError(
            f"ffprobe could not determine the duration of {file_path!r}: {detail}"
        )
    return float(raw_duration)
|
82 |
|
83 |
def combine_audio_video(video_path, audio_path, output_path):
|
84 |
+
# Get durations of both video and audio
|
85 |
video_duration = get_media_duration(video_path)
|
86 |
audio_duration = get_media_duration(audio_path)
|
87 |
|
88 |
if video_duration > audio_duration:
|
89 |
+
# Trim video to match the audio length
|
90 |
cmd = [
|
91 |
'ffmpeg', '-i', video_path, '-i', audio_path,
|
92 |
+
'-t', str(audio_duration), # Trim video to audio duration
|
93 |
'-map', '0:v', '-map', '1:a',
|
94 |
'-c:v', 'copy', '-c:a', 'aac',
|
95 |
'-y', output_path
|
96 |
]
|
97 |
else:
|
98 |
+
# Loop video if it's shorter than audio
|
99 |
+
loop_count = int(audio_duration // video_duration) + 1 # Calculate how many times to loop
|
100 |
cmd = [
|
101 |
'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
|
102 |
+
'-t', str(audio_duration), # Match the duration of the final video with the audio
|
103 |
'-map', '0:v', '-map', '1:a',
|
104 |
'-c:v', 'copy', '-c:a', 'aac',
|
105 |
'-shortest', '-y', output_path
|
|
|
107 |
|
108 |
subprocess.run(cmd, check=True)
|
109 |
|
110 |
+
async def process_video(voice, video_url, text, progress=gr.Progress()):
|
111 |
+
session_id = str(uuid.uuid4()) # Generate a unique session ID
|
|
|
112 |
progress(0, desc="Generating speech...")
|
113 |
+
audio_path = await text_to_speech(voice, text, session_id)
|
114 |
if not audio_path:
|
115 |
return None, "Failed to generate speech audio."
|
116 |
|
|
|
148 |
except Exception as e:
|
149 |
progress(0.8, desc="Falling back to simple combination...")
|
150 |
try:
|
151 |
+
# Download the video from the URL
|
152 |
video_response = requests.get(video_url)
|
153 |
video_path = f"temp_video_{session_id}.mp4"
|
154 |
with open(video_path, "wb") as f:
|
|
|
161 |
except Exception as fallback_error:
|
162 |
return None, f"All methods failed. Error: {str(fallback_error)}"
|
163 |
finally:
|
164 |
+
# Cleanup
|
165 |
if os.path.exists(audio_path):
|
166 |
os.remove(audio_path)
|
167 |
if os.path.exists(f"temp_video_{session_id}.mp4"):
|
168 |
os.remove(f"temp_video_{session_id}.mp4")
|
169 |
|
170 |
def create_interface():
|
|
|
|
|
|
|
171 |
with gr.Blocks() as app:
|
172 |
+
gr.Markdown("# JSON Train")
|
173 |
with gr.Row():
|
174 |
with gr.Column():
|
175 |
+
voice_dropdown = gr.Dropdown(label="Select Voice")
|
176 |
video_url_input = gr.Textbox(label="Enter Video URL")
|
177 |
text_input = gr.Textbox(label="Enter text", lines=3)
|
178 |
generate_btn = gr.Button("Generate Video")
|
|
|
181 |
status_output = gr.Textbox(label="Status", interactive=False)
|
182 |
|
183 |
async def on_generate(voice_name, video_url, text):
    """Click handler: resolve the selected voice label and run the pipeline.

    Looks up ``voice_name`` among the ``(label, id)`` pairs returned by
    ``get_voices()`` and forwards the first matching id to ``process_video``.
    Returns ``(None, <message>)`` when no usable voice id is found.
    """
    available = await get_voices()
    voice_id = None
    for entry in available:
        # First match wins, mirroring a next(...) lookup over the list.
        if entry[0] == voice_name:
            voice_id = entry[1]
            break
    if voice_id:
        return await process_video(voice_id, video_url, text)
    return None, "Invalid voice selected."
|
189 |
+
|
190 |
+
async def populate_voices():
    """Build the update that fills the voice dropdown.

    Returns:
        A Gradio update setting the dropdown choices to the voice labels from
        ``get_voices()`` and preselecting the first one (``None`` when the
        list is empty).
    """
    voices = await get_voices()
    labels = [v[0] for v in voices]
    # gr.update() is available across Gradio 3.x and 4.x, whereas the
    # per-component Dropdown.update() classmethod was removed in Gradio 4.
    return gr.update(choices=labels, value=labels[0] if labels else None)
|
193 |
+
|
194 |
+
app.load(populate_voices, outputs=[voice_dropdown])
|
195 |
|
196 |
generate_btn.click(
|
197 |
fn=on_generate,
|