sheikhed committed on
Commit
8a677c2
·
verified ·
1 Parent(s): 715a36b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -28
app.py CHANGED
@@ -5,8 +5,9 @@ import time
5
  import subprocess
6
  import gradio as gr
7
  import uuid
 
 
8
  from dotenv import load_dotenv
9
- from edge_tts import Voices, speak
10
 
11
  # Load environment variables
12
  load_dotenv()
@@ -14,23 +15,21 @@ load_dotenv()
14
  # API Key
15
  B_KEY = os.getenv("B_KEY")
16
 
17
- # URLs
18
  API_URL = os.getenv("API_URL")
19
  UPLOAD_URL = os.getenv("UPLOAD_URL")
20
 
21
- def get_voices():
22
- """Fetches and returns a list of available voices from Edge TTS."""
23
- voices = Voices().get_voices()
24
- return [(f"{v['Name']} ({v['Locale']})", v['ShortName']) for v in voices]
25
 
26
- async def text_to_speech(voice_name, text, session_id):
27
- """Uses edge_tts to generate speech and saves it to a file."""
28
  audio_file_path = f'temp_voice_{session_id}.mp3'
29
- await speak(text, voice_name, audio_file_path)
30
  return audio_file_path
31
 
32
  def upload_file(file_path):
33
- """Uploads a file to the specified URL."""
34
  with open(file_path, 'rb') as file:
35
  files = {'fileToUpload': (os.path.basename(file_path), file)}
36
  data = {'reqtype': 'fileupload'}
@@ -41,7 +40,6 @@ def upload_file(file_path):
41
  return None
42
 
43
  def lipsync_api_call(video_url, audio_url):
44
- """Makes an API call to perform lipsync."""
45
  headers = {
46
  "Content-Type": "application/json",
47
  "x-api-key": B_KEY
@@ -61,9 +59,8 @@ def lipsync_api_call(video_url, audio_url):
61
  return response.json()
62
 
63
  def check_job_status(job_id):
64
- """Checks the status of a lipsync job."""
65
  headers = {"x-api-key": B_KEY}
66
- max_attempts = 30
67
 
68
  for _ in range(max_attempts):
69
  response = requests.get(f"{API_URL}/{job_id}", headers=headers)
@@ -78,29 +75,31 @@ def check_job_status(job_id):
78
  return None
79
 
80
  def get_media_duration(file_path):
81
- """Gets the duration of a media file using ffprobe."""
82
  cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
83
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
84
  return float(result.stdout.strip())
85
 
86
  def combine_audio_video(video_path, audio_path, output_path):
87
- """Combines audio and video files, handling duration differences."""
88
  video_duration = get_media_duration(video_path)
89
  audio_duration = get_media_duration(audio_path)
90
 
91
  if video_duration > audio_duration:
 
92
  cmd = [
93
  'ffmpeg', '-i', video_path, '-i', audio_path,
94
- '-t', str(audio_duration),
95
  '-map', '0:v', '-map', '1:a',
96
  '-c:v', 'copy', '-c:a', 'aac',
97
  '-y', output_path
98
  ]
99
  else:
100
- loop_count = int(audio_duration // video_duration) + 1
 
101
  cmd = [
102
  'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
103
- '-t', str(audio_duration),
104
  '-map', '0:v', '-map', '1:a',
105
  '-c:v', 'copy', '-c:a', 'aac',
106
  '-shortest', '-y', output_path
@@ -108,11 +107,10 @@ def combine_audio_video(video_path, audio_path, output_path):
108
 
109
  subprocess.run(cmd, check=True)
110
 
111
- async def process_video(voice_name, video_url, text, progress=gr.Progress()):
112
- """Main function to process the video: generate speech, lipsync, and handle results."""
113
- session_id = str(uuid.uuid4())
114
  progress(0, desc="Generating speech...")
115
- audio_path = await text_to_speech(voice_name, text, session_id)
116
  if not audio_path:
117
  return None, "Failed to generate speech audio."
118
 
@@ -150,6 +148,7 @@ async def process_video(voice_name, video_url, text, progress=gr.Progress()):
150
  except Exception as e:
151
  progress(0.8, desc="Falling back to simple combination...")
152
  try:
 
153
  video_response = requests.get(video_url)
154
  video_path = f"temp_video_{session_id}.mp4"
155
  with open(video_path, "wb") as f:
@@ -162,20 +161,18 @@ async def process_video(voice_name, video_url, text, progress=gr.Progress()):
162
  except Exception as fallback_error:
163
  return None, f"All methods failed. Error: {str(fallback_error)}"
164
  finally:
 
165
  if os.path.exists(audio_path):
166
  os.remove(audio_path)
167
  if os.path.exists(f"temp_video_{session_id}.mp4"):
168
  os.remove(f"temp_video_{session_id}.mp4")
169
 
170
  def create_interface():
171
- """Creates the Gradio interface for the application."""
172
- voices = get_voices()
173
-
174
  with gr.Blocks() as app:
175
- gr.Markdown("# Lipsync Video Generator")
176
  with gr.Row():
177
  with gr.Column():
178
- voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
179
  video_url_input = gr.Textbox(label="Enter Video URL")
180
  text_input = gr.Textbox(label="Enter text", lines=3)
181
  generate_btn = gr.Button("Generate Video")
@@ -184,7 +181,17 @@ def create_interface():
184
  status_output = gr.Textbox(label="Status", interactive=False)
185
 
186
  async def on_generate(voice_name, video_url, text):
187
- return await process_video(voice_name, video_url, text)
 
 
 
 
 
 
 
 
 
 
188
 
189
  generate_btn.click(
190
  fn=on_generate,
 
5
  import subprocess
6
  import gradio as gr
7
  import uuid
8
+ import asyncio
9
+ import edge_tts
10
  from dotenv import load_dotenv
 
11
 
12
  # Load environment variables
13
  load_dotenv()
 
15
  # API Key
16
  B_KEY = os.getenv("B_KEY")
17
 
18
+ # URL
19
  API_URL = os.getenv("API_URL")
20
  UPLOAD_URL = os.getenv("UPLOAD_URL")
21
 
22
+ async def get_voices():
23
+ voices = await edge_tts.list_voices()
24
+ return [(voice.name, voice.voice) for voice in voices]
 
25
 
26
+ async def text_to_speech(voice_id, text, session_id):
27
+ communicate = edge_tts.Communicate(text, voice_id)
28
  audio_file_path = f'temp_voice_{session_id}.mp3'
29
+ await communicate.save(audio_file_path)
30
  return audio_file_path
31
 
32
  def upload_file(file_path):
 
33
  with open(file_path, 'rb') as file:
34
  files = {'fileToUpload': (os.path.basename(file_path), file)}
35
  data = {'reqtype': 'fileupload'}
 
40
  return None
41
 
42
  def lipsync_api_call(video_url, audio_url):
 
43
  headers = {
44
  "Content-Type": "application/json",
45
  "x-api-key": B_KEY
 
59
  return response.json()
60
 
61
  def check_job_status(job_id):
 
62
  headers = {"x-api-key": B_KEY}
63
+ max_attempts = 30 # Limit the number of attempts
64
 
65
  for _ in range(max_attempts):
66
  response = requests.get(f"{API_URL}/{job_id}", headers=headers)
 
75
  return None
76
 
77
  def get_media_duration(file_path):
78
+ # Fetch media duration using ffprobe
79
  cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
80
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
81
  return float(result.stdout.strip())
82
 
83
  def combine_audio_video(video_path, audio_path, output_path):
84
+ # Get durations of both video and audio
85
  video_duration = get_media_duration(video_path)
86
  audio_duration = get_media_duration(audio_path)
87
 
88
  if video_duration > audio_duration:
89
+ # Trim video to match the audio length
90
  cmd = [
91
  'ffmpeg', '-i', video_path, '-i', audio_path,
92
+ '-t', str(audio_duration), # Trim video to audio duration
93
  '-map', '0:v', '-map', '1:a',
94
  '-c:v', 'copy', '-c:a', 'aac',
95
  '-y', output_path
96
  ]
97
  else:
98
+ # Loop video if it's shorter than audio
99
+ loop_count = int(audio_duration // video_duration) + 1 # Calculate how many times to loop
100
  cmd = [
101
  'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
102
+ '-t', str(audio_duration), # Match the duration of the final video with the audio
103
  '-map', '0:v', '-map', '1:a',
104
  '-c:v', 'copy', '-c:a', 'aac',
105
  '-shortest', '-y', output_path
 
107
 
108
  subprocess.run(cmd, check=True)
109
 
110
+ async def process_video(voice, video_url, text, progress=gr.Progress()):
111
+ session_id = str(uuid.uuid4()) # Generate a unique session ID
 
112
  progress(0, desc="Generating speech...")
113
+ audio_path = await text_to_speech(voice, text, session_id)
114
  if not audio_path:
115
  return None, "Failed to generate speech audio."
116
 
 
148
  except Exception as e:
149
  progress(0.8, desc="Falling back to simple combination...")
150
  try:
151
+ # Download the video from the URL
152
  video_response = requests.get(video_url)
153
  video_path = f"temp_video_{session_id}.mp4"
154
  with open(video_path, "wb") as f:
 
161
  except Exception as fallback_error:
162
  return None, f"All methods failed. Error: {str(fallback_error)}"
163
  finally:
164
+ # Cleanup
165
  if os.path.exists(audio_path):
166
  os.remove(audio_path)
167
  if os.path.exists(f"temp_video_{session_id}.mp4"):
168
  os.remove(f"temp_video_{session_id}.mp4")
169
 
170
  def create_interface():
 
 
 
171
  with gr.Blocks() as app:
172
+ gr.Markdown("# JSON Train")
173
  with gr.Row():
174
  with gr.Column():
175
+ voice_dropdown = gr.Dropdown(label="Select Voice")
176
  video_url_input = gr.Textbox(label="Enter Video URL")
177
  text_input = gr.Textbox(label="Enter text", lines=3)
178
  generate_btn = gr.Button("Generate Video")
 
181
  status_output = gr.Textbox(label="Status", interactive=False)
182
 
183
  async def on_generate(voice_name, video_url, text):
184
+ voices = await get_voices()
185
+ voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
186
+ if not voice_id:
187
+ return None, "Invalid voice selected."
188
+ return await process_video(voice_id, video_url, text)
189
+
190
+ async def populate_voices():
191
+ voices = await get_voices()
192
+ return gr.Dropdown.update(choices=[v[0] for v in voices], value=voices[0][0] if voices else None)
193
+
194
+ app.load(populate_voices, outputs=[voice_dropdown])
195
 
196
  generate_btn.click(
197
  fn=on_generate,