sheikhed committed on
Commit
2760870
·
verified ·
1 Parent(s): 2bce8ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -37
app.py CHANGED
@@ -16,7 +16,6 @@ B_KEY = os.getenv("B_KEY")
16
 
17
  # URLs
18
  API_URL = os.getenv("API_URL")
19
- UPLOAD_URL = os.getenv("UPLOAD_URL")
20
 
21
  def get_voices():
22
  # OpenAI TTS voices
@@ -29,7 +28,7 @@ def get_voices():
29
  ("shimmer", "shimmer")
30
  ]
31
 
32
- def text_to_speech(voice, text, session_id):
33
  url = "https://api.openai.com/v1/audio/speech"
34
 
35
  headers = {
@@ -47,30 +46,20 @@ def text_to_speech(voice, text, session_id):
47
  if response.status_code != 200:
48
  return None
49
 
50
- # Save temporary audio file with session ID
51
- audio_file_path = f'temp_voice_{session_id}.mp3'
52
- with open(audio_file_path, 'wb') as audio_file:
53
- audio_file.write(response.content)
54
- return audio_file_path
55
-
56
- def upload_file(file_path):
57
- with open(file_path, 'rb') as file:
58
- files = {'fileToUpload': (os.path.basename(file_path), file)}
59
- data = {'reqtype': 'fileupload'}
60
- response = requests.post(UPLOAD_URL, files=files, data=data)
61
-
62
- if response.status_code == 200:
63
- return response.text.strip()
64
- return None
65
 
66
- def lipsync_api_call(video_url, audio_url):
67
  headers = {
68
  "Content-Type": "application/json",
69
  "x-api-key": B_KEY
70
  }
71
 
 
 
 
 
 
72
  data = {
73
- "audioUrl": audio_url,
74
  "videoUrl": video_url,
75
  "maxCredits": 1000,
76
  "model": "sync-1.7.1-beta",
@@ -79,7 +68,7 @@ def lipsync_api_call(video_url, audio_url):
79
  "synergizerStrength": 1
80
  }
81
 
82
- response = requests.post(API_URL, headers=headers, data=json.dumps(data))
83
  return response.json()
84
 
85
  def check_job_status(job_id):
@@ -104,15 +93,20 @@ def get_media_duration(file_path):
104
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
105
  return float(result.stdout.strip())
106
 
107
- def combine_audio_video(video_path, audio_path, output_path):
 
 
 
 
 
108
  # Get durations of both video and audio
109
  video_duration = get_media_duration(video_path)
110
- audio_duration = get_media_duration(audio_path)
111
 
112
  if video_duration > audio_duration:
113
  # Trim video to match the audio length
114
  cmd = [
115
- 'ffmpeg', '-i', video_path, '-i', audio_path,
116
  '-t', str(audio_duration), # Trim video to audio duration
117
  '-map', '0:v', '-map', '1:a',
118
  '-c:v', 'copy', '-c:a', 'aac',
@@ -122,7 +116,7 @@ def combine_audio_video(video_path, audio_path, output_path):
122
  # Loop video if it's shorter than audio
123
  loop_count = int(audio_duration // video_duration) + 1 # Calculate how many times to loop
124
  cmd = [
125
- 'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
126
  '-t', str(audio_duration), # Match the duration of the final video with the audio
127
  '-map', '0:v', '-map', '1:a',
128
  '-c:v', 'copy', '-c:a', 'aac',
@@ -131,24 +125,21 @@ def combine_audio_video(video_path, audio_path, output_path):
131
 
132
  subprocess.run(cmd, check=True)
133
 
 
 
 
134
  def process_video(voice, video_url, text, progress=gr.Progress()):
135
  session_id = str(uuid.uuid4()) # Generate a unique session ID
136
  progress(0, desc="Generating speech...")
137
- audio_path = text_to_speech(voice, text, session_id)
138
- if not audio_path:
139
  return None, "Failed to generate speech audio."
140
 
141
  progress(0.2, desc="Processing video...")
142
 
143
  try:
144
- progress(0.3, desc="Uploading audio...")
145
- audio_url = upload_file(audio_path)
146
-
147
- if not audio_url:
148
- raise Exception("Failed to upload audio file")
149
-
150
  progress(0.4, desc="Initiating lipsync...")
151
- job_data = lipsync_api_call(video_url, audio_url)
152
 
153
  if "error" in job_data or "message" in job_data:
154
  raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))
@@ -179,15 +170,13 @@ def process_video(voice, video_url, text, progress=gr.Progress()):
179
  f.write(video_response.content)
180
 
181
  output_path = f"output_{session_id}.mp4"
182
- combine_audio_video(video_path, audio_path, output_path)
183
  progress(1.0, desc="Complete!")
184
  return output_path, f"Used fallback method. Original error: {str(e)}"
185
  except Exception as fallback_error:
186
  return None, f"All methods failed. Error: {str(fallback_error)}"
187
  finally:
188
  # Cleanup
189
- if os.path.exists(audio_path):
190
- os.remove(audio_path)
191
  if os.path.exists(f"temp_video_{session_id}.mp4"):
192
  os.remove(f"temp_video_{session_id}.mp4")
193
 
@@ -195,7 +184,7 @@ def create_interface():
195
  voices = get_voices()
196
 
197
  with gr.Blocks() as app:
198
- gr.Markdown("# JSON Train")
199
  with gr.Row():
200
  with gr.Column():
201
  voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
 
16
 
17
  # URLs
18
  API_URL = os.getenv("API_URL")
 
19
 
20
  def get_voices():
21
  # OpenAI TTS voices
 
28
  ("shimmer", "shimmer")
29
  ]
30
 
31
+ def text_to_speech(voice, text):
32
  url = "https://api.openai.com/v1/audio/speech"
33
 
34
  headers = {
 
46
  if response.status_code != 200:
47
  return None
48
 
49
+ return response.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ def lipsync_api_call(video_url, audio_content):
52
  headers = {
53
  "Content-Type": "application/json",
54
  "x-api-key": B_KEY
55
  }
56
 
57
+ # Create a multipart form-data request
58
+ files = {
59
+ 'audio': ('audio.mp3', audio_content, 'audio/mpeg')
60
+ }
61
+
62
  data = {
 
63
  "videoUrl": video_url,
64
  "maxCredits": 1000,
65
  "model": "sync-1.7.1-beta",
 
68
  "synergizerStrength": 1
69
  }
70
 
71
+ response = requests.post(API_URL, headers=headers, data=data, files=files)
72
  return response.json()
73
 
74
  def check_job_status(job_id):
 
93
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
94
  return float(result.stdout.strip())
95
 
96
+ def combine_audio_video(video_path, audio_content, output_path):
97
+ # Save audio content to a temporary file
98
+ temp_audio_path = f'temp_audio_{uuid.uuid4()}.mp3'
99
+ with open(temp_audio_path, 'wb') as audio_file:
100
+ audio_file.write(audio_content)
101
+
102
  # Get durations of both video and audio
103
  video_duration = get_media_duration(video_path)
104
+ audio_duration = get_media_duration(temp_audio_path)
105
 
106
  if video_duration > audio_duration:
107
  # Trim video to match the audio length
108
  cmd = [
109
+ 'ffmpeg', '-i', video_path, '-i', temp_audio_path,
110
  '-t', str(audio_duration), # Trim video to audio duration
111
  '-map', '0:v', '-map', '1:a',
112
  '-c:v', 'copy', '-c:a', 'aac',
 
116
  # Loop video if it's shorter than audio
117
  loop_count = int(audio_duration // video_duration) + 1 # Calculate how many times to loop
118
  cmd = [
119
+ 'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', temp_audio_path,
120
  '-t', str(audio_duration), # Match the duration of the final video with the audio
121
  '-map', '0:v', '-map', '1:a',
122
  '-c:v', 'copy', '-c:a', 'aac',
 
125
 
126
  subprocess.run(cmd, check=True)
127
 
128
+ # Clean up temporary audio file
129
+ os.remove(temp_audio_path)
130
+
131
  def process_video(voice, video_url, text, progress=gr.Progress()):
132
  session_id = str(uuid.uuid4()) # Generate a unique session ID
133
  progress(0, desc="Generating speech...")
134
+ audio_content = text_to_speech(voice, text)
135
+ if not audio_content:
136
  return None, "Failed to generate speech audio."
137
 
138
  progress(0.2, desc="Processing video...")
139
 
140
  try:
 
 
 
 
 
 
141
  progress(0.4, desc="Initiating lipsync...")
142
+ job_data = lipsync_api_call(video_url, audio_content)
143
 
144
  if "error" in job_data or "message" in job_data:
145
  raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))
 
170
  f.write(video_response.content)
171
 
172
  output_path = f"output_{session_id}.mp4"
173
+ combine_audio_video(video_path, audio_content, output_path)
174
  progress(1.0, desc="Complete!")
175
  return output_path, f"Used fallback method. Original error: {str(e)}"
176
  except Exception as fallback_error:
177
  return None, f"All methods failed. Error: {str(fallback_error)}"
178
  finally:
179
  # Cleanup
 
 
180
  if os.path.exists(f"temp_video_{session_id}.mp4"):
181
  os.remove(f"temp_video_{session_id}.mp4")
182
 
 
184
  voices = get_voices()
185
 
186
  with gr.Blocks() as app:
187
+ gr.Markdown("# Lipsync Video Generator")
188
  with gr.Row():
189
  with gr.Column():
190
  voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)