sheikhed committed on
Commit
8882e69
·
verified ·
1 Parent(s): c9696b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -28
app.py CHANGED
@@ -29,7 +29,7 @@ def get_voices():
29
  ("shimmer", "shimmer")
30
  ]
31
 
32
- def text_to_speech(voice, text):
33
  url = "https://api.openai.com/v1/audio/speech"
34
 
35
  headers = {
@@ -47,12 +47,17 @@ def text_to_speech(voice, text):
47
  if response.status_code != 200:
48
  return None
49
 
50
- return response.content
51
-
52
- def upload_file(file_content, file_name):
53
- files = {'fileToUpload': (file_name, file_content)}
54
- data = {'reqtype': 'fileupload'}
55
- response = requests.post(UPLOAD_URL, files=files, data=data)
 
 
 
 
 
56
 
57
  if response.status_code == 200:
58
  return response.text.strip()
@@ -74,7 +79,7 @@ def lipsync_api_call(video_url, audio_url):
74
  "synergizerStrength": 1
75
  }
76
 
77
- response = requests.post(API_URL, headers=headers, json=data)
78
  return response.json()
79
 
80
  def check_job_status(job_id):
@@ -94,24 +99,20 @@ def check_job_status(job_id):
94
  return None
95
 
96
  def get_media_duration(file_path):
 
97
  cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
98
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
99
  return float(result.stdout.strip())
100
 
101
- def combine_audio_video(video_path, audio_content, output_path):
102
- # Save audio content to a temporary file
103
- temp_audio_path = f'temp_audio_{uuid.uuid4()}.mp3'
104
- with open(temp_audio_path, 'wb') as audio_file:
105
- audio_file.write(audio_content)
106
-
107
  # Get durations of both video and audio
108
  video_duration = get_media_duration(video_path)
109
- audio_duration = get_media_duration(temp_audio_path)
110
 
111
  if video_duration > audio_duration:
112
  # Trim video to match the audio length
113
  cmd = [
114
- 'ffmpeg', '-i', video_path, '-i', temp_audio_path,
115
  '-t', str(audio_duration), # Trim video to audio duration
116
  '-map', '0:v', '-map', '1:a',
117
  '-c:v', 'copy', '-c:a', 'aac',
@@ -121,7 +122,7 @@ def combine_audio_video(video_path, audio_content, output_path):
121
  # Loop video if it's shorter than audio
122
  loop_count = int(audio_duration // video_duration) + 1 # Calculate how many times to loop
123
  cmd = [
124
- 'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', temp_audio_path,
125
  '-t', str(audio_duration), # Match the duration of the final video with the audio
126
  '-map', '0:v', '-map', '1:a',
127
  '-c:v', 'copy', '-c:a', 'aac',
@@ -130,23 +131,22 @@ def combine_audio_video(video_path, audio_content, output_path):
130
 
131
  subprocess.run(cmd, check=True)
132
 
133
- # Clean up temporary audio file
134
- os.remove(temp_audio_path)
135
-
136
  def process_video(voice, video_url, text, progress=gr.Progress()):
137
  session_id = str(uuid.uuid4()) # Generate a unique session ID
138
-
139
  progress(0, desc="Generating speech...")
140
- audio_content = text_to_speech(voice, text)
141
- if not audio_content:
142
  return None, "Failed to generate speech audio."
143
 
144
- progress(0.2, desc="Uploading audio...")
145
- audio_url = upload_file(audio_content, f"audio_{session_id}.mp3")
146
- if not audio_url:
147
- return None, "Failed to upload audio file."
148
 
149
  try:
 
 
 
 
 
 
150
  progress(0.4, desc="Initiating lipsync...")
151
  job_data = lipsync_api_call(video_url, audio_url)
152
 
@@ -179,13 +179,15 @@ def process_video(voice, video_url, text, progress=gr.Progress()):
179
  f.write(video_response.content)
180
 
181
  output_path = f"output_{session_id}.mp4"
182
- combine_audio_video(video_path, audio_content, output_path)
183
  progress(1.0, desc="Complete!")
184
  return output_path, f"Used fallback method. Original error: {str(e)}"
185
  except Exception as fallback_error:
186
  return None, f"All methods failed. Error: {str(fallback_error)}"
187
  finally:
188
  # Cleanup
 
 
189
  if os.path.exists(f"temp_video_{session_id}.mp4"):
190
  os.remove(f"temp_video_{session_id}.mp4")
191
 
 
29
  ("shimmer", "shimmer")
30
  ]
31
 
32
+ def text_to_speech(voice, text, session_id):
33
  url = "https://api.openai.com/v1/audio/speech"
34
 
35
  headers = {
 
47
  if response.status_code != 200:
48
  return None
49
 
50
+ # Save temporary audio file with session ID
51
+ audio_file_path = f'temp_voice_{session_id}.mp3'
52
+ with open(audio_file_path, 'wb') as audio_file:
53
+ audio_file.write(response.content)
54
+ return audio_file_path
55
+
56
+ def upload_file(file_path):
57
+ with open(file_path, 'rb') as file:
58
+ files = {'fileToUpload': (os.path.basename(file_path), file)}
59
+ data = {'reqtype': 'fileupload'}
60
+ response = requests.post(UPLOAD_URL, files=files, data=data)
61
 
62
  if response.status_code == 200:
63
  return response.text.strip()
 
79
  "synergizerStrength": 1
80
  }
81
 
82
+ response = requests.post(API_URL, headers=headers, data=json.dumps(data))
83
  return response.json()
84
 
85
  def check_job_status(job_id):
 
99
  return None
100
 
101
  def get_media_duration(file_path):
102
+ # Fetch media duration using ffprobe
103
  cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
104
  result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
105
  return float(result.stdout.strip())
106
 
107
+ def combine_audio_video(video_path, audio_path, output_path):
 
 
 
 
 
108
  # Get durations of both video and audio
109
  video_duration = get_media_duration(video_path)
110
+ audio_duration = get_media_duration(audio_path)
111
 
112
  if video_duration > audio_duration:
113
  # Trim video to match the audio length
114
  cmd = [
115
+ 'ffmpeg', '-i', video_path, '-i', audio_path,
116
  '-t', str(audio_duration), # Trim video to audio duration
117
  '-map', '0:v', '-map', '1:a',
118
  '-c:v', 'copy', '-c:a', 'aac',
 
122
  # Loop video if it's shorter than audio
123
  loop_count = int(audio_duration // video_duration) + 1 # Calculate how many times to loop
124
  cmd = [
125
+ 'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', audio_path,
126
  '-t', str(audio_duration), # Match the duration of the final video with the audio
127
  '-map', '0:v', '-map', '1:a',
128
  '-c:v', 'copy', '-c:a', 'aac',
 
131
 
132
  subprocess.run(cmd, check=True)
133
 
 
 
 
134
  def process_video(voice, video_url, text, progress=gr.Progress()):
135
  session_id = str(uuid.uuid4()) # Generate a unique session ID
 
136
  progress(0, desc="Generating speech...")
137
+ audio_path = text_to_speech(voice, text, session_id)
138
+ if not audio_path:
139
  return None, "Failed to generate speech audio."
140
 
141
+ progress(0.2, desc="Processing video...")
 
 
 
142
 
143
  try:
144
+ progress(0.3, desc="Uploading audio...")
145
+ audio_url = upload_file(audio_path)
146
+
147
+ if not audio_url:
148
+ raise Exception("Failed to upload audio file")
149
+
150
  progress(0.4, desc="Initiating lipsync...")
151
  job_data = lipsync_api_call(video_url, audio_url)
152
 
 
179
  f.write(video_response.content)
180
 
181
  output_path = f"output_{session_id}.mp4"
182
+ combine_audio_video(video_path, audio_path, output_path)
183
  progress(1.0, desc="Complete!")
184
  return output_path, f"Used fallback method. Original error: {str(e)}"
185
  except Exception as fallback_error:
186
  return None, f"All methods failed. Error: {str(fallback_error)}"
187
  finally:
188
  # Cleanup
189
+ if os.path.exists(audio_path):
190
+ os.remove(audio_path)
191
  if os.path.exists(f"temp_video_{session_id}.mp4"):
192
  os.remove(f"temp_video_{session_id}.mp4")
193