Update app.py
Browse files
app.py
CHANGED
@@ -139,6 +139,10 @@ def convert_to_wav(audio_file):
|
|
139 |
audio.export(wav_path, format="wav")
|
140 |
return wav_path
|
141 |
|
|
|
|
|
|
|
|
|
142 |
def detect_language(audio_file):
|
143 |
"""Detect the language of the audio file."""
|
144 |
if audio_file is None:
|
@@ -247,7 +251,13 @@ def detect_and_trim_audio(main_audio, target_audio, threshold=0.5):
|
|
247 |
|
248 |
# Ensure both audio files have the same sample rate
|
249 |
if main_rate != target_rate:
|
250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
# Normalize audio data
|
253 |
main_data = main_data.astype(np.float32) / np.iinfo(main_data.dtype).max
|
@@ -258,35 +268,23 @@ def detect_and_trim_audio(main_audio, target_audio, threshold=0.5):
|
|
258 |
correlation = np.abs(correlation)
|
259 |
max_corr = np.max(correlation)
|
260 |
|
261 |
-
#
|
262 |
-
|
263 |
-
|
264 |
-
if corr_value >= threshold * max_corr:
|
265 |
-
start_time = i / main_rate
|
266 |
-
end_time = (i + len(target_data)) / main_rate
|
267 |
-
detected_segments.append((start_time, end_time))
|
268 |
|
269 |
-
#
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
if segment[0] <= last_segment[1] + 1.0: # Merge if within 1 second
|
277 |
-
merged_segments[-1] = (last_segment[0], max(last_segment[1], segment[1]))
|
278 |
-
else:
|
279 |
-
merged_segments.append(segment)
|
280 |
|
281 |
-
# Trim the main audio to include only the detected segments
|
282 |
main_audio_segment = AudioSegment.from_file(main_wav_path)
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
start_ms = int(segment[0] * 1000)
|
287 |
-
end_ms = int(segment[1] * 1000)
|
288 |
-
trimmed_audio += main_audio_segment[start_ms:end_ms]
|
289 |
-
timestamps.append(f"{segment[0]:.2f}-{segment[1]:.2f}")
|
290 |
|
291 |
# Export the trimmed audio
|
292 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
|
@@ -294,11 +292,13 @@ def detect_and_trim_audio(main_audio, target_audio, threshold=0.5):
|
|
294 |
trimmed_audio.export(output_path, format="wav")
|
295 |
|
296 |
# Format timestamps
|
297 |
-
timestamps_str = "
|
298 |
|
299 |
# Clean up temporary WAV files
|
300 |
os.remove(main_wav_path)
|
301 |
os.remove(target_wav_path)
|
|
|
|
|
302 |
|
303 |
return output_path, timestamps_str
|
304 |
except Exception as e:
|
|
|
139 |
audio.export(wav_path, format="wav")
|
140 |
return wav_path
|
141 |
|
142 |
+
def resample_audio(audio_segment, target_sample_rate):
|
143 |
+
"""Resample an audio segment to the target sample rate."""
|
144 |
+
return audio_segment.set_frame_rate(target_sample_rate)
|
145 |
+
|
146 |
def detect_language(audio_file):
|
147 |
"""Detect the language of the audio file."""
|
148 |
if audio_file is None:
|
|
|
251 |
|
252 |
# Ensure both audio files have the same sample rate
|
253 |
if main_rate != target_rate:
|
254 |
+
logger.warning(f"Sample rates differ: main_audio={main_rate}, target_audio={target_rate}. Resampling target audio.")
|
255 |
+
target_segment = AudioSegment.from_file(target_wav_path)
|
256 |
+
target_segment = resample_audio(target_segment, main_rate)
|
257 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_resampled:
|
258 |
+
resampled_path = temp_resampled.name
|
259 |
+
target_segment.export(resampled_path, format="wav")
|
260 |
+
target_rate, target_data = wavfile.read(resampled_path)
|
261 |
|
262 |
# Normalize audio data
|
263 |
main_data = main_data.astype(np.float32) / np.iinfo(main_data.dtype).max
|
|
|
268 |
correlation = np.abs(correlation)
|
269 |
max_corr = np.max(correlation)
|
270 |
|
271 |
+
# Find the peak in the cross-correlation result
|
272 |
+
peak_index = np.argmax(correlation)
|
273 |
+
peak_value = correlation[peak_index]
|
|
|
|
|
|
|
|
|
274 |
|
275 |
+
# Check if the peak value exceeds the threshold
|
276 |
+
if peak_value < threshold * max_corr:
|
277 |
+
return None, "Error: Target audio not detected in the main audio."
|
278 |
+
|
279 |
+
# Calculate the start and end times of the target audio in the main audio
|
280 |
+
start_time = peak_index / main_rate
|
281 |
+
end_time = (peak_index + len(target_data)) / main_rate
|
|
|
|
|
|
|
|
|
282 |
|
283 |
+
# Trim the main audio to include only the detected segment
|
284 |
main_audio_segment = AudioSegment.from_file(main_wav_path)
|
285 |
+
start_ms = int(start_time * 1000)
|
286 |
+
end_ms = int(end_time * 1000)
|
287 |
+
trimmed_audio = main_audio_segment[start_ms:end_ms]
|
|
|
|
|
|
|
|
|
288 |
|
289 |
# Export the trimmed audio
|
290 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
|
|
|
292 |
trimmed_audio.export(output_path, format="wav")
|
293 |
|
294 |
# Format timestamps
|
295 |
+
timestamps_str = f"{start_time:.2f}-{end_time:.2f}"
|
296 |
|
297 |
# Clean up temporary WAV files
|
298 |
os.remove(main_wav_path)
|
299 |
os.remove(target_wav_path)
|
300 |
+
if 'resampled_path' in locals():
|
301 |
+
os.remove(resampled_path)
|
302 |
|
303 |
return output_path, timestamps_str
|
304 |
except Exception as e:
|