Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ from faster_whisper import WhisperModel
|
|
7 |
import numpy as np
|
8 |
from scipy.io import wavfile
|
9 |
from scipy.signal import correlate
|
|
|
10 |
|
11 |
# Mapping of model names to Whisper model sizes
|
12 |
MODELS = {
|
@@ -128,8 +129,9 @@ CODE_TO_LANGUAGE_NAME = {v: k for k, v in LANGUAGE_NAME_TO_CODE.items()}
|
|
128 |
def convert_to_wav(audio_file):
|
129 |
"""Convert any audio file to WAV format."""
|
130 |
audio = AudioSegment.from_file(audio_file)
|
131 |
-
|
132 |
-
|
|
|
133 |
return wav_path
|
134 |
|
135 |
def detect_language(audio_file):
|
@@ -200,8 +202,9 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
|
|
200 |
non_silent_audio += audio[start:] # Add the remaining part
|
201 |
|
202 |
# Export the processed audio
|
203 |
-
|
204 |
-
|
|
|
205 |
|
206 |
# Clean up temporary WAV file
|
207 |
os.remove(wav_path)
|
@@ -279,8 +282,9 @@ def detect_and_trim_audio(main_audio, target_audio, threshold=0.5):
|
|
279 |
timestamps.append(f"{segment[0]:.2f}-{segment[1]:.2f}")
|
280 |
|
281 |
# Export the trimmed audio
|
282 |
-
|
283 |
-
|
|
|
284 |
|
285 |
# Format timestamps
|
286 |
timestamps_str = "\n".join(timestamps)
|
@@ -305,8 +309,9 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whis
|
|
305 |
# Convert audio to 16kHz mono for better compatibility
|
306 |
audio = AudioSegment.from_file(wav_path)
|
307 |
audio = audio.set_frame_rate(16000).set_channels(1)
|
308 |
-
|
309 |
-
|
|
|
310 |
|
311 |
# Load the appropriate model
|
312 |
if model_size == "Faster Whisper Large v3":
|
|
|
7 |
import numpy as np
|
8 |
from scipy.io import wavfile
|
9 |
from scipy.signal import correlate
|
10 |
+
import tempfile
|
11 |
|
12 |
# Mapping of model names to Whisper model sizes
|
13 |
MODELS = {
|
|
|
129 |
def convert_to_wav(audio_file):
    """Convert any audio file to WAV format.

    Args:
        audio_file: Path (or file-like object) accepted by
            ``AudioSegment.from_file``.

    Returns:
        str: Path to a temporary ``.wav`` file on disk. Because the file is
        created with ``delete=False``, the caller is responsible for removing
        it when done (the callers in this file do so via ``os.remove``).
    """
    audio = AudioSegment.from_file(audio_file)
    # Reserve a temporary filename, then close the handle BEFORE exporting:
    # on Windows a still-open NamedTemporaryFile cannot be reopened for
    # writing, so exporting inside the `with` block would fail there.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
        wav_path = temp_wav.name
    audio.export(wav_path, format="wav")
    return wav_path
|
136 |
|
137 |
def detect_language(audio_file):
|
|
|
202 |
non_silent_audio += audio[start:] # Add the remaining part
|
203 |
|
204 |
# Export the processed audio
|
205 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
|
206 |
+
output_path = temp_output.name
|
207 |
+
non_silent_audio.export(output_path, format="wav")
|
208 |
|
209 |
# Clean up temporary WAV file
|
210 |
os.remove(wav_path)
|
|
|
282 |
timestamps.append(f"{segment[0]:.2f}-{segment[1]:.2f}")
|
283 |
|
284 |
# Export the trimmed audio
|
285 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
|
286 |
+
output_path = temp_output.name
|
287 |
+
trimmed_audio.export(output_path, format="wav")
|
288 |
|
289 |
# Format timestamps
|
290 |
timestamps_str = "\n".join(timestamps)
|
|
|
309 |
# Convert audio to 16kHz mono for better compatibility
|
310 |
audio = AudioSegment.from_file(wav_path)
|
311 |
audio = audio.set_frame_rate(16000).set_channels(1)
|
312 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_processed:
|
313 |
+
processed_audio_path = temp_processed.name
|
314 |
+
audio.export(processed_audio_path, format="wav")
|
315 |
|
316 |
# Load the appropriate model
|
317 |
if model_size == "Faster Whisper Large v3":
|