Athspi committed on
Commit
ea4f0ce
·
verified ·
1 Parent(s): 8ceb7c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -7,6 +7,7 @@ from faster_whisper import WhisperModel
7
  import numpy as np
8
  from scipy.io import wavfile
9
  from scipy.signal import correlate
 
10
 
11
  # Mapping of model names to Whisper model sizes
12
  MODELS = {
@@ -128,8 +129,9 @@ CODE_TO_LANGUAGE_NAME = {v: k for k, v in LANGUAGE_NAME_TO_CODE.items()}
128
  def convert_to_wav(audio_file):
129
  """Convert any audio file to WAV format."""
130
  audio = AudioSegment.from_file(audio_file)
131
- wav_path = "temp_audio.wav"
132
- audio.export(wav_path, format="wav")
 
133
  return wav_path
134
 
135
  def detect_language(audio_file):
@@ -200,8 +202,9 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
200
  non_silent_audio += audio[start:] # Add the remaining part
201
 
202
  # Export the processed audio
203
- output_path = "silence_removed_audio.wav"
204
- non_silent_audio.export(output_path, format="wav")
 
205
 
206
  # Clean up temporary WAV file
207
  os.remove(wav_path)
@@ -279,8 +282,9 @@ def detect_and_trim_audio(main_audio, target_audio, threshold=0.5):
279
  timestamps.append(f"{segment[0]:.2f}-{segment[1]:.2f}")
280
 
281
  # Export the trimmed audio
282
- output_path = "trimmed_audio.wav"
283
- trimmed_audio.export(output_path, format="wav")
 
284
 
285
  # Format timestamps
286
  timestamps_str = "\n".join(timestamps)
@@ -305,8 +309,9 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whis
305
  # Convert audio to 16kHz mono for better compatibility
306
  audio = AudioSegment.from_file(wav_path)
307
  audio = audio.set_frame_rate(16000).set_channels(1)
308
- processed_audio_path = "processed_audio.wav"
309
- audio.export(processed_audio_path, format="wav")
 
310
 
311
  # Load the appropriate model
312
  if model_size == "Faster Whisper Large v3":
 
7
  import numpy as np
8
  from scipy.io import wavfile
9
  from scipy.signal import correlate
10
+ import tempfile
11
 
12
  # Mapping of model names to Whisper model sizes
13
  MODELS = {
 
129
  def convert_to_wav(audio_file):
130
  """Convert any audio file to WAV format."""
131
  audio = AudioSegment.from_file(audio_file)
132
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
133
+ wav_path = temp_wav.name
134
+ audio.export(wav_path, format="wav")
135
  return wav_path
136
 
137
  def detect_language(audio_file):
 
202
  non_silent_audio += audio[start:] # Add the remaining part
203
 
204
  # Export the processed audio
205
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
206
+ output_path = temp_output.name
207
+ non_silent_audio.export(output_path, format="wav")
208
 
209
  # Clean up temporary WAV file
210
  os.remove(wav_path)
 
282
  timestamps.append(f"{segment[0]:.2f}-{segment[1]:.2f}")
283
 
284
  # Export the trimmed audio
285
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
286
+ output_path = temp_output.name
287
+ trimmed_audio.export(output_path, format="wav")
288
 
289
  # Format timestamps
290
  timestamps_str = "\n".join(timestamps)
 
309
  # Convert audio to 16kHz mono for better compatibility
310
  audio = AudioSegment.from_file(wav_path)
311
  audio = audio.set_frame_rate(16000).set_channels(1)
312
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_processed:
313
+ processed_audio_path = temp_processed.name
314
+ audio.export(processed_audio_path, format="wav")
315
 
316
  # Load the appropriate model
317
  if model_size == "Faster Whisper Large v3":