Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ from faster_whisper import WhisperModel
|
|
7 |
import numpy as np
|
8 |
from scipy.io import wavfile
|
9 |
from scipy.signal import correlate
|
|
|
10 |
|
11 |
# Mapping of model names to Whisper model sizes
|
12 |
MODELS = {
|
@@ -128,8 +129,9 @@ CODE_TO_LANGUAGE_NAME = {v: k for k, v in LANGUAGE_NAME_TO_CODE.items()}
|
|
128 |
def convert_to_wav(audio_file):
|
129 |
"""Convert any audio file to WAV format."""
|
130 |
audio = AudioSegment.from_file(audio_file)
|
131 |
-
|
132 |
-
|
|
|
133 |
return wav_path
|
134 |
|
135 |
def detect_language(audio_file):
|
@@ -200,8 +202,9 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
|
|
200 |
non_silent_audio += audio[start:] # Add the remaining part
|
201 |
|
202 |
# Export the processed audio
|
203 |
-
|
204 |
-
|
|
|
205 |
|
206 |
# Clean up temporary WAV file
|
207 |
os.remove(wav_path)
|
@@ -279,8 +282,9 @@ def detect_and_trim_audio(main_audio, target_audio, threshold=0.5):
|
|
279 |
timestamps.append(f"{segment[0]:.2f}-{segment[1]:.2f}")
|
280 |
|
281 |
# Export the trimmed audio
|
282 |
-
|
283 |
-
|
|
|
284 |
|
285 |
# Format timestamps
|
286 |
timestamps_str = "\n".join(timestamps)
|
@@ -305,8 +309,9 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whis
|
|
305 |
# Convert audio to 16kHz mono for better compatibility
|
306 |
audio = AudioSegment.from_file(wav_path)
|
307 |
audio = audio.set_frame_rate(16000).set_channels(1)
|
308 |
-
|
309 |
-
|
|
|
310 |
|
311 |
# Load the appropriate model
|
312 |
if model_size == "Faster Whisper Large v3":
|
|
|
7 |
import numpy as np
|
8 |
from scipy.io import wavfile
|
9 |
from scipy.signal import correlate
|
10 |
+
import tempfile
|
11 |
|
12 |
# Mapping of model names to Whisper model sizes
|
13 |
MODELS = {
|
|
|
129 |
def convert_to_wav(audio_file):
    """Convert any audio file to WAV format.

    Args:
        audio_file: Path (or file-like object) accepted by
            ``AudioSegment.from_file``.

    Returns:
        str: Path to a temporary ``.wav`` file on disk. Because the file is
        created with ``delete=False``, the caller is responsible for removing
        it when done (the callers in this file do so via ``os.remove``).
    """
    audio = AudioSegment.from_file(audio_file)
    # Reserve a temporary filename, then close the handle BEFORE exporting:
    # on Windows a still-open NamedTemporaryFile cannot be reopened for
    # writing, so exporting inside the `with` block would fail there.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
        wav_path = temp_wav.name
    audio.export(wav_path, format="wav")
    return wav_path
|
136 |
|
137 |
def detect_language(audio_file):
|
|
|
202 |
non_silent_audio += audio[start:] # Add the remaining part
|
203 |
|
204 |
# Export the processed audio
|
205 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
|
206 |
+
output_path = temp_output.name
|
207 |
+
non_silent_audio.export(output_path, format="wav")
|
208 |
|
209 |
# Clean up temporary WAV file
|
210 |
os.remove(wav_path)
|
|
|
282 |
timestamps.append(f"{segment[0]:.2f}-{segment[1]:.2f}")
|
283 |
|
284 |
# Export the trimmed audio
|
285 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_output:
|
286 |
+
output_path = temp_output.name
|
287 |
+
trimmed_audio.export(output_path, format="wav")
|
288 |
|
289 |
# Format timestamps
|
290 |
timestamps_str = "\n".join(timestamps)
|
|
|
309 |
# Convert audio to 16kHz mono for better compatibility
|
310 |
audio = AudioSegment.from_file(wav_path)
|
311 |
audio = audio.set_frame_rate(16000).set_channels(1)
|
312 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_processed:
|
313 |
+
processed_audio_path = temp_processed.name
|
314 |
+
audio.export(processed_audio_path, format="wav")
|
315 |
|
316 |
# Load the appropriate model
|
317 |
if model_size == "Faster Whisper Large v3":
|