Spaces:

nenafem
/

flask_whisper

Runtime error

App Files Community

Michael Natanael committed on May 26

Commit

aad2b2d

1 Parent(s): 82b14e6

Revert "konversi audio ke FLAC (mono 16kHz) menggunakan ffmpeg sebelum dikirim ke Groq Whisper API untuk transkripsi"

Browse files

This reverts commit 82b14e678f2ebb230f9db2d23bdda576cd6c73d6.

Files changed (1) hide show

app.py +2 -32

app.py CHANGED Viewed

@@ -20,7 +20,6 @@ from werkzeug.security import generate_password_hash, check_password_hash
 from faster_whisper import WhisperModel
 from groq import Groq
 import tempfile
-import subprocess
 import os
 import datetime
 import time
@@ -209,29 +208,6 @@ def bert_predict(input_lyric):
     return predicted_label, prob_results
-# Fungsi konversi audio ke FLAC mono 16kHz
-def convert_audio_to_flac(input_path):
-    output_path = tempfile.NamedTemporaryFile(suffix=".flac", delete=False).name
-    subprocess.run(
-        [
-            "ffmpeg",
-            "-i",
-            input_path,
-            "-ar",
-            "16000",
-            "-ac",
-            "1",
-            "-map",
-            "0:a",
-            "-c:a",
-            "flac",
-            output_path,
-        ],
-        check=True,
-    )
-    return output_path
 # === ROUTES ===
@@ -259,13 +235,9 @@ def transcribe():
             # Step 1: Transcribe
             # transcribed_text = faster_whisper(temp_audio_path).strip()
-            # Konversi audio ke FLAC 16kHz mono untuk efisiensi
-            flac_path = convert_audio_to_flac(temp_audio_path)
-            # Kirim ke Groq Whisper API
-            with open(flac_path, "rb") as file:
                 transcription = client.audio.transcriptions.create(
-                    file=(flac_path, file.read()),
                     model="whisper-large-v3",
                     prompt="Transkripsikan hanya bagian lirik lagu saja",
                     language="id",
@@ -274,8 +246,6 @@ def transcribe():
                 )
             transcribed_text = transcription.text.strip()
             os.remove(temp_audio_path)
-            if os.path.exists(flac_path):
-                os.remove(flac_path)
             # Step 2: BERT Prediction
             predicted_label, prob_results = bert_predict(transcribed_text)

 from faster_whisper import WhisperModel
 from groq import Groq
 import tempfile
 import os
 import datetime
 import time
     return predicted_label, prob_results
 # === ROUTES ===
             # Step 1: Transcribe
             # transcribed_text = faster_whisper(temp_audio_path).strip()
+            with open(temp_audio_path, "rb") as file:
                 transcription = client.audio.transcriptions.create(
+                    file=(temp_audio_path, file.read()),
                     model="whisper-large-v3",
                     prompt="Transkripsikan hanya bagian lirik lagu saja",
                     language="id",
                 )
             transcribed_text = transcription.text.strip()
             os.remove(temp_audio_path)
             # Step 2: BERT Prediction
             predicted_label, prob_results = bert_predict(transcribed_text)