Spaces:

nenafem
/

flask_whisper

Runtime error

App Files Files Community

Michael Natanael committed on May 26

Commit

82b14e6

1 Parent(s): 498fece

konversi audio ke FLAC (mono 16kHz) menggunakan ffmpeg sebelum dikirim ke Groq Whisper API untuk transkripsi

Browse files

Files changed (1) hide show

app.py +32 -2

app.py CHANGED Viewed

@@ -20,6 +20,7 @@ from werkzeug.security import generate_password_hash, check_password_hash
 from faster_whisper import WhisperModel
 from groq import Groq
 import tempfile
 import os
 import datetime
 import time
@@ -208,6 +209,29 @@ def bert_predict(input_lyric):
     return predicted_label, prob_results
 # === ROUTES ===
@@ -235,9 +259,13 @@ def transcribe():
             # Step 1: Transcribe
             # transcribed_text = faster_whisper(temp_audio_path).strip()
-            with open(temp_audio_path, "rb") as file:
                 transcription = client.audio.transcriptions.create(
-                    file=(temp_audio_path, file.read()),
                     model="whisper-large-v3",
                     prompt="Transkripsikan hanya bagian lirik lagu saja",
                     language="id",
@@ -246,6 +274,8 @@ def transcribe():
                 )
             transcribed_text = transcription.text.strip()
             os.remove(temp_audio_path)
             # Step 2: BERT Prediction
             predicted_label, prob_results = bert_predict(transcribed_text)

 from faster_whisper import WhisperModel
 from groq import Groq
 import tempfile
+import subprocess
 import os
 import datetime
 import time
     return predicted_label, prob_results
+# Fungsi konversi audio ke FLAC mono 16kHz
+def convert_audio_to_flac(input_path):
+    output_path = tempfile.NamedTemporaryFile(suffix=".flac", delete=False).name
+    subprocess.run(
+        [
+            "ffmpeg",
+            "-i",
+            input_path,
+            "-ar",
+            "16000",
+            "-ac",
+            "1",
+            "-map",
+            "0:a",
+            "-c:a",
+            "flac",
+            output_path,
+        ],
+        check=True,
+    )
+    return output_path
 # === ROUTES ===
             # Step 1: Transcribe
             # transcribed_text = faster_whisper(temp_audio_path).strip()
+            # Konversi audio ke FLAC 16kHz mono untuk efisiensi
+            flac_path = convert_audio_to_flac(temp_audio_path)
+            # Kirim ke Groq Whisper API
+            with open(flac_path, "rb") as file:
                 transcription = client.audio.transcriptions.create(
+                    file=(flac_path, file.read()),
                     model="whisper-large-v3",
                     prompt="Transkripsikan hanya bagian lirik lagu saja",
                     language="id",
                 )
             transcribed_text = transcription.text.strip()
             os.remove(temp_audio_path)
+            if os.path.exists(flac_path):
+                os.remove(flac_path)
             # Step 2: BERT Prediction
             predicted_label, prob_results = bert_predict(transcribed_text)