Michael Natanael committed on
Commit
82b14e6
·
1 Parent(s): 498fece

konversi audio ke FLAC (mono 16kHz) menggunakan ffmpeg sebelum dikirim ke Groq Whisper API untuk transkripsi

Browse files
Files changed (1) hide show
  1. app.py +32 -2
app.py CHANGED
@@ -20,6 +20,7 @@ from werkzeug.security import generate_password_hash, check_password_hash
20
  from faster_whisper import WhisperModel
21
  from groq import Groq
22
  import tempfile
 
23
  import os
24
  import datetime
25
  import time
@@ -208,6 +209,29 @@ def bert_predict(input_lyric):
208
  return predicted_label, prob_results
209
 
210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  # === ROUTES ===
212
 
213
 
@@ -235,9 +259,13 @@ def transcribe():
235
 
236
  # Step 1: Transcribe
237
  # transcribed_text = faster_whisper(temp_audio_path).strip()
238
- with open(temp_audio_path, "rb") as file:
 
 
 
 
239
  transcription = client.audio.transcriptions.create(
240
- file=(temp_audio_path, file.read()),
241
  model="whisper-large-v3",
242
  prompt="Transkripsikan hanya bagian lirik lagu saja",
243
  language="id",
@@ -246,6 +274,8 @@ def transcribe():
246
  )
247
  transcribed_text = transcription.text.strip()
248
  os.remove(temp_audio_path)
 
 
249
 
250
  # Step 2: BERT Prediction
251
  predicted_label, prob_results = bert_predict(transcribed_text)
 
20
  from faster_whisper import WhisperModel
21
  from groq import Groq
22
  import tempfile
23
+ import subprocess
24
  import os
25
  import datetime
26
  import time
 
209
  return predicted_label, prob_results
210
 
211
 
212
+ # Fungsi konversi audio ke FLAC mono 16kHz
213
def convert_audio_to_flac(input_path):
    """Convert an audio file to a mono, 16 kHz FLAC file using ffmpeg.

    Args:
        input_path: Path of the source audio file handed to ffmpeg's ``-i``.

    Returns:
        Path of a newly created temporary ``.flac`` file. The caller is
        responsible for deleting it once it is no longer needed.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # mkstemp hands back an open descriptor; close it right away so the
    # handle is not leaked and ffmpeg can freely rewrite the file.
    fd, output_path = tempfile.mkstemp(suffix=".flac")
    os.close(fd)

    command = [
        "ffmpeg",
        # The output file already exists (created above), so ffmpeg must be
        # told to overwrite it; otherwise it prompts or refuses and exits.
        "-y",
        # Never read from stdin: a web worker has nobody to answer prompts.
        "-nostdin",
        "-i",
        input_path,
        "-ar",
        "16000",
        "-ac",
        "1",
        "-map",
        "0:a",
        "-c:a",
        "flac",
        output_path,
    ]

    try:
        subprocess.run(command, check=True)
    except (OSError, subprocess.SubprocessError):
        # Do not leave an empty/partial temp file behind on failure.
        try:
            os.remove(output_path)
        except OSError:
            # Best-effort cleanup; the original error is the one that matters.
            pass
        raise

    return output_path
233
+
234
+
235
  # === ROUTES ===
236
 
237
 
 
259
 
260
  # Step 1: Transcribe
261
  # transcribed_text = faster_whisper(temp_audio_path).strip()
262
+ # Konversi audio ke FLAC 16kHz mono untuk efisiensi
263
+ flac_path = convert_audio_to_flac(temp_audio_path)
264
+
265
+ # Kirim ke Groq Whisper API
266
+ with open(flac_path, "rb") as file:
267
  transcription = client.audio.transcriptions.create(
268
+ file=(flac_path, file.read()),
269
  model="whisper-large-v3",
270
  prompt="Transkripsikan hanya bagian lirik lagu saja",
271
  language="id",
 
274
  )
275
  transcribed_text = transcription.text.strip()
276
  os.remove(temp_audio_path)
277
+ if os.path.exists(flac_path):
278
+ os.remove(flac_path)
279
 
280
  # Step 2: BERT Prediction
281
  predicted_label, prob_results = bert_predict(transcribed_text)