Michael Natanael committed on
Commit
aad2b2d
·
1 Parent(s): 82b14e6

Revert "konversi audio ke FLAC (mono 16kHz) menggunakan ffmpeg sebelum dikirim ke Groq Whisper API untuk transkripsi"

Browse files

This reverts commit 82b14e678f2ebb230f9db2d23bdda576cd6c73d6.

Files changed (1) hide show
  1. app.py +2 -32
app.py CHANGED
@@ -20,7 +20,6 @@ from werkzeug.security import generate_password_hash, check_password_hash
20
  from faster_whisper import WhisperModel
21
  from groq import Groq
22
  import tempfile
23
- import subprocess
24
  import os
25
  import datetime
26
  import time
@@ -209,29 +208,6 @@ def bert_predict(input_lyric):
209
  return predicted_label, prob_results
210
 
211
 
212
- # Fungsi konversi audio ke FLAC mono 16kHz
213
- def convert_audio_to_flac(input_path):
214
- output_path = tempfile.NamedTemporaryFile(suffix=".flac", delete=False).name
215
- subprocess.run(
216
- [
217
- "ffmpeg",
218
- "-i",
219
- input_path,
220
- "-ar",
221
- "16000",
222
- "-ac",
223
- "1",
224
- "-map",
225
- "0:a",
226
- "-c:a",
227
- "flac",
228
- output_path,
229
- ],
230
- check=True,
231
- )
232
- return output_path
233
-
234
-
235
  # === ROUTES ===
236
 
237
 
@@ -259,13 +235,9 @@ def transcribe():
259
 
260
  # Step 1: Transcribe
261
  # transcribed_text = faster_whisper(temp_audio_path).strip()
262
- # Konversi audio ke FLAC 16kHz mono untuk efisiensi
263
- flac_path = convert_audio_to_flac(temp_audio_path)
264
-
265
- # Kirim ke Groq Whisper API
266
- with open(flac_path, "rb") as file:
267
  transcription = client.audio.transcriptions.create(
268
- file=(flac_path, file.read()),
269
  model="whisper-large-v3",
270
  prompt="Transkripsikan hanya bagian lirik lagu saja",
271
  language="id",
@@ -274,8 +246,6 @@ def transcribe():
274
  )
275
  transcribed_text = transcription.text.strip()
276
  os.remove(temp_audio_path)
277
- if os.path.exists(flac_path):
278
- os.remove(flac_path)
279
 
280
  # Step 2: BERT Prediction
281
  predicted_label, prob_results = bert_predict(transcribed_text)
 
20
  from faster_whisper import WhisperModel
21
  from groq import Groq
22
  import tempfile
 
23
  import os
24
  import datetime
25
  import time
 
208
  return predicted_label, prob_results
209
 
210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  # === ROUTES ===
212
 
213
 
 
235
 
236
  # Step 1: Transcribe
237
  # transcribed_text = faster_whisper(temp_audio_path).strip()
238
+ with open(temp_audio_path, "rb") as file:
 
 
 
 
239
  transcription = client.audio.transcriptions.create(
240
+ file=(temp_audio_path, file.read()),
241
  model="whisper-large-v3",
242
  prompt="Transkripsikan hanya bagian lirik lagu saja",
243
  language="id",
 
246
  )
247
  transcribed_text = transcription.text.strip()
248
  os.remove(temp_audio_path)
 
 
249
 
250
  # Step 2: BERT Prediction
251
  predicted_label, prob_results = bert_predict(transcribed_text)