Spaces:
Runtime error
Runtime error
Michael Natanael
committed on
Commit
·
aad2b2d
1
Parent(s):
82b14e6
Revert "konversi audio ke FLAC (mono 16kHz) menggunakan ffmpeg sebelum dikirim ke Groq Whisper API untuk transkripsi"
Browse files
This reverts commit 82b14e678f2ebb230f9db2d23bdda576cd6c73d6.
app.py
CHANGED
@@ -20,7 +20,6 @@ from werkzeug.security import generate_password_hash, check_password_hash
|
|
20 |
from faster_whisper import WhisperModel
|
21 |
from groq import Groq
|
22 |
import tempfile
|
23 |
-
import subprocess
|
24 |
import os
|
25 |
import datetime
|
26 |
import time
|
@@ -209,29 +208,6 @@ def bert_predict(input_lyric):
|
|
209 |
return predicted_label, prob_results
|
210 |
|
211 |
|
212 |
-
# Fungsi konversi audio ke FLAC mono 16kHz
|
213 |
-
def convert_audio_to_flac(input_path):
|
214 |
-
output_path = tempfile.NamedTemporaryFile(suffix=".flac", delete=False).name
|
215 |
-
subprocess.run(
|
216 |
-
[
|
217 |
-
"ffmpeg",
|
218 |
-
"-i",
|
219 |
-
input_path,
|
220 |
-
"-ar",
|
221 |
-
"16000",
|
222 |
-
"-ac",
|
223 |
-
"1",
|
224 |
-
"-map",
|
225 |
-
"0:a",
|
226 |
-
"-c:a",
|
227 |
-
"flac",
|
228 |
-
output_path,
|
229 |
-
],
|
230 |
-
check=True,
|
231 |
-
)
|
232 |
-
return output_path
|
233 |
-
|
234 |
-
|
235 |
# === ROUTES ===
|
236 |
|
237 |
|
@@ -259,13 +235,9 @@ def transcribe():
|
|
259 |
|
260 |
# Step 1: Transcribe
|
261 |
# transcribed_text = faster_whisper(temp_audio_path).strip()
|
262 |
-
|
263 |
-
flac_path = convert_audio_to_flac(temp_audio_path)
|
264 |
-
|
265 |
-
# Kirim ke Groq Whisper API
|
266 |
-
with open(flac_path, "rb") as file:
|
267 |
transcription = client.audio.transcriptions.create(
|
268 |
-
file=(
|
269 |
model="whisper-large-v3",
|
270 |
prompt="Transkripsikan hanya bagian lirik lagu saja",
|
271 |
language="id",
|
@@ -274,8 +246,6 @@ def transcribe():
|
|
274 |
)
|
275 |
transcribed_text = transcription.text.strip()
|
276 |
os.remove(temp_audio_path)
|
277 |
-
if os.path.exists(flac_path):
|
278 |
-
os.remove(flac_path)
|
279 |
|
280 |
# Step 2: BERT Prediction
|
281 |
predicted_label, prob_results = bert_predict(transcribed_text)
|
|
|
20 |
from faster_whisper import WhisperModel
|
21 |
from groq import Groq
|
22 |
import tempfile
|
|
|
23 |
import os
|
24 |
import datetime
|
25 |
import time
|
|
|
208 |
return predicted_label, prob_results
|
209 |
|
210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
# === ROUTES ===
|
212 |
|
213 |
|
|
|
235 |
|
236 |
# Step 1: Transcribe
|
237 |
# transcribed_text = faster_whisper(temp_audio_path).strip()
|
238 |
+
with open(temp_audio_path, "rb") as file:
|
|
|
|
|
|
|
|
|
239 |
transcription = client.audio.transcriptions.create(
|
240 |
+
file=(temp_audio_path, file.read()),
|
241 |
model="whisper-large-v3",
|
242 |
prompt="Transkripsikan hanya bagian lirik lagu saja",
|
243 |
language="id",
|
|
|
246 |
)
|
247 |
transcribed_text = transcription.text.strip()
|
248 |
os.remove(temp_audio_path)
|
|
|
|
|
249 |
|
250 |
# Step 2: BERT Prediction
|
251 |
predicted_label, prob_results = bert_predict(transcribed_text)
|