Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
import whisper
|
3 |
import os
|
4 |
from pydub import AudioSegment
|
|
|
5 |
|
6 |
# Mapping of model names to Whisper model sizes
|
7 |
MODELS = {
|
@@ -12,8 +13,8 @@ MODELS = {
|
|
12 |
"Large (Most Accurate)": "large"
|
13 |
}
|
14 |
|
15 |
-
# Fine-tuned Sinhala model
|
16 |
-
|
17 |
|
18 |
# Mapping of full language names to language codes
|
19 |
LANGUAGE_NAME_TO_CODE = {
|
@@ -121,14 +122,6 @@ LANGUAGE_NAME_TO_CODE = {
|
|
121 |
|
122 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
|
123 |
"""Transcribe the audio file."""
|
124 |
-
# Load the appropriate model
|
125 |
-
if language == "Sinhala":
|
126 |
-
# Use the fine-tuned Sinhala model
|
127 |
-
model = gr.load(SINHALA_MODEL)
|
128 |
-
else:
|
129 |
-
# Use the selected Whisper model
|
130 |
-
model = whisper.load_model(MODELS[model_size])
|
131 |
-
|
132 |
# Convert audio to 16kHz mono for better compatibility with Whisper
|
133 |
audio = AudioSegment.from_file(audio_file)
|
134 |
audio = audio.set_frame_rate(16000).set_channels(1)
|
@@ -136,13 +129,20 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
|
|
136 |
audio.export(processed_audio_path, format="wav")
|
137 |
|
138 |
# Transcribe the audio
|
139 |
-
if language == "
|
140 |
-
|
141 |
-
|
|
|
142 |
else:
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
# Clean up processed audio file
|
148 |
os.remove(processed_audio_path)
|
|
|
2 |
import whisper
|
3 |
import os
|
4 |
from pydub import AudioSegment
|
5 |
+
from transformers import pipeline
|
6 |
|
7 |
# Mapping of model names to Whisper model sizes
|
8 |
MODELS = {
|
|
|
13 |
"Large (Most Accurate)": "large"
|
14 |
}
|
15 |
|
16 |
+
# Fine-tuned Sinhala model using Hugging Face pipeline
|
17 |
+
SINHALA_PIPELINE = pipeline("automatic-speech-recognition", model="Subhaka/whisper-small-Sinhala-Fine_Tune")
|
18 |
|
19 |
# Mapping of full language names to language codes
|
20 |
LANGUAGE_NAME_TO_CODE = {
|
|
|
122 |
|
123 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
|
124 |
"""Transcribe the audio file."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
# Convert audio to 16kHz mono for better compatibility with Whisper
|
126 |
audio = AudioSegment.from_file(audio_file)
|
127 |
audio = audio.set_frame_rate(16000).set_channels(1)
|
|
|
129 |
audio.export(processed_audio_path, format="wav")
|
130 |
|
131 |
# Transcribe the audio
|
132 |
+
if language == "Sinhala":
|
133 |
+
# Use the fine-tuned Sinhala model
|
134 |
+
result = SINHALA_PIPELINE(processed_audio_path)
|
135 |
+
detected_language = "si"
|
136 |
else:
|
137 |
+
# Use the selected Whisper model
|
138 |
+
model = whisper.load_model(MODELS[model_size])
|
139 |
+
if language == "Auto Detect":
|
140 |
+
result = model.transcribe(processed_audio_path, fp16=False) # Auto-detect language
|
141 |
+
detected_language = result.get("language", "unknown")
|
142 |
+
else:
|
143 |
+
language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
|
144 |
+
result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
|
145 |
+
detected_language = language_code
|
146 |
|
147 |
# Clean up processed audio file
|
148 |
os.remove(processed_audio_path)
|