Athspi committed on
Commit
a274161
·
verified ·
1 Parent(s): fce1940

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import whisper
3
  import os
4
  from pydub import AudioSegment
 
5
 
6
  # Mapping of model names to Whisper model sizes
7
  MODELS = {
@@ -12,8 +13,8 @@ MODELS = {
12
  "Large (Most Accurate)": "large"
13
  }
14
 
15
- # Fine-tuned Sinhala model
16
- SINHALA_MODEL = "malakazzz/Subhaka-whisper-small-Sinhala-Fine_Tune"
17
 
18
  # Mapping of full language names to language codes
19
  LANGUAGE_NAME_TO_CODE = {
@@ -121,14 +122,6 @@ LANGUAGE_NAME_TO_CODE = {
121
 
122
  def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
123
  """Transcribe the audio file."""
124
- # Load the appropriate model
125
- if language == "Sinhala":
126
- # Use the fine-tuned Sinhala model
127
- model = gr.load(SINHALA_MODEL)
128
- else:
129
- # Use the selected Whisper model
130
- model = whisper.load_model(MODELS[model_size])
131
-
132
  # Convert audio to 16kHz mono for better compatibility with Whisper
133
  audio = AudioSegment.from_file(audio_file)
134
  audio = audio.set_frame_rate(16000).set_channels(1)
@@ -136,13 +129,20 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
136
  audio.export(processed_audio_path, format="wav")
137
 
138
  # Transcribe the audio
139
- if language == "Auto Detect":
140
- result = model.transcribe(processed_audio_path, fp16=False) # Auto-detect language
141
- detected_language = result.get("language", "unknown")
 
142
  else:
143
- language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
144
- result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
145
- detected_language = language_code
 
 
 
 
 
 
146
 
147
  # Clean up processed audio file
148
  os.remove(processed_audio_path)
 
2
  import whisper
3
  import os
4
  from pydub import AudioSegment
5
+ from transformers import pipeline
6
 
7
  # Mapping of model names to Whisper model sizes
8
  MODELS = {
 
13
  "Large (Most Accurate)": "large"
14
  }
15
 
16
+ # Fine-tuned Sinhala model using Hugging Face pipeline
17
+ SINHALA_PIPELINE = pipeline("automatic-speech-recognition", model="Subhaka/whisper-small-Sinhala-Fine_Tune")
18
 
19
  # Mapping of full language names to language codes
20
  LANGUAGE_NAME_TO_CODE = {
 
122
 
123
  def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
124
  """Transcribe the audio file."""
 
 
 
 
 
 
 
 
125
  # Convert audio to 16kHz mono for better compatibility with Whisper
126
  audio = AudioSegment.from_file(audio_file)
127
  audio = audio.set_frame_rate(16000).set_channels(1)
 
129
  audio.export(processed_audio_path, format="wav")
130
 
131
  # Transcribe the audio
132
+ if language == "Sinhala":
133
+ # Use the fine-tuned Sinhala model
134
+ result = SINHALA_PIPELINE(processed_audio_path)
135
+ detected_language = "si"
136
  else:
137
+ # Use the selected Whisper model
138
+ model = whisper.load_model(MODELS[model_size])
139
+ if language == "Auto Detect":
140
+ result = model.transcribe(processed_audio_path, fp16=False) # Auto-detect language
141
+ detected_language = result.get("language", "unknown")
142
+ else:
143
+ language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
144
+ result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
145
+ detected_language = language_code
146
 
147
  # Clean up processed audio file
148
  os.remove(processed_audio_path)