Athspi committed on
Commit
c53ccee
·
verified ·
1 Parent(s): 959d3d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -40
app.py CHANGED
@@ -3,14 +3,50 @@ import whisper
3
  import os
4
  from pydub import AudioSegment
5
 
6
- # Mapping of model names to Whisper model sizes
7
- MODELS = {
8
- "Tiny (Fastest)": "tiny",
9
- "Base (Faster)": "base",
10
- "Small (Balanced)": "small",
11
- "Medium (Accurate)": "medium",
12
- "Large (Most Accurate)": "large"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Mapping of full language names to language codes
16
  LANGUAGE_NAME_TO_CODE = {
@@ -116,31 +152,6 @@ LANGUAGE_NAME_TO_CODE = {
116
  "Sundanese": "su",
117
  }
118
 
119
- def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
120
- # Load the selected Whisper model
121
- model = whisper.load_model(MODELS[model_size])
122
-
123
- # Convert audio to 16kHz mono for better compatibility with Whisper
124
- audio = AudioSegment.from_file(audio_file)
125
- audio = audio.set_frame_rate(16000).set_channels(1)
126
- processed_audio_path = "processed_audio.wav"
127
- audio.export(processed_audio_path, format="wav")
128
-
129
- # Transcribe the audio
130
- if language == "Auto Detect":
131
- result = model.transcribe(processed_audio_path, fp16=False) # Auto-detect language
132
- detected_language = result.get("language", "unknown")
133
- else:
134
- language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
135
- result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
136
- detected_language = language_code
137
-
138
- # Clean up processed audio file
139
- os.remove(processed_audio_path)
140
-
141
- # Return transcription and detected language
142
- return f"Detected Language: {detected_language}\n\nTranscription:\n{result['text']}"
143
-
144
  # Define the Gradio interface
145
  iface = gr.Interface(
146
  fn=transcribe_audio,
@@ -150,16 +161,11 @@ iface = gr.Interface(
150
  choices=list(LANGUAGE_NAME_TO_CODE.keys()), # Full language names
151
  label="Select Language",
152
  value="Auto Detect"
153
- ),
154
- gr.Dropdown(
155
- choices=list(MODELS.keys()), # Model options
156
- label="Select Model",
157
- value="Base (Faster)" # Default to "Base" model
158
  )
159
  ],
160
  outputs=gr.Textbox(label="Transcription and Detected Language"),
161
- title="Audio Transcription with Language and Model Selection",
162
- description="Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription."
163
  )
164
 
165
  # Launch the Gradio interface
 
3
  import os
4
  from pydub import AudioSegment
5
 
6
+ # Load the base Whisper model
7
+ base_model = whisper.load_model("base") # Default model for non-Sinhala languages
8
+
9
+ # Load the fine-tuned Sinhala model (if available)
10
+ sinhala_model = None
11
+ try:
12
+ from transformers import WhisperForConditionalGeneration, WhisperProcessor
13
+ sinhala_model = WhisperForConditionalGeneration.from_pretrained("Subhaka/whisper-small-Sinhala-Fine_Tune")
14
+ sinhala_processor = WhisperProcessor.from_pretrained("Subhaka/whisper-small-Sinhala-Fine_Tune")
15
+ except Exception as e:
16
+ print("Failed to load fine-tuned Sinhala model. Falling back to the base model.")
17
+ print(f"Error: {e}")
18
+
19
+ def transcribe_audio(audio_file, language="Auto Detect"):
20
+ # Convert audio to 16kHz mono for better compatibility with Whisper
21
+ audio = AudioSegment.from_file(audio_file)
22
+ audio = audio.set_frame_rate(16000).set_channels(1)
23
+ processed_audio_path = "processed_audio.wav"
24
+ audio.export(processed_audio_path, format="wav")
25
+
26
+ # Load the appropriate model based on the selected language
27
+ if language == "Sinhala" and sinhala_model is not None:
28
+ print("Using fine-tuned Sinhala model.")
29
+ model = sinhala_model
30
+ processor = sinhala_processor
31
+ else:
32
+ print("Using base Whisper model.")
33
+ model = base_model
34
+ processor = None
35
+
36
+ # Transcribe the audio
37
+ if language == "Auto Detect":
38
+ result = model.transcribe(processed_audio_path, fp16=False) # Auto-detect language
39
+ detected_language = result.get("language", "unknown")
40
+ else:
41
+ language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
42
+ result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
43
+ detected_language = language_code
44
+
45
+ # Clean up processed audio file
46
+ os.remove(processed_audio_path)
47
+
48
+ # Return transcription and detected language
49
+ return f"Detected Language: {detected_language}\n\nTranscription:\n{result['text']}"
50
 
51
  # Mapping of full language names to language codes
52
  LANGUAGE_NAME_TO_CODE = {
 
152
  "Sundanese": "su",
153
  }
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  # Define the Gradio interface
156
  iface = gr.Interface(
157
  fn=transcribe_audio,
 
161
  choices=list(LANGUAGE_NAME_TO_CODE.keys()), # Full language names
162
  label="Select Language",
163
  value="Auto Detect"
 
 
 
 
 
164
  )
165
  ],
166
  outputs=gr.Textbox(label="Transcription and Detected Language"),
167
+ title="Audio Transcription with Language Selection",
168
+ description="Upload an audio file and select a language (or choose 'Auto Detect'). For Sinhala, a fine-tuned model will be used automatically."
169
  )
170
 
171
  # Launch the Gradio interface