Athspi committed on
Commit
959d3d3
·
verified ·
1 Parent(s): b995bb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -27
app.py CHANGED
@@ -3,30 +3,14 @@ import whisper
3
  import os
4
  from pydub import AudioSegment
5
 
6
- # Load a larger Whisper model for better accuracy
7
- model = whisper.load_model("medium") # Use "medium" or "large" for better results
8
-
9
- def transcribe_audio(audio_file, language="Auto Detect"):
10
- # Convert audio to 16kHz mono for better compatibility with Whisper
11
- audio = AudioSegment.from_file(audio_file)
12
- audio = audio.set_frame_rate(16000).set_channels(1)
13
- processed_audio_path = "processed_audio.wav"
14
- audio.export(processed_audio_path, format="wav")
15
-
16
- # Transcribe the audio
17
- if language == "Auto Detect":
18
- result = model.transcribe(processed_audio_path, fp16=False) # Auto-detect language
19
- detected_language = result.get("language", "unknown")
20
- else:
21
- language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
22
- result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
23
- detected_language = language_code
24
-
25
- # Clean up processed audio file
26
- os.remove(processed_audio_path)
27
-
28
- # Return transcription and detected language
29
- return f"Detected Language: {detected_language}\n\nTranscription:\n{result['text']}"
30
 
31
  # Mapping of full language names to language codes
32
  LANGUAGE_NAME_TO_CODE = {
@@ -94,7 +78,7 @@ LANGUAGE_NAME_TO_CODE = {
94
  "Galician": "gl",
95
  "Marathi": "mr",
96
  "Punjabi": "pa",
97
- "Sinhala": "si",
98
  "Khmer": "km",
99
  "Shona": "sn",
100
  "Yoruba": "yo",
@@ -132,6 +116,31 @@ LANGUAGE_NAME_TO_CODE = {
132
  "Sundanese": "su",
133
  }
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  # Define the Gradio interface
136
  iface = gr.Interface(
137
  fn=transcribe_audio,
@@ -141,11 +150,16 @@ iface = gr.Interface(
141
  choices=list(LANGUAGE_NAME_TO_CODE.keys()), # Full language names
142
  label="Select Language",
143
  value="Auto Detect"
 
 
 
 
 
144
  )
145
  ],
146
  outputs=gr.Textbox(label="Transcription and Detected Language"),
147
- title="Audio Transcription with Language Selection",
148
- description="Upload an audio file and select a language (or choose 'Auto Detect')."
149
  )
150
 
151
  # Launch the Gradio interface
 
3
  import os
4
  from pydub import AudioSegment
5
 
6
+ # Mapping of model names to Whisper model sizes
7
+ MODELS = {
8
+ "Tiny (Fastest)": "tiny",
9
+ "Base (Faster)": "base",
10
+ "Small (Balanced)": "small",
11
+ "Medium (Accurate)": "medium",
12
+ "Large (Most Accurate)": "large"
13
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Mapping of full language names to language codes
16
  LANGUAGE_NAME_TO_CODE = {
 
78
  "Galician": "gl",
79
  "Marathi": "mr",
80
  "Punjabi": "pa",
81
+ "Sinhala": "si", # Sinhala support
82
  "Khmer": "km",
83
  "Shona": "sn",
84
  "Yoruba": "yo",
 
116
  "Sundanese": "su",
117
  }
118
 
119
+ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
120
+ # Load the selected Whisper model
121
+ model = whisper.load_model(MODELS[model_size])
122
+
123
+ # Convert audio to 16kHz mono for better compatibility with Whisper
124
+ audio = AudioSegment.from_file(audio_file)
125
+ audio = audio.set_frame_rate(16000).set_channels(1)
126
+ processed_audio_path = "processed_audio.wav"
127
+ audio.export(processed_audio_path, format="wav")
128
+
129
+ # Transcribe the audio
130
+ if language == "Auto Detect":
131
+ result = model.transcribe(processed_audio_path, fp16=False) # Auto-detect language
132
+ detected_language = result.get("language", "unknown")
133
+ else:
134
+ language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
135
+ result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
136
+ detected_language = language_code
137
+
138
+ # Clean up processed audio file
139
+ os.remove(processed_audio_path)
140
+
141
+ # Return transcription and detected language
142
+ return f"Detected Language: {detected_language}\n\nTranscription:\n{result['text']}"
143
+
144
  # Define the Gradio interface
145
  iface = gr.Interface(
146
  fn=transcribe_audio,
 
150
  choices=list(LANGUAGE_NAME_TO_CODE.keys()), # Full language names
151
  label="Select Language",
152
  value="Auto Detect"
153
+ ),
154
+ gr.Dropdown(
155
+ choices=list(MODELS.keys()), # Model options
156
+ label="Select Model",
157
+ value="Base (Faster)" # Default to "Base" model
158
  )
159
  ],
160
  outputs=gr.Textbox(label="Transcription and Detected Language"),
161
+ title="Audio Transcription with Language and Model Selection",
162
+ description="Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription."
163
  )
164
 
165
  # Launch the Gradio interface