Futuresony committed on
Commit
0f76ae4
·
verified ·
1 Parent(s): e0e18ea

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torchaudio
4
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
5
+
6
# Hugging Face Hub repository that hosts both the processor and the CTC model.
model_name = "Futuresony/Future-sw_ASR-24-02-2025"

# Loaded once at import time so every transcription request reuses the same
# processor and model objects.
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
10
+
11
+ # Function to transcribe audio
12
def transcribe(audio_file):
    """Transcribe a speech recording with the module-level Wav2Vec2 CTC model.

    Args:
        audio_file: Path to an audio file that ``torchaudio.load`` can read.
            A falsy value (for example ``None`` when nothing was uploaded)
            is treated as "no audio".

    Returns:
        The decoded transcription string, or ``""`` when there is no audio
        to decode (no file given, or the file contains zero samples).
    """
    target_rate = 16000  # sampling rate handed to the processor below

    # No upload/recording: return early instead of failing inside load().
    if not audio_file:
        return ""

    # With torchaudio's defaults the tensor is laid out as (channels, frames).
    waveform, sample_rate = torchaudio.load(audio_file)
    if waveform.numel() == 0:
        return ""

    # Downmix to a single channel. A plain squeeze() keeps stereo input as a
    # 2-D array, which the processor would treat as a batch of separate
    # utterances, so only the first channel would end up transcribed.
    waveform = waveform.mean(dim=0, keepdim=True)

    # Resample to 16kHz only when the file is not already at that rate.
    if sample_rate != target_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=target_rate
        )
        waveform = resampler(waveform)
    speech_array = waveform.squeeze(0).numpy()

    # Process and transcribe
    input_values = processor(
        speech_array, sampling_rate=target_rate, return_tensors="pt"
    ).input_values
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(input_values).logits
    # Greedy CTC decoding: take the highest-scoring token at every frame.
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode the token ids to text; the batch holds exactly one utterance.
    transcription = processor.batch_decode(predicted_ids)[0]
    return transcription
28
+
29
# Gradio UI: a single audio input, handed to the callback as a file path,
# and a single text output showing the transcription.
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Swahili ASR Transcription",
    description="Upload a Swahili audio file, and the model will transcribe the speech.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
41
+