saeedzou committed on
Commit
fe1d6ad
·
1 Parent(s): 934c916

Initial commit: Gradio app for private NeMo ASR model

Browse files
Files changed (2) hide show
  1. app.py +27 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import nemo.collections.asr as nemo_asr
3
+
4
# Load the ASR checkpoint once, at import time, so every request reuses the
# same model object. NOTE(review): the original comment calls this a private
# model; if the repository is gated, access credentials must be configured
# in the environment before this line runs -- confirm.
asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
    model_name="faimlab/stt_fa_fastconformer_hybrid_large_dataset_v30"
)
6
+
7
+ # Define a function that takes an audio file, transcribes it, and returns the text
8
def transcribe_audio(audio_file):
    """Transcribe one uploaded audio clip and return the recognized text.

    Args:
        audio_file: The value supplied by the Gradio audio input. This is a
            filesystem path string when the component uses
            ``type="filepath"``, an object exposing the path as ``.name``
            (legacy temp-file wrapper), or ``None`` when the input is empty
            or has been cleared.

    Returns:
        The transcribed text for the clip, or ``""`` when no audio was given.
    """
    # With live=True the callback can fire when the input is cleared; there
    # is nothing to transcribe in that case.
    if audio_file is None:
        return ""

    # Accept both a plain path and a file-like wrapper carrying it in .name.
    audio_path = audio_file if isinstance(audio_file, str) else audio_file.name

    # No resampling or channel conversion is done here: the file is handed to
    # the model as uploaded, which is why the UI asks for 16 kHz mono audio.
    output = asr_model.transcribe([audio_path])
    return output[0].text  # Text of the first (and only) transcription


# Create the Gradio interface.
# The legacy `gr.inputs.Audio(source=..., type="file")` API was removed in
# Gradio 4; `gr.Audio` with `sources=[...]` and `type="filepath"` is the
# current equivalent and passes the upload to the callback as a path string.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload an Audio File"),
    outputs="text",
    live=True,
    title="Speech-to-Text with Private ASR Model",
    description="Upload a 16kHz mono audio file for transcription.",
)

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ pydub
3
+ librosa
4
+ git+https://github.com/NVIDIA/NeMo.git@main#egg=nemo_toolkit[all]
5
+ huggingface_hub