navyaparesh committed on
Commit 5b530ca · verified · 1 Parent(s): 0b46c8a

Update app.py

Files changed (1): app.py  +62 -15
app.py CHANGED
@@ -12,22 +12,40 @@ import spaces
 import nemo.collections.asr as nemo_asr
 
 LANGUAGE_NAME_TO_CODE = {
-
+    "Assamese": "as",
+    "Bengali": "bn",
+    "Bodo": "br",
+    "Dogri": "doi",
+    "Gujarati": "gu",
     "Hindi": "hi",
-
+    "Kannada": "kn",
+    "Kashmiri": "ks",
+    "Konkani": "kok",
+    "Maithili": "mai",
+    "Malayalam": "ml",
+    "Manipuri": "mni",
+    "Marathi": "mr",
+    "Nepali": "ne",
+    "Odia": "or",
+    "Punjabi": "pa",
+    "Sanskrit": "sa",
+    "Santali": "sat",
+    "Sindhi": "sd",
+    "Tamil": "ta",
+    "Telugu": "te",
+    "Urdu": "ur"
 }
 
 
 DESCRIPTION = """\
 ### **IndicConformer: Speech Recognition for Indian Languages** 🎙️➡️📜
-
-**IndicConformer**, a speech recognition model for **22 Indian languages**. The model operates in two modes: **CTC (Connectionist Temporal Classification)** and **RNNT (Recurrent Neural Network Transducer)**
-
+This Gradio demo showcases **IndicConformer**, a speech recognition model for **22 Indian languages**. The model operates in two modes: **CTC (Connectionist Temporal Classification)** and **RNNT (Recurrent Neural Network Transducer)**, providing robust and accurate transcriptions across diverse linguistic and acoustic conditions.
 #### **How to Use:**
-1. **Upload or record** an audio clip in Hindi.
+1. **Upload or record** an audio clip in any supported Indian language.
 2. Select the **mode** (CTC or RNNT) for transcription.
-3. Click **"Transcribe"** to generate the corresponding text.
-
+3. Click **"Transcribe"** to generate the corresponding text in the target language.
+4. View or copy the output for further use.
+🚀 Try it out and experience seamless speech recognition for Indian languages!
 """
 
 hf_token = os.getenv("HF_TOKEN")
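The updated DESCRIPTION advertises the two decoding modes, but the hunk does not show how a mode is selected at inference time. As a rough illustration only (not this Space's actual code), the sketch below loads a NeMo ASR checkpoint and switches decoders per request; the checkpoint id, the `cur_decoder` switch, and the `language_id` argument are assumptions based on how AI4Bharat's IndicConformer checkpoints are commonly used, and may differ from what app.py does.

```python
# Hedged sketch: how a hybrid CTC/RNNT IndicConformer checkpoint *might* be
# loaded and queried. The checkpoint id, `cur_decoder`, and `language_id`
# are assumptions, not taken from this commit.
import nemo.collections.asr as nemo_asr

LANGUAGE_NAME_TO_CODE = {"Hindi": "hi", "Bengali": "bn"}  # abridged copy

model = nemo_asr.models.ASRModel.from_pretrained(
    "ai4bharat/indic-conformer-600m-multilingual"  # placeholder id
)
model.eval()

def transcribe(audio_path: str, language_name: str, mode: str) -> str:
    """Transcribe one audio file with either the CTC or the RNNT decoder."""
    lang = LANGUAGE_NAME_TO_CODE[language_name]
    model.cur_decoder = mode  # assumed to accept "ctc" or "rnnt"
    # NeMo's transcribe() returns one hypothesis per input file.
    return model.transcribe([audio_path], batch_size=1, language_id=lang)[0]
```

Calling `transcribe("clip.wav", "Hindi", "ctc")` and then again with `"rnnt"` mirrors the two tabs the Space exposes.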
@@ -41,8 +59,8 @@ model.eval()
 CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
 
 AUDIO_SAMPLE_RATE = 16000
-MAX_INPUT_AUDIO_LENGTH = 600 # in seconds
-DEFAULT_TARGET_LANGUAGE = "Hindi"
+MAX_INPUT_AUDIO_LENGTH = 60 # in seconds
+DEFAULT_TARGET_LANGUAGE = "Bengali"
 
 @spaces.GPU
 def run_asr_ctc(input_audio: str, target_language: str) -> str:
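The two constants changed here imply that input audio is expected at 16 kHz and now capped at 60 seconds, but the preprocessing itself is outside this hunk. A minimal, self-contained sketch of what such a step could look like with torchaudio; the helper name and the truncate-rather-than-reject behaviour are assumptions, only the two constants come from the diff.

```python
# Hedged sketch: resample to 16 kHz mono and cap the clip length before
# transcription. Helper name and truncation behaviour are assumptions.
import torchaudio
import torchaudio.functional as AF

AUDIO_SAMPLE_RATE = 16000
MAX_INPUT_AUDIO_LENGTH = 60  # in seconds

def preprocess_audio(in_path: str, out_path: str = "preprocessed.wav") -> str:
    wav, sr = torchaudio.load(in_path)           # (channels, samples)
    wav = wav.mean(dim=0, keepdim=True)          # downmix to mono
    if sr != AUDIO_SAMPLE_RATE:
        wav = AF.resample(wav, sr, AUDIO_SAMPLE_RATE)
    max_samples = AUDIO_SAMPLE_RATE * MAX_INPUT_AUDIO_LENGTH
    wav = wav[:, :max_samples]                   # keep at most 60 s
    torchaudio.save(out_path, wav, AUDIO_SAMPLE_RATE)
    return out_path
```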
@@ -135,6 +153,21 @@ with gr.Blocks() as demo_asr_ctc:
 btn = gr.Button("Transcribe")
 with gr.Column():
 output_text = gr.Textbox(label="Transcribed text")
+
+gr.Examples(
+    examples=[
+        ["assets/Bengali.wav", "Bengali", "English"],
+        ["assets/Gujarati.wav", "Gujarati", "Hindi"],
+        ["assets/Punjabi.wav", "Punjabi", "Hindi"],
+
+    ],
+    inputs=[input_audio, target_language],
+    outputs=output_text,
+    fn=run_asr_ctc,
+    cache_examples=CACHE_EXAMPLES,
+    api_name=False,
+)
+
 btn.click(
 fn=run_asr_ctc,
 inputs=[input_audio, target_language],
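For context on the gr.Examples block added above: each example row is unpacked into the listed `inputs`, and with `fn` and `outputs` set, Gradio can pre-compute outputs when `cache_examples` is true. Note that in the diff each row carries three values while only two input components are bound; the extra column would only be consumed if a third input exists elsewhere in app.py. A standalone toy version of the same wiring, with a dummy transcriber and invented file names:

```python
# Toy, self-contained version of the gr.Examples pattern above. Each example
# row supplies exactly one value per bound input component.
import gradio as gr

def fake_asr(audio_path: str, language: str) -> str:
    return f"(transcript of {audio_path} in {language})"

with gr.Blocks() as demo:
    input_audio = gr.Audio(label="Input speech", type="filepath")
    target_language = gr.Dropdown(
        ["Bengali", "Gujarati", "Punjabi"], label="Language", value="Bengali"
    )
    output_text = gr.Textbox(label="Transcribed text")
    gr.Examples(
        examples=[
            ["assets/Bengali.wav", "Bengali"],
            ["assets/Gujarati.wav", "Gujarati"],
        ],
        inputs=[input_audio, target_language],
        outputs=output_text,
        fn=fake_asr,
        cache_examples=False,  # the Space gates caching behind CACHE_EXAMPLES
    )
    gr.Button("Transcribe").click(
        fn=fake_asr, inputs=[input_audio, target_language], outputs=output_text
    )

if __name__ == "__main__":
    demo.launch()
```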
@@ -156,6 +189,20 @@ with gr.Blocks() as demo_asr_rnnt:
 with gr.Column():
 output_text = gr.Textbox(label="Transcribed text")
 
+gr.Examples(
+    examples=[
+        ["assets/Bengali.wav", "Bengali", "English"],
+        ["assets/Gujarati.wav", "Gujarati", "Hindi"],
+        ["assets/Punjabi.wav", "Punjabi", "Hindi"],
+
+    ],
+    inputs=[input_audio, target_language],
+    outputs=output_text,
+    fn=run_asr_rnnt,
+    cache_examples=CACHE_EXAMPLES,
+    api_name=False,
+)
+
 btn.click(
 fn=run_asr_rnnt,
 inputs=[input_audio, target_language],
@@ -166,11 +213,11 @@ with gr.Blocks() as demo_asr_rnnt:
 
 with gr.Blocks(css="style.css") as demo:
 gr.Markdown(DESCRIPTION)
-# gr.DuplicateButton(
-#     value="Duplicate Space for private use",
-#     elem_id="duplicate-button",
-#     visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
-# )
+gr.DuplicateButton(
+    value="Duplicate Space for private use",
+    elem_id="duplicate-button",
+    visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
+)
 
 with gr.Tabs():
 with gr.Tab(label="CTC"):
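The final hunk un-comments the DuplicateButton and keeps the tabbed layout that mounts the CTC and RNNT sub-demos. A compact, hypothetical reconstruction of that outer layout; the sub-demo contents, the Markdown text, and the launch call are stand-ins, not part of this commit.

```python
# Hypothetical reconstruction of the outer layout this hunk belongs to:
# two pre-built Blocks rendered under tabs, plus the now-enabled
# DuplicateButton toggled by an environment variable.
import os
import gradio as gr

demo_asr_ctc = gr.Blocks()   # placeholder for the real CTC sub-demo
demo_asr_rnnt = gr.Blocks()  # placeholder for the real RNNT sub-demo

with gr.Blocks() as demo:
    gr.Markdown("### IndicConformer demo")  # the Space uses DESCRIPTION here
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_id="duplicate-button",
        visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
    )
    with gr.Tabs():
        with gr.Tab(label="CTC"):
            demo_asr_ctc.render()
        with gr.Tab(label="RNNT"):
            demo_asr_rnnt.render()

if __name__ == "__main__":
    demo.queue().launch()
```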