Spaces:

navyaparesh
/

Modeltest

Sleeping

App Files Files Community

navyaparesh committed on Apr 9

Commit

5b530ca

verified ·

1 Parent(s): 0b46c8a

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -15

app.py CHANGED Viewed

@@ -12,22 +12,40 @@ import spaces
 import nemo.collections.asr as nemo_asr
 LANGUAGE_NAME_TO_CODE = {
     "Hindi": "hi",
 }
 DESCRIPTION = """\
 ### **IndicConformer: Speech Recognition for Indian Languages** 🎙️➡️📜
-**IndicConformer**, a speech recognition model for **22 Indian languages**. The model operates in two modes: **CTC (Connectionist Temporal Classification)** and **RNNT (Recurrent Neural Network Transducer)**
 #### **How to Use:**
-1. **Upload or record** an audio clip in Hindi.
 2. Select the **mode** (CTC or RNNT) for transcription.
-3. Click **"Transcribe"** to generate the corresponding text.
 """
 hf_token = os.getenv("HF_TOKEN")
@@ -41,8 +59,8 @@ model.eval()
 CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
 AUDIO_SAMPLE_RATE = 16000
-MAX_INPUT_AUDIO_LENGTH = 600  # in seconds
-DEFAULT_TARGET_LANGUAGE = "Hindi"
 @spaces.GPU
 def run_asr_ctc(input_audio: str, target_language: str) -> str:
@@ -135,6 +153,21 @@ with gr.Blocks() as demo_asr_ctc:
             btn = gr.Button("Transcribe")
         with gr.Column():
             output_text = gr.Textbox(label="Transcribed text")
     btn.click(
         fn=run_asr_ctc,
         inputs=[input_audio, target_language],
@@ -156,6 +189,20 @@ with gr.Blocks() as demo_asr_rnnt:
         with gr.Column():
             output_text = gr.Textbox(label="Transcribed text")
     btn.click(
         fn=run_asr_rnnt,
         inputs=[input_audio, target_language],
@@ -166,11 +213,11 @@ with gr.Blocks() as demo_asr_rnnt:
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
-#    gr.DuplicateButton(
-#        value="Duplicate Space for private use",
-#        elem_id="duplicate-button",
-#        visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
-#    )
     with gr.Tabs():
         with gr.Tab(label="CTC"):

 import nemo.collections.asr as nemo_asr
 LANGUAGE_NAME_TO_CODE = {
+    "Assamese": "as",
+    "Bengali": "bn",
+    "Bodo": "br",
+    "Dogri": "doi",
+    "Gujarati": "gu",
     "Hindi": "hi",
+    "Kannada": "kn",
+    "Kashmiri": "ks",
+    "Konkani": "kok",
+    "Maithili": "mai",
+    "Malayalam": "ml",
+    "Manipuri": "mni",
+    "Marathi": "mr",
+    "Nepali": "ne",
+    "Odia": "or",
+    "Punjabi": "pa",
+    "Sanskrit": "sa",
+    "Santali": "sat",
+    "Sindhi": "sd",
+    "Tamil": "ta",
+    "Telugu": "te",
+    "Urdu": "ur"
 }
 DESCRIPTION = """\
 ### **IndicConformer: Speech Recognition for Indian Languages** 🎙️➡️📜
+This Gradio demo showcases **IndicConformer**, a speech recognition model for **22 Indian languages**. The model operates in two modes: **CTC (Connectionist Temporal Classification)** and **RNNT (Recurrent Neural Network Transducer)**, providing robust and accurate transcriptions across diverse linguistic and acoustic conditions.
 #### **How to Use:**
+1. **Upload or record** an audio clip in any supported Indian language.
 2. Select the **mode** (CTC or RNNT) for transcription.
+3. Click **"Transcribe"** to generate the corresponding text in the target language.
+4. View or copy the output for further use.
+🚀 Try it out and experience seamless speech recognition for Indian languages!
 """
 hf_token = os.getenv("HF_TOKEN")
 CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
 AUDIO_SAMPLE_RATE = 16000
+MAX_INPUT_AUDIO_LENGTH = 60  # in seconds
+DEFAULT_TARGET_LANGUAGE = "Bengali"
 @spaces.GPU
 def run_asr_ctc(input_audio: str, target_language: str) -> str:
             btn = gr.Button("Transcribe")
         with gr.Column():
             output_text = gr.Textbox(label="Transcribed text")
+    gr.Examples(
+        examples=[
+            ["assets/Bengali.wav", "Bengali", "English"],
+            ["assets/Gujarati.wav", "Gujarati", "Hindi"],
+            ["assets/Punjabi.wav", "Punjabi", "Hindi"],
+        ],
+        inputs=[input_audio, target_language],
+        outputs=output_text,
+        fn=run_asr_ctc,
+        cache_examples=CACHE_EXAMPLES,
+        api_name=False,
+    )
     btn.click(
         fn=run_asr_ctc,
         inputs=[input_audio, target_language],
         with gr.Column():
             output_text = gr.Textbox(label="Transcribed text")
+    gr.Examples(
+        examples=[
+            ["assets/Bengali.wav", "Bengali", "English"],
+            ["assets/Gujarati.wav", "Gujarati", "Hindi"],
+            ["assets/Punjabi.wav", "Punjabi", "Hindi"],
+        ],
+        inputs=[input_audio, target_language],
+        outputs=output_text,
+        fn=run_asr_rnnt,
+        cache_examples=CACHE_EXAMPLES,
+        api_name=False,
+    )
     btn.click(
         fn=run_asr_rnnt,
         inputs=[input_audio, target_language],
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
+    gr.DuplicateButton(
+        value="Duplicate Space for private use",
+        elem_id="duplicate-button",
+        visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
+    )
     with gr.Tabs():
         with gr.Tab(label="CTC"):