added examples
- .gitattributes +1 -0
- app.py +57 -12
- examples/{cleo-abram.mp4 → american.mp4} +0 -0
- examples/british.mp4 +3 -0
- examples/irish.mp4 +3 -0
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 cleo-abram.mp4 filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
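For reference, a rough Python sketch (not part of this commit) of what the new *.mp4 rule means in practice: it lists every pattern in .gitattributes that is routed through the Git LFS filter, assuming only the simple one-line format shown above.

# Sketch: list the .gitattributes patterns that Git routes through LFS.
# Assumes lines of the form "<pattern> filter=lfs diff=lfs merge=lfs -text".
from pathlib import Path

def lfs_patterns(attributes_path=".gitattributes"):
    patterns = []
    for line in Path(attributes_path).read_text().splitlines():
        parts = line.split()
        if len(parts) > 1 and "filter=lfs" in parts[1:]:
            patterns.append(parts[0])
    return patterns

# After this commit the list includes both "cleo-abram.mp4" and the broader "*.mp4".
print(lfs_patterns())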
app.py
CHANGED
@@ -13,6 +13,27 @@ whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-ti
 
 classifier = foreign_class(source="Jzuluaga/accent-id-commonaccent_xlsr-en-english", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier")
 
+
+ACCENT_LABELS = {
+    "us": "American Accent",
+    "england": "British Accent",
+    "australia": "Australian Accent",
+    "indian": "Indian Accent",
+    "canada": "Canadian Accent",
+    "bermuda": "Bermudian Accent",
+    "scotland": "Scottish Accent",
+    "african": "African Accent",
+    "ireland": "Irish Accent",
+    "newzealand": "New Zealand Accent",
+    "wales": "Welsh Accent",
+    "malaysia": "Malaysian Accent",
+    "philippines": "Philippine Accent",
+    "singapore": "Singaporean Accent",
+    "hongkong": "Hong Kong Accent",
+    "southatlandtic": "South Atlantic Accent"
+}
+
+
 # Placeholder accent classifier (replace with real one or your own logic)
 def classify_accent(audio_tensor, sample_rate):
     if sample_rate != 16000:
@@ -21,10 +42,16 @@ def classify_accent(audio_tensor, sample_rate):
 
     out_prob, score, index, text_lab = classifier.classify_batch(audio_tensor)
 
+    print(out_prob, score, index, text_lab)
+
+    accent_label = text_lab[0]
+    readable_accent = ACCENT_LABELS.get(accent_label, accent_label.title() + " Accent")
+
+
     return {
-        "accent":
-        "confidence":
-        "summary": "The speaker
+        "accent": readable_accent,
+        "confidence": round(score[0].item() * 100, 2),
+        "summary": f"The speaker is predicted to have a {readable_accent} with {round(score[0].item() * 100, 2)}% confidence."
     }
 
 def download_video(url):
@@ -119,23 +146,41 @@ with gr.Blocks() as demo:
 
     with gr.Tab("From URL"):
        url_input = gr.Textbox(label="Video URL (MP4)")
-        url_output = gr.Markdown()
+        url_output = gr.Markdown("""### Output will be shown here!""", elem_classes="output-box")
         gr.Button("Analyze").click(fn=analyze_accent, inputs=url_input, outputs=url_output)
 
+
+        gr.Examples(
+            examples=[["https://huggingface.co/spaces/fahadqazi/accent-classifier/raw/main/examples/american.mp4"], ["https://huggingface.co/spaces/fahadqazi/accent-classifier/raw/main/examples/british.mp4"]],
+            inputs=[url_input],
+            outputs=[url_output],
+            label="Example MP4 Video URLs",
+            examples_per_page=5
+        )
+
+
     with gr.Tab("From File"):
         file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
-        file_output = gr.Markdown()
+        file_output = gr.Markdown("""### Output will be shown here!""", elem_classes="output-box")
         gr.Button("Analyze").click(fn=analyze_accent, inputs=file_input, outputs=file_output)
 
 
     gr.Examples(
-        examples=[
-
-        ],
-
-
-        fn=analyze_accent,
-        label="Example MP4 Videos"
+        examples=[[os.getcwd() + "/examples/american.mp4"], [os.getcwd() + "/examples/british.mp4"]],
+        inputs=[file_input],
+        outputs=[file_output],
+        label="Example MP4 Videos",
+        examples_per_page=5
     )
 
+
+demo.css = """
+.output-box {
+    min-height: 100px;
+    overflow-y: auto;
+    padding: 10px;
+}
+"""
+
+
 demo.launch()
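The core of the app.py change is mapping the raw SpeechBrain label onto a readable result. Below is a minimal standalone sketch of that step, not part of the commit: format_result is a hypothetical helper for illustration, and the (out_prob, score, index, text_lab) shapes follow classify_batch as used in the diff.

# Sketch of the label-mapping step added in classify_accent above.
# format_result is hypothetical; score_value is already a plain float here,
# whereas the committed code calls score[0].item() first.
ACCENT_LABELS = {
    "us": "American Accent",
    "england": "British Accent",
    # ... abridged; see the full dict in the diff above
}

def format_result(score_value, text_lab):
    accent_label = text_lab[0]  # e.g. "us"
    # Fall back to a title-cased "<Label> Accent" for labels missing from the dict.
    readable = ACCENT_LABELS.get(accent_label, accent_label.title() + " Accent")
    confidence = round(score_value * 100, 2)
    return {
        "accent": readable,
        "confidence": confidence,
        "summary": f"The speaker is predicted to have a {readable} with {confidence}% confidence.",
    }

print(format_result(0.9731, ["us"]))
# -> accent "American Accent", confidence 97.31

Because unknown labels fall back to the title-cased form, the mapping does not need to cover every label the classifier can emit.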
examples/{cleo-abram.mp4 → american.mp4}
RENAMED
File without changes
examples/british.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10d349270f5dd2f8f155dab6c61907778966ed1b4f496c851622658cc3332eb5
+size 1019730
examples/irish.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a43872434c23a80ae3d22c08bc9c5d51cfbd83168301f0e88dcfd65f4925140
+size 352320
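Both new example videos are committed as Git LFS pointer files, whose three-line format appears verbatim above. A small sketch, not part of the commit, that reads a pointer file's oid and size, for example to sanity-check the examples without pulling the binaries:

# Sketch: parse a Git LFS pointer file such as examples/british.mp4 above.
# Only works on the raw pointer text (version / oid sha256:<hex> / size <bytes>),
# not on a checkout where LFS has already replaced it with the real video.
def read_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return {
        "oid": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

# With the values committed above, examples/irish.mp4 should report
# oid 8a43872434c23a80ae3d22c08bc9c5d51cfbd83168301f0e88dcfd65f4925140 and size_bytes 352320.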