Spaces:
Running
on
Zero
Running
on
Zero
layout
Browse files
app.py
CHANGED
@@ -529,113 +529,199 @@ def process_video(video_file):
|
|
529 |
logger.error(f"Traceback: {traceback.format_exc()}")
|
530 |
return f"Error processing video: {str(e)}\n\nPlease check that your video is a valid ASL video under 10 seconds."
|
531 |
|
532 |
-
# Create Gradio interface
|
533 |
-
def create_interface():
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
|
538 |
|
539 |
-
This app uses TTIC's foundation model SHuBERT (introduced in an ACL 2025 paper, see http://shubert.pals.ttic.edu).
|
540 |
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
|
552 |
-
|
553 |
-
|
554 |
|
555 |
|
556 |
-
# dailymoth_pathlist = download_example_videos()
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
|
586 |
-
|
587 |
-
|
588 |
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
# video_input = gr.Video(label="ASL Video (under 20 seconds)", format="mp4", height=480, width=640)
|
615 |
-
# text_output = gr.Textbox(label="English Translation", lines=5)
|
616 |
|
617 |
|
618 |
|
619 |
-
# interface = gr.Interface(
|
620 |
-
# fn=process_video,
|
621 |
-
# inputs=video_input,
|
622 |
-
# outputs=text_output,
|
623 |
-
# title="ASL Video to English Text Translation",
|
624 |
-
# description=description,
|
625 |
-
# article="",
|
626 |
-
# # examples=dailymoth_pathlist,
|
627 |
-
# # example_labels=["Officials with an EU force said they are searching for the missing."],
|
628 |
-
# allow_flagging="never",
|
629 |
-
# )
|
630 |
|
631 |
-
# gr.Examples(
|
632 |
-
# examples=dailymoth_pathlist,
|
633 |
-
# inputs=video_input,
|
634 |
-
# label="Click a video to try an example"
|
635 |
-
# )
|
636 |
-
|
637 |
|
638 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
639 |
|
640 |
# Create the demo
|
641 |
demo = create_interface()
|
|
|
529 |
logger.error(f"Traceback: {traceback.format_exc()}")
|
530 |
return f"Error processing video: {str(e)}\n\nPlease check that your video is a valid ASL video under 10 seconds."
|
531 |
|
532 |
+
# # Create Gradio interface
|
533 |
+
# def create_interface():
|
534 |
+
# """Create the Gradio interface"""
|
535 |
+
# description = """
|
536 |
+
# Upload an ASL* video to get an English translation. *Sign languages belonging to the same sign language family as ASL (e.g. Ghanaian Sign Language, as well as others listed in Table 7, Row 1 of https://aclanthology.org/2023.findings-emnlp.664.pdf) might also have non-trivial performance, although the model is trained only on ASL data.
|
537 |
|
538 |
|
539 |
+
# This app uses TTIC's foundation model SHuBERT (introduced in an ACL 2025 paper, see http://shubert.pals.ttic.edu).
|
540 |
|
541 |
+
# **Requirements:**
|
542 |
+
# - We recommend that videos be under 20 seconds. Performance for longer videos has not been tested.
|
543 |
+
# - The signer should be the main part (e.g. 90% of the area) of the video. Videos recorded from a phone camera, tablet, or personal computer should work well. Studio recordings where the signer is farther from the camera may not work as well.
|
544 |
+
# - Supported formats: MP4, MOV
|
545 |
+
|
546 |
+
# **Note:**
|
547 |
+
# - This is just a demo of a research project, and should NOT be used to replace an interpreter in any way.
|
548 |
+
# - Videos will be deleted after the output is generated.
|
549 |
+
# - Inquires or feedback? Please email us at [email protected]
|
550 |
+
# """
|
551 |
|
552 |
+
# if initialization_error:
|
553 |
+
# description += f"\n\n:warning: **Initialization Error:** {initialization_error}"
|
554 |
|
555 |
|
556 |
+
# # dailymoth_pathlist = download_example_videos()
|
557 |
+
|
558 |
+
# src_dir = os.path.dirname(os.path.abspath(__file__))
|
559 |
+
# dailymoth_pathlist = []
|
560 |
+
# L5hUxT5YbnY_crop_1 = "dailymoth_examples/L5hUxT5YbnY_crop_1.mp4"
|
561 |
+
# L5hUxT5YbnY_crop_2 = "dailymoth_examples/L5hUxT5YbnY_crop_2.mp4"
|
562 |
+
# L5hUxT5YbnY_crop_3 = "dailymoth_examples/L5hUxT5YbnY_crop_3.mp4"
|
563 |
+
# L5hUxT5YbnY_crop_4 = "dailymoth_examples/L5hUxT5YbnY_crop_4.mp4"
|
564 |
+
# L5hUxT5YbnY_crop_5 = "dailymoth_examples/L5hUxT5YbnY_crop_5.mp4"
|
565 |
+
# L5hUxT5YbnY_crop_6 = "dailymoth_examples/L5hUxT5YbnY_crop_6.mp4"
|
566 |
+
# L5hUxT5YbnY_crop_7 = "dailymoth_examples/L5hUxT5YbnY_crop_7.mp4"
|
567 |
+
# rDUefZVPfmU_crop_1 = "dailymoth_examples/rDUefZVPfmU_crop_1.mp4"
|
568 |
+
# rDUefZVPfmU_crop_2 = "dailymoth_examples/rDUefZVPfmU_crop_2.mp4"
|
569 |
+
# rDUefZVPfmU_crop_3 = "dailymoth_examples/rDUefZVPfmU_crop_3.mp4"
|
570 |
+
# rDUefZVPfmU_crop_4 = "dailymoth_examples/rDUefZVPfmU_crop_4.mp4"
|
571 |
+
# rDUefZVPfmU_crop_5 = "dailymoth_examples/rDUefZVPfmU_crop_5.mp4"
|
572 |
+
# rDUefZVPfmU_crop_6 = "dailymoth_examples/rDUefZVPfmU_crop_6.mp4"
|
573 |
+
# rDUefZVPfmU_crop_7 = "dailymoth_examples/rDUefZVPfmU_crop_7.mp4"
|
574 |
+
# rDUefZVPfmU_crop_8 = "dailymoth_examples/rDUefZVPfmU_crop_8.mp4"
|
575 |
+
# rDUefZVPfmU_crop_9 = "dailymoth_examples/rDUefZVPfmU_crop_9.mp4"
|
576 |
+
# rDUefZVPfmU_crop_10 = "dailymoth_examples/rDUefZVPfmU_crop_10.mp4"
|
577 |
+
# dailymoth_filenames = [L5hUxT5YbnY_crop_1, L5hUxT5YbnY_crop_2, L5hUxT5YbnY_crop_3, L5hUxT5YbnY_crop_4, L5hUxT5YbnY_crop_5, L5hUxT5YbnY_crop_6, L5hUxT5YbnY_crop_7, rDUefZVPfmU_crop_1, rDUefZVPfmU_crop_2, rDUefZVPfmU_crop_3, rDUefZVPfmU_crop_4, rDUefZVPfmU_crop_5, rDUefZVPfmU_crop_6, rDUefZVPfmU_crop_7, rDUefZVPfmU_crop_8, rDUefZVPfmU_crop_9, rDUefZVPfmU_crop_10]
|
578 |
|
579 |
+
# for filename in dailymoth_filenames:
|
580 |
+
# src_path = os.path.join(src_dir, filename)
|
581 |
+
# if os.path.exists(src_path):
|
582 |
+
# dailymoth_pathlist.append(src_path)
|
583 |
+
# else:
|
584 |
+
# print(f"Warning: File not found at {src_path}")
|
585 |
|
586 |
+
# with gr.Blocks(title="ASL Video to English Text Translation") as interface:
|
587 |
+
# gr.Markdown(f"# ASL Video to English Text Translation\n\n{description}")
|
588 |
|
589 |
+
# with gr.Row():
|
590 |
+
# with gr.Column():
|
591 |
+
# video_input = gr.Video(label="ASL Video (under 20 seconds)", format="mp4", height=480, width=640)
|
592 |
+
# submit_btn = gr.Button("Translate", variant="primary")
|
593 |
+
# with gr.Column():
|
594 |
+
# output_text = gr.Textbox(label="English Translation", lines=3)
|
595 |
+
# # Add examples section
|
596 |
+
# if dailymoth_pathlist: # Only show examples if we have valid files
|
597 |
+
# gr.Examples(
|
598 |
+
# examples=dailymoth_pathlist,
|
599 |
+
# inputs=video_input,
|
600 |
+
# label="Click a video to try an example"
|
601 |
+
# )
|
602 |
|
603 |
+
# # Add attribution note for the examples
|
604 |
+
# gr.Markdown("""
|
605 |
+
# ---
|
606 |
+
# **Example Videos Attribution:**
|
607 |
+
# The example videos used in this demo are from [The Daily Moth](https://www.youtube.com/@TheDailyMoth),
|
608 |
+
# a popular ASL news channel made by deaf creators. Specifically, they are from the Previews of [July 10](https://www.youtube.com/watch?v=rDUefZVPfmU) and [July 11](https://www.youtube.com/watch?v=L5hUxT5YbnY) 2025 Top Stories.
|
609 |
+
# The videos are only used for illustrative purposes, and all rights to the content belong to The Daily Moth. In this light, we encourage to subscribe to their [channel](https://members.dailymoth.com/about).
|
610 |
+
# """)
|
611 |
+
# else:
|
612 |
+
# gr.Markdown("*No example videos available at this time.*")
|
613 |
+
|
614 |
+
# # video_input = gr.Video(label="ASL Video (under 20 seconds)", format="mp4", height=480, width=640)
|
615 |
+
# # text_output = gr.Textbox(label="English Translation", lines=5)
|
616 |
|
617 |
|
618 |
|
619 |
+
# # interface = gr.Interface(
|
620 |
+
# # fn=process_video,
|
621 |
+
# # inputs=video_input,
|
622 |
+
# # outputs=text_output,
|
623 |
+
# # title="ASL Video to English Text Translation",
|
624 |
+
# # description=description,
|
625 |
+
# # article="",
|
626 |
+
# # # examples=dailymoth_pathlist,
|
627 |
+
# # # example_labels=["Officials with an EU force said they are searching for the missing."],
|
628 |
+
# # allow_flagging="never",
|
629 |
+
# # )
|
630 |
|
631 |
+
# # gr.Examples(
|
632 |
+
# # examples=dailymoth_pathlist,
|
633 |
+
# # inputs=video_input,
|
634 |
+
# # label="Click a video to try an example"
|
635 |
+
# # )
|
636 |
+
# submit_btn.click(fn=process_video, inputs=video_input, outputs=output_text)
|
637 |
|
638 |
+
# return interface
|
639 |
+
|
640 |
+
|
641 |
+
# Markdown shown under the page title. Kept flush-left so the rendered text
# does not depend on how deeply the code that uses it is indented.
# NOTE(review): the contact address is reproduced exactly as it appears in the
# reviewed source, where it looks redacted -- confirm the real address.
_DESCRIPTION_MD = """
Upload an ASL* video to get an English translation. *Sign languages belonging to the same sign language family as ASL (e.g. Ghanaian Sign Language, as well as others listed in Table 7, Row 1 of https://aclanthology.org/2023.findings-emnlp.664.pdf) might also have non-trivial performance, although the model is trained only on ASL data.


This app uses TTIC's foundation model SHuBERT (introduced in an ACL 2025 paper, see http://shubert.pals.ttic.edu).

**Requirements:**
- We recommend that videos be under 20 seconds. Performance for longer videos has not been tested.
- The signer should be the main part (e.g. 90% of the area) of the video. Videos recorded from a phone camera, tablet, or personal computer should work well. Studio recordings where the signer is farther from the camera may not work as well.
- Supported formats: MP4, MOV

**Note:**
- This is just a demo of a research project, and should NOT be used to replace an interpreter in any way.
- Videos will be deleted after the output is generated.
- Inquiries or feedback? Please email us at [email protected]
"""

# Credit line rendered below the example gallery.
_EXAMPLES_ATTRIBUTION_MD = """
---
**Example Videos Attribution:**
The example videos used in this demo are from [The Daily Moth](https://www.youtube.com/@TheDailyMoth),
a popular ASL news channel made by deaf creators. Specifically, they are from the Previews of [July 10](https://www.youtube.com/watch?v=rDUefZVPfmU) and [July 11](https://www.youtube.com/watch?v=L5hUxT5YbnY) 2025 Top Stories.
The videos are only used for illustrative purposes, and all rights to the content belong to The Daily Moth. In this light, we encourage you to subscribe to their [channel](https://members.dailymoth.com/about).
"""


def _collect_example_videos(src_dir):
    """Return the example clip paths under *src_dir* that exist on disk.

    Paths are returned in display order: the seven ``L5hUxT5YbnY`` crops
    followed by the ten ``rDUefZVPfmU`` crops. A clip that is missing is
    logged and skipped rather than treated as an error.
    """
    # (source video id, number of cropped clips cut from it)
    example_clips = (("L5hUxT5YbnY", 7), ("rDUefZVPfmU", 10))

    found = []
    for video_id, clip_count in example_clips:
        for clip_idx in range(1, clip_count + 1):
            src_path = os.path.join(
                src_dir, f"dailymoth_examples/{video_id}_crop_{clip_idx}.mp4"
            )
            if os.path.exists(src_path):
                found.append(src_path)
            else:
                logger.warning(f"File not found at {src_path}")
    return found


def create_interface():
    """Create the Gradio interface.

    Builds a ``gr.Blocks`` page with a video input and a "Translate" button
    on the left, and the translation textbox plus the example gallery on the
    right. Clicking the button calls ``process_video`` with the video and
    writes its result into the textbox.

    Returns:
        The ``gr.Blocks`` instance.
    """
    description = _DESCRIPTION_MD
    if initialization_error:
        # Surface start-up failures on the page itself.
        description += f"\n\n:warning: **Initialization Error:** {initialization_error}"

    # Example clips are looked up relative to this file.
    src_dir = os.path.dirname(os.path.abspath(__file__))
    dailymoth_pathlist = _collect_example_videos(src_dir)

    with gr.Blocks(title="ASL Video to English Text Translation") as interface:
        gr.Markdown(f"# ASL Video to English Text Translation\n\n{description}")

        with gr.Row():
            with gr.Column():
                video_input = gr.Video(
                    label="ASL Video (under 20 seconds)",
                    format="mp4",
                    height=480,
                    width=640,
                )
                submit_btn = gr.Button("Translate", variant="primary")
            with gr.Column():
                output_text = gr.Textbox(label="English Translation", lines=3)

                # Examples live in the right column, under the output box.
                if dailymoth_pathlist:  # only show examples if we have valid files
                    gr.Examples(
                        examples=dailymoth_pathlist,
                        inputs=video_input,
                        label="Click a video to try an example",
                    )
                    gr.Markdown(_EXAMPLES_ATTRIBUTION_MD)
                else:
                    gr.Markdown("*No example videos available at this time.*")

        # Wire the handler only after both the input and output components exist.
        submit_btn.click(fn=process_video, inputs=video_input, outputs=output_text)

    return interface
|
724 |
+
|
725 |
|
726 |
# Create the demo
|
727 |
# Built once when this module is imported; holds the gr.Blocks app that
# create_interface() returns.
demo = create_interface()
|