Update old_app.py

old_app.py CHANGED (+83 -66)

@@ -1,85 +1,102 @@
-# --- main.py (your Gradio app file) ---
 import warnings
 warnings.filterwarnings("ignore")
 import gradio as gr
-
-from src.
+import pandas as pd
+from src.video_model import describe_video
+from src.utils import parse_string, parse_annotations
+import os
 
-# --- Function to
-def
-
+# --- Function to construct the final query ---
+def process_video_and_questions(video, standing, hands, location, screen):
+    video_name = os.path.basename(video)
+    query = f"Answer the questions from the video\n"
     additional_info = []
-    if
-        additional_info.append("Is the subject in the video standing or sitting
+    if standing:
+        additional_info.append("Is the subject in the video standing or sitting?\n")
     if hands:
-        additional_info.append("Is the subject holding any object in their hands
+        additional_info.append("Is the subject holding any object in their hands?\n")
     if location:
-        additional_info.append("Is the subject present indoors
+        additional_info.append("Is the subject present indoors?\n")
     if screen:
-        additional_info.append("Is the subject interacting with a screen in the background by facing the screen
+        additional_info.append("Is the subject interacting with a screen in the background by facing the screen?\n")
+
+    end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the below examples\n:
+    <annotation>indoors: 0</annotation>
+    <annotation>standing: 1</annotation>
+    <annotation>hands.free: 0</annotation>
+    <annotation>screen.interaction_yes: 0</annotation>
+    """
 
     final_query = query + " " + " ".join(additional_info)
-
-
+    final_prompt = final_query + " " + end_query
+
+    response = describe_video(video, final_prompt)
+    final_response = f"<video_name>{video_name}</video_name>" + " \n" + response
 
-
-
-
-
+    conditions = {
+        'standing': (standing, 'standing: 1', 'standing: None'),
+        'hands': (hands, 'hands.free: 1', 'hands.free: None'),
+        'location': (location, 'indoors: 1', 'indoors: None'),
+        'screen': (screen, 'screen.interaction_yes: 1', 'screen.interaction_yes: None')
+    }
+
+    for key, (condition, to_replace, replacement) in conditions.items():
+        if not condition:
+            final_response = final_response.replace(to_replace, replacement)
+
+    return final_response
 
-
-
-
-    gr.Markdown(
-        "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"
-    )
-
-    with gr.Row():
-        with gr.Column():
-            # Input components
-            video = gr.Video(label="Video")
-            sitting = gr.Checkbox(label="Sitting/Standing")
-            hands = gr.Checkbox(label="Hands Free/Not Free")
-            location = gr.Checkbox(label="Indoors/Outdoors")
-            screen = gr.Checkbox(label="Screen Interaction")
+def process_multiple_videos(video_files, standing, hands, location, screen):
+    # Initialize an empty DataFrame to store results for all videos
+    all_results_df = pd.DataFrame()
 
-
-
-
-
+    for video in video_files:
+        final_response = process_video_and_questions(video.name, standing, hands, location, screen)
+        video_df = output_to_csv(final_response)
+        all_results_df = pd.concat([all_results_df, video_df], ignore_index=True)
+
+    # Save the combined results as a CSV file
+    csv_file_path = "multiple_videos_annotations.csv"
+    all_results_df.to_csv(csv_file_path, index=False)
+
+    return csv_file_path  # Return the path to the CSV file for download
 
-
-
-
-
+def output_to_csv(final_response):
+    parsed_content = parse_string(final_response, ["video_name", "annotation"])
+    video_name = parsed_content['video_name'][0] if parsed_content['video_name'] else None
+    annotations_dict = parse_annotations(parsed_content['annotation']) if parsed_content['annotation'] else {}
+
+    df = pd.DataFrame([{'video_name': video_name, **annotations_dict}])
+
+    return df
 
-
-
-
-        ["videos/2016-01-01_0200_US_KNBC_Channel_4_News_1329.12-1333.29_tonight.mp4"],
-        ["videos/2016-01-01_0830_US_KNBC_Tonight_Show_with_Jimmy_Fallon_725.45-729.76_tonight.mp4"],
-        ["videos/2016-01-01_0200_US_KOCE_The_PBS_Newshour_577.03-581.31_tonight.mp4"],
-        ["videos/2016-01-01_1400_US_KTTV-FOX_Morning_News_at_6AM_1842.36-1846.68_this_year.mp4"],
-        ["videos/2016-01-02_0735_US_KCBS_Late_Show_with_Stephen_Colbert_285.94-290.67_this_year.mp4"],
-        ["videos/2016-01-13_2200_US_KTTV-FOX_The_Doctor_Oz_Show_1709.79-1714.17_this_month.mp4"],
-        ["videos/2016-01-01_1400_US_KTTV-FOX_Morning_News_at_6AM_1842.36-1846.68_this_year.mp4"],
-        ["videos/2016-01-01_1300_US_KNBC_Today_in_LA_at_5am_12.46-16.95_this_morning.mp4"],
-        ["videos/2016-01-05_0200_US_KNBC_Channel_4_News_1561.29-1565.95_next_week.mp4"],
-        ["videos/2016-01-28_0700_US_KNBC_Channel_4_News_at_11PM_629.56-633.99_in_the_future.mp4"]
-    ]
+title = "GSoC Super Raid Annotator"
+description = "Annotate Multiple Videos"
+article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"
 
-
-    process_video_btn.click(
-        fn=process_video,
-        inputs=[video, sitting, hands, location, screen],
-        outputs=video_description,
-    )
+custom_theme = gr.themes.Soft(primary_hue="red", secondary_hue="red")
 
-
-
-
-
+with gr.Blocks(theme=custom_theme) as demo:
+    gr.Markdown(f"# {title}")
+    gr.Markdown(description)
+    gr.Markdown(article)
+
+    with gr.Row():
+        with gr.Column():
+            video_files = gr.Files(label="Upload Videos", file_count="multiple")
+            standing = gr.Checkbox(label="Standing")
+            hands = gr.Checkbox(label="Hands Free")
+            location = gr.Checkbox(label="Indoors")
+            screen = gr.Checkbox(label="Screen Interaction")
+            generate_csv_btn = gr.Button("Process and Generate CSV")
+
+        with gr.Column():
+            csv_output = gr.File(label="Download CSV", interactive=False)
+
+    generate_csv_btn.click(
+        fn=process_multiple_videos,
+        inputs=[video_files, standing, hands, location, screen],
+        outputs=csv_output
     )
-
-# Launch the interface
+
 demo.launch(debug=False)
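The new code depends on two helpers imported from src.utils that this diff does not show. Below is a minimal sketch of what they plausibly do, inferred only from their call sites in output_to_csv; the regex-based tag extraction and the exact return shapes are assumptions, not the repository's actual implementation.

import re

# Hypothetical stand-ins for src.utils.parse_string / parse_annotations.
def parse_string(text, tags):
    # Collect the contents of every <tag>...</tag> pair, keyed by tag name,
    # e.g. {'video_name': ['clip.mp4'], 'annotation': ['indoors: 0', ...]}.
    return {tag: re.findall(rf"<{tag}>(.*?)</{tag}>", text, re.DOTALL) for tag in tags}

def parse_annotations(annotations):
    # Turn ['indoors: 0', 'standing: 1'] into {'indoors': '0', 'standing': '1'},
    # which output_to_csv then spreads into a one-row DataFrame.
    parsed = {}
    for item in annotations:
        key, _, value = item.partition(":")
        parsed[key.strip()] = value.strip()
    return parsed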
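For reference, the conditions loop in process_video_and_questions rewrites annotations for any checkbox the user left unticked, so those fields land in the CSV as None. A worked example with an invented sample response (only two of the four fields shown):

sample = ("<video_name>clip.mp4</video_name> \n"
          "<annotation>indoors: 1</annotation>\n"
          "<annotation>standing: 1</annotation>")

conditions = {
    'standing': (True, 'standing: 1', 'standing: None'),   # checkbox ticked
    'location': (False, 'indoors: 1', 'indoors: None'),    # checkbox unticked
}

for key, (condition, to_replace, replacement) in conditions.items():
    if not condition:
        sample = sample.replace(to_replace, replacement)

# 'indoors: 1' becomes 'indoors: None'; 'standing: 1' is left intact.

Note that the substitution only matches a ': 1' answer, so a ': 0' reported for an unticked field passes through to the CSV unchanged.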