ManishThota committed on
Commit
07b3c85
·
verified ·
1 Parent(s): db192ba

Update old_app.py

Browse files
Files changed (1) hide show
  1. old_app.py +83 -66
old_app.py CHANGED
@@ -1,85 +1,102 @@
1
- # --- main.py (your Gradio app file) ---
2
  import warnings
3
  warnings.filterwarnings("ignore")
4
  import gradio as gr
5
- from src.video_model import describe_video # Ensure this function is defined in src.video_model
6
- from src.text_processor import process_description # Ensure this function is defined in src.text_processor
 
 
7
 
8
- # --- Function to handle both video and text processing ---
9
- def process_video(video, sitting, hands, location, screen):
10
- query = "Describe this video in detail and answer the questions."
 
11
  additional_info = []
12
- if sitting:
13
- additional_info.append("Is the subject in the video standing or sitting?")
14
  if hands:
15
- additional_info.append("Is the subject holding any object in their hands, if so, are the hands free?")
16
  if location:
17
- additional_info.append("Is the subject present indoors or outdoors?")
18
  if screen:
19
- additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
 
 
 
 
 
 
 
20
 
21
  final_query = query + " " + " ".join(additional_info)
22
- video_description = describe_video(video, final_query)
23
- return video_description
 
 
24
 
25
- # --- Function to handle JSON processing ---
26
- def process_and_display_json(video_description):
27
- json_response = process_description(video_description)
28
- return json_response
 
 
 
 
 
 
 
 
29
 
30
- # --- Gradio Blocks Interface ---
31
- with gr.Blocks(title="GSoC Super Raid Annotator", theme=gr.themes.Soft(primary_hue="red", secondary_hue="red")) as demo:
32
- gr.Markdown("Annotate Videos")
33
- gr.Markdown(
34
- "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"
35
- )
36
-
37
- with gr.Row():
38
- with gr.Column():
39
- # Input components
40
- video = gr.Video(label="Video")
41
- sitting = gr.Checkbox(label="Sitting/Standing")
42
- hands = gr.Checkbox(label="Hands Free/Not Free")
43
- location = gr.Checkbox(label="Indoors/Outdoors")
44
- screen = gr.Checkbox(label="Screen Interaction")
45
 
46
- # Submit buttons
47
- with gr.Row():
48
- process_video_btn = gr.Button("Process Video")
49
- process_json_btn = gr.Button("Process JSON")
 
 
 
 
 
 
50
 
51
- with gr.Column():
52
- # Output components
53
- video_description = gr.Textbox(label="Video Description", show_label=True, show_copy_button=True)
54
- json_output = gr.JSON(label="JSON Output")
 
 
 
 
55
 
56
- # Examples for the interface
57
- examples = [
58
- ["videos/2016-01-01_0100_US_KNBC_Channel_4_News_1867.16-1871.38_now.mp4"],
59
- ["videos/2016-01-01_0200_US_KNBC_Channel_4_News_1329.12-1333.29_tonight.mp4"],
60
- ["videos/2016-01-01_0830_US_KNBC_Tonight_Show_with_Jimmy_Fallon_725.45-729.76_tonight.mp4"],
61
- ["videos/2016-01-01_0200_US_KOCE_The_PBS_Newshour_577.03-581.31_tonight.mp4"],
62
- ["videos/2016-01-01_1400_US_KTTV-FOX_Morning_News_at_6AM_1842.36-1846.68_this_year.mp4"],
63
- ["videos/2016-01-02_0735_US_KCBS_Late_Show_with_Stephen_Colbert_285.94-290.67_this_year.mp4"],
64
- ["videos/2016-01-13_2200_US_KTTV-FOX_The_Doctor_Oz_Show_1709.79-1714.17_this_month.mp4"],
65
- ["videos/2016-01-01_1400_US_KTTV-FOX_Morning_News_at_6AM_1842.36-1846.68_this_year.mp4"],
66
- ["videos/2016-01-01_1300_US_KNBC_Today_in_LA_at_5am_12.46-16.95_this_morning.mp4"],
67
- ["videos/2016-01-05_0200_US_KNBC_Channel_4_News_1561.29-1565.95_next_week.mp4"],
68
- ["videos/2016-01-28_0700_US_KNBC_Channel_4_News_at_11PM_629.56-633.99_in_the_future.mp4"]
69
- ]
70
 
71
- # Event handling
72
- process_video_btn.click(
73
- fn=process_video,
74
- inputs=[video, sitting, hands, location, screen],
75
- outputs=video_description,
76
- )
77
 
78
- process_json_btn.click(
79
- fn=process_and_display_json,
80
- inputs=video_description,
81
- outputs=json_output,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  )
83
-
84
- # Launch the interface
85
  demo.launch(debug=False)
 
 
1
  import warnings
2
  warnings.filterwarnings("ignore")
3
  import gradio as gr
4
+ import pandas as pd
5
+ from src.video_model import describe_video
6
+ from src.utils import parse_string, parse_annotations
7
+ import os
8
 
9
# --- Function to construct the final query ---
def process_video_and_questions(video, standing, hands, location, screen):
    """Build the annotation prompt for one video, run the model, and post-process the response.

    Args:
        video: Path to the video file; its basename is embedded in the output
            and the path is forwarded to ``describe_video``.
        standing: If truthy, ask the standing/sitting question.
        hands: If truthy, ask the holding-object question.
        location: If truthy, ask the indoors question.
        screen: If truthy, ask the screen-interaction question.

    Returns:
        str: Model response prefixed with a ``<video_name>`` tag, where
        positive annotations for questions that were NOT asked are rewritten
        to ``None``.
    """
    video_name = os.path.basename(video)
    # Fixed: plain literal — the original used an f-string with no placeholders.
    query = "Answer the questions from the video\n"
    additional_info = []
    if standing:
        additional_info.append("Is the subject in the video standing or sitting?\n")
    if hands:
        additional_info.append("Is the subject holding any object in their hands?\n")
    if location:
        additional_info.append("Is the subject present indoors?\n")
    if screen:
        additional_info.append("Is the subject interacting with a screen in the background by facing the screen?\n")

    # Instructs the model to emit machine-parseable <annotation> tags that
    # output_to_csv() later extracts.
    end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the below examples\n:
    <annotation>indoors: 0</annotation>
    <annotation>standing: 1</annotation>
    <annotation>hands.free: 0</annotation>
    <annotation>screen.interaction_yes: 0</annotation>
    """

    final_query = query + " " + " ".join(additional_info)
    final_prompt = final_query + " " + end_query

    response = describe_video(video, final_prompt)
    final_response = f"<video_name>{video_name}</video_name>" + " \n" + response

    # (flag, text the model may have emitted, replacement when unasked)
    conditions = {
        'standing': (standing, 'standing: 1', 'standing: None'),
        'hands': (hands, 'hands.free: 1', 'hands.free: None'),
        'location': (location, 'indoors: 1', 'indoors: None'),
        'screen': (screen, 'screen.interaction_yes: 1', 'screen.interaction_yes: None')
    }

    # For every question that was NOT asked, rewrite a positive annotation to
    # None so the CSV marks it as "no information".
    # NOTE(review): only the "...: 1" form is rewritten; a "...: 0" emitted for
    # an unasked question passes through unchanged — confirm this is intended.
    # Fixed: iterate values() directly; the dict key was unused.
    for condition, to_replace, replacement in conditions.values():
        if not condition:
            final_response = final_response.replace(to_replace, replacement)

    return final_response
48
 
49
def process_multiple_videos(video_files, standing, hands, location, screen):
    """Annotate every uploaded video and write the combined results to one CSV.

    Args:
        video_files: Uploaded file objects; each must expose a ``.name`` path
            (Gradio ``gr.Files`` upload objects do).
        standing, hands, location, screen: Checkbox flags forwarded to
            ``process_video_and_questions``.

    Returns:
        str: Path to the written CSV file (for the download component).
    """
    # Fixed: collect one DataFrame per video and concatenate ONCE at the end.
    # The original called pd.concat inside the loop, re-copying the
    # accumulated frame on every iteration (quadratic in the number of videos).
    frames = [
        output_to_csv(
            process_video_and_questions(video.name, standing, hands, location, screen)
        )
        for video in video_files
    ]
    # Guard the empty upload: pd.concat([]) raises, while the original
    # produced an empty DataFrame — preserve that behavior.
    all_results_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Save the combined results as a CSV file.
    csv_file_path = "multiple_videos_annotations.csv"
    all_results_df.to_csv(csv_file_path, index=False)

    return csv_file_path  # Return the path to the CSV file for download
63
 
64
def output_to_csv(final_response):
    """Convert one tagged model response into a single-row DataFrame.

    Extracts the ``<video_name>`` and ``<annotation>`` tags from the response
    and returns a one-row frame with a ``video_name`` column plus one column
    per parsed annotation key.
    """
    parsed = parse_string(final_response, ["video_name", "annotation"])

    names = parsed['video_name']
    video_name = names[0] if names else None

    tags = parsed['annotation']
    annotations = parse_annotations(tags) if tags else {}

    row = {'video_name': video_name}
    row.update(annotations)
    return pd.DataFrame([row])
72
 
73
# --- Page text shown above the interface ---
title = "GSoC Super Raid Annotator"
description = "Annotate Multiple Videos"
article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"

# Red-accented soft theme applied to the whole Blocks app.
custom_theme = gr.themes.Soft(primary_hue="red", secondary_hue="red")

# --- Gradio Blocks UI: multi-video upload -> per-video annotation -> CSV download ---
with gr.Blocks(theme=custom_theme) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)
    gr.Markdown(article)

    with gr.Row():
        # Left column: inputs (videos + the four question checkboxes).
        with gr.Column():
            video_files = gr.Files(label="Upload Videos", file_count="multiple")
            standing = gr.Checkbox(label="Standing")
            hands = gr.Checkbox(label="Hands Free")
            location = gr.Checkbox(label="Indoors")
            screen = gr.Checkbox(label="Screen Interaction")
            generate_csv_btn = gr.Button("Process and Generate CSV")

        # Right column: output — the generated CSV offered as a download.
        with gr.Column():
            csv_output = gr.File(label="Download CSV", interactive=False)

    # Button wiring: run all uploaded videos through the model and return the
    # path of the combined-annotations CSV to the file component.
    generate_csv_btn.click(
        fn=process_multiple_videos,
        inputs=[video_files, standing, hands, location, screen],
        outputs=csv_output
    )

demo.launch(debug=False)