ManishThota committed
Commit dd3f4f3 · verified
1 Parent(s): 07b3c85

Rename old_app.py to multi_video_app.py

Files changed (2)
  1. multi_video_app.py +125 -0
  2. old_app.py +0 -102
multi_video_app.py ADDED
@@ -0,0 +1,125 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ import csv
+ import os
+ from typing import Dict, List
+
+ import gradio as gr
+ import torch
+
+ from src.video_model import describe_video
+ from src.utils import parse_string, parse_annotations
+
+
+ # Save the collected observations to a CSV file and return its path
+ def save_to_csv(observations: List[Dict], output_dir: str = "outputs") -> str:
+     os.makedirs(output_dir, exist_ok=True)
+     csv_file = os.path.join(output_dir, "video_observations.csv")
+
+     with open(csv_file, mode='w', newline='') as file:
+         writer = csv.writer(file)
+         writer.writerow(["video_name", "standing", "hands_free", "indoors", "screen_interaction_yes"])
+         for observation in observations:
+             writer.writerow([
+                 observation['video_name'],
+                 observation['standing'],
+                 observation['hands_free'],
+                 observation['indoors'],
+                 observation['screen_interaction_yes']
+             ])
+
+     return csv_file
+
+
+ # Process a single video and return its observation data
+ def process_single_video(video_path: str, sitting, hands, location, screen) -> Dict:
+     video_name = os.path.basename(video_path)  # Extract the video name from the path
+     query = "Describe this video in detail and answer the questions."
+     additional_info = []
+     if sitting:
+         additional_info.append("Is the subject in the video standing or sitting?")
+     if hands:
+         additional_info.append("Is the subject holding any object in their hands? If so, the hands are not free; otherwise, they are free.")
+     if location:
+         additional_info.append("Is the subject present indoors or outdoors?")
+     if screen:
+         additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
+
+     end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the examples below:
+     <annotation>indoors: 0</annotation>
+     <annotation>standing: 1</annotation>
+     <annotation>hands.free: 0</annotation>
+     <annotation>screen.interaction_yes: 0</annotation>
+     """
+
+     final_query = query + " " + " ".join(additional_info)
+     final_prompt = final_query + " " + end_query
+
+     # describe_video runs model inference on the video with the assembled prompt
+     response = describe_video(video_path, final_prompt)
+
+     try:
+         # Parse the <annotation> tags out of the model response
+         parsed_data = parse_string(response, ["annotation"])
+         annotations_list = parsed_data.get("annotation", [])
+         annotations_dict = parse_annotations(annotations_list)
+
+         return {
+             "video_name": video_name,
+             "standing": annotations_dict.get("standing", 'N/A'),
+             "hands_free": annotations_dict.get("hands.free", 'N/A'),
+             "indoors": annotations_dict.get("indoors", 'N/A'),
+             "screen_interaction_yes": annotations_dict.get("screen.interaction_yes", 'N/A'),
+         }
+     except Exception as e:
+         return {"error": f"An error occurred with {video_name}: {e}"}
+
+
+ # Process every uploaded video in sequence
+ def process_folder_of_videos(video_files: List[str], sitting, hands, location, screen):
+     all_observations = []
+     # gr.File with file_count="directory" passes the uploaded files as a list of paths
+     video_paths = [f for f in video_files if f.lower().endswith(('.mp4', '.avi', '.mkv'))]
+
+     for video_path in video_paths:
+         observation = process_single_video(video_path, sitting, hands, location, screen)
+         if "error" not in observation:
+             all_observations.append(observation)
+         else:
+             print(observation["error"])  # Log any errors
+
+         # Free GPU memory between videos
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+
+     # Save all observations to a CSV file and return the file path
+     csv_file = save_to_csv(all_observations)
+     return "Processing completed. Download the CSV file.", csv_file
+
+
+ # Gradio interface
+ def gradio_interface(video_files, sitting, hands, location, screen):
+     return process_folder_of_videos(video_files, sitting, hands, location, screen)
+
+
+ # Inputs (Gradio has no gr.Directory component; gr.File with
+ # file_count="directory" lets the user pick a folder of videos)
+ videos_folder = gr.File(label="Upload a folder of videos", file_count="directory")
+ sitting = gr.Checkbox(label="Sitting/Standing")
+ hands = gr.Checkbox(label="Hands Free/Not Free")
+ location = gr.Checkbox(label="Indoors/Outdoors")
+ screen = gr.Checkbox(label="Screen Interaction")
+
+ # Outputs
+ response = gr.Textbox(label="Status")
+ download_link = gr.File(label="Download CSV")
+
+ # Interface setup
+ interface = gr.Interface(
+     fn=gradio_interface,
+     inputs=[videos_folder, sitting, hands, location, screen],
+     outputs=[response, download_link],
+     title="Batch Video Annotation",
+     description="Upload a folder of videos; they are processed sequentially and the results are saved to a downloadable CSV file.",
+     theme=gr.themes.Soft(primary_hue="red", secondary_hue="red"),
+     allow_flagging="never"
+ )
+
+ # Launch the interface
+ interface.launch(debug=False)
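
Note: both versions of the app import parse_string and parse_annotations from src/utils, which are not part of this diff. Below is a minimal sketch of what those helpers might look like, inferred from the <annotation>key: value</annotation> format the prompts request; every detail here is an assumption, not the repository's actual implementation.

# Hypothetical sketch of the src/utils helpers, inferred from the prompt format above.
import re
from typing import Dict, List

def parse_string(response: str, tags: List[str]) -> Dict[str, List[str]]:
    """Collect the text inside each <tag>...</tag> pair found in the response."""
    results = {}
    for tag in tags:
        pattern = rf"<{tag}>(.*?)</{tag}>"
        results[tag] = re.findall(pattern, response, re.DOTALL)
    return results

def parse_annotations(annotations_list: List[str]) -> Dict[str, str]:
    """Turn ['indoors: 0', 'standing: 1'] into {'indoors': '0', 'standing': '1'}."""
    annotations = {}
    for item in annotations_list:
        if ":" in item:
            key, value = item.split(":", 1)
            annotations[key.strip()] = value.strip()
    return annotations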
old_app.py DELETED
@@ -1,102 +0,0 @@
- import warnings
- warnings.filterwarnings("ignore")
- import gradio as gr
- import pandas as pd
- from src.video_model import describe_video
- from src.utils import parse_string, parse_annotations
- import os
-
- # --- Function to construct the final query ---
- def process_video_and_questions(video, standing, hands, location, screen):
-     video_name = os.path.basename(video)
-     query = f"Answer the questions from the video\n"
-     additional_info = []
-     if standing:
-         additional_info.append("Is the subject in the video standing or sitting?\n")
-     if hands:
-         additional_info.append("Is the subject holding any object in their hands?\n")
-     if location:
-         additional_info.append("Is the subject present indoors?\n")
-     if screen:
-         additional_info.append("Is the subject interacting with a screen in the background by facing the screen?\n")
-
-     end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the below examples\n:
-     <annotation>indoors: 0</annotation>
-     <annotation>standing: 1</annotation>
-     <annotation>hands.free: 0</annotation>
-     <annotation>screen.interaction_yes: 0</annotation>
-     """
-
-     final_query = query + " " + " ".join(additional_info)
-     final_prompt = final_query + " " + end_query
-
-     response = describe_video(video, final_prompt)
-     final_response = f"<video_name>{video_name}</video_name>" + " \n" + response
-
-     conditions = {
-         'standing': (standing, 'standing: 1', 'standing: None'),
-         'hands': (hands, 'hands.free: 1', 'hands.free: None'),
-         'location': (location, 'indoors: 1', 'indoors: None'),
-         'screen': (screen, 'screen.interaction_yes: 1', 'screen.interaction_yes: None')
-     }
-
-     for key, (condition, to_replace, replacement) in conditions.items():
-         if not condition:
-             final_response = final_response.replace(to_replace, replacement)
-
-     return final_response
-
- def process_multiple_videos(video_files, standing, hands, location, screen):
-     # Initialize an empty DataFrame to store results for all videos
-     all_results_df = pd.DataFrame()
-
-     for video in video_files:
-         final_response = process_video_and_questions(video.name, standing, hands, location, screen)
-         video_df = output_to_csv(final_response)
-         all_results_df = pd.concat([all_results_df, video_df], ignore_index=True)
-
-     # Save the combined results as a CSV file
-     csv_file_path = "multiple_videos_annotations.csv"
-     all_results_df.to_csv(csv_file_path, index=False)
-
-     return csv_file_path  # Return the path to the CSV file for download
-
- def output_to_csv(final_response):
-     parsed_content = parse_string(final_response, ["video_name", "annotation"])
-     video_name = parsed_content['video_name'][0] if parsed_content['video_name'] else None
-     annotations_dict = parse_annotations(parsed_content['annotation']) if parsed_content['annotation'] else {}
-
-     df = pd.DataFrame([{'video_name': video_name, **annotations_dict}])
-
-     return df
-
- title = "GSoC Super Raid Annotator"
- description = "Annotate Multiple Videos"
- article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"
-
- custom_theme = gr.themes.Soft(primary_hue="red", secondary_hue="red")
-
- with gr.Blocks(theme=custom_theme) as demo:
-     gr.Markdown(f"# {title}")
-     gr.Markdown(description)
-     gr.Markdown(article)
-
-     with gr.Row():
-         with gr.Column():
-             video_files = gr.Files(label="Upload Videos", file_count="multiple")
-             standing = gr.Checkbox(label="Standing")
-             hands = gr.Checkbox(label="Hands Free")
-             location = gr.Checkbox(label="Indoors")
-             screen = gr.Checkbox(label="Screen Interaction")
-             generate_csv_btn = gr.Button("Process and Generate CSV")
-
-         with gr.Column():
-             csv_output = gr.File(label="Download CSV", interactive=False)
-
-     generate_csv_btn.click(
-         fn=process_multiple_videos,
-         inputs=[video_files, standing, hands, location, screen],
-         outputs=csv_output
-     )
-
- demo.launch(debug=False)
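
For a sense of the data flow, here is a hypothetical model response and what the helpers sketched above would extract from it. The response text and values are invented for illustration, not actual model output.

# Hypothetical end-to-end illustration (invented response; not real model output)
response = (
    "<video_name>clip_001.mp4</video_name> \n"
    "The subject sits at a desk indoors, typing on a laptop.\n"
    "<annotation>indoors: 1</annotation>\n"
    "<annotation>standing: 0</annotation>\n"
    "<annotation>hands.free: 0</annotation>\n"
    "<annotation>screen.interaction_yes: 1</annotation>"
)

parsed = parse_string(response, ["video_name", "annotation"])
print(parsed["video_name"])                    # ['clip_001.mp4']
print(parse_annotations(parsed["annotation"]))
# {'indoors': '1', 'standing': '0', 'hands.free': '0', 'screen.interaction_yes': '1'}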