Spaces:

ManishThota
/

GSoC-Super-Rapid-Annotator

Runtime error

App Files Files Community

ManishThota committed on Aug 21, 2024

Commit

88cedc3

verified ·

1 Parent(s): 3035f99

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -15

app.py CHANGED Viewed

@@ -6,7 +6,42 @@ from src.video_model import describe_video
 from src.utils import parse_string, parse_annotations
 import os
-# --- Function to construct the final query ---
 def process_video_and_questions(video, sitting, hands, location, screen):
     # Extract the video name (filename)
     video_name = os.path.basename(video)
@@ -14,32 +49,45 @@ def process_video_and_questions(video, sitting, hands, location, screen):
     # Construct the query with the video name included
     query = f"Describe the video in detail and answer the questions"
     additional_info = []
-    if sitting:
         additional_info.append("Is the subject in the video standing or sitting?")
-    if hands:
         additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
-    if location:
         additional_info.append("Is the subject present indoors or outdoors?")
-    if screen:
         additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
     end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Below is an example:
-        <instructions>
-            <annotation>indoors: 0</annotation>
-            <annotation>standing: 1</annotation>
-            <annotation>hands.free: None</annotation>
-            <annotation>screen.interaction_yes: None</annotation>
-        </instructions>
-        """
     final_query = query + " " + " ".join(additional_info)
     final_prompt = final_query + " " + end_query
     # Assuming your describe_video function handles the video processing
     response = describe_video(video, final_prompt)
     final_response = f"<video_name>{video_name}</video_name>" + " " + response
-    return final_response
 def output_to_csv(final_response):

 from src.utils import parse_string, parse_annotations
 import os
+# # --- Function to construct the final query ---
+# def process_video_and_questions(video, sitting, hands, location, screen):
+#     # Extract the video name (filename)
+#     video_name = os.path.basename(video)
+#     # Construct the query with the video name included
+#     query = f"Describe the video in detail and answer the questions"
+#     additional_info = []
+#     if sitting:
+#         additional_info.append("Is the subject in the video standing or sitting?")
+#     if hands:
+#         additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
+#     if location:
+#         additional_info.append("Is the subject present indoors or outdoors?")
+#     if screen:
+#         additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
+#     end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Below is an example:
+#         <instructions>
+#             <annotation>indoors: 0</annotation>
+#             <annotation>standing: 1</annotation>
+#             <annotation>hands.free: None</annotation>
+#             <annotation>screen.interaction_yes: None</annotation>
+#         </instructions>
+#         """
+    # final_query = query + " " + " ".join(additional_info)
+    # final_prompt = final_query + " " + end_query
+    # # Assuming your describe_video function handles the video processing
+    # response = describe_video(video, final_prompt)
+    # final_response = f"<video_name>{video_name}</video_name>" + " " + response
+    # return final_response
 def process_video_and_questions(video, sitting, hands, location, screen):
     # Extract the video name (filename)
     video_name = os.path.basename(video)
     # Construct the query with the video name included
     query = f"Describe the video in detail and answer the questions"
     additional_info = []
+    # Handle each checkbox option, including those not selected (None)
+    if sitting is not None:
         additional_info.append("Is the subject in the video standing or sitting?")
+    else:
+        additional_info.append("<annotation>standing: None</annotation>")
+    if hands is not None:
         additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
+    else:
+        additional_info.append("<annotation>hands.free: None</annotation>")
+    if location is not None:
         additional_info.append("Is the subject present indoors or outdoors?")
+    else:
+        additional_info.append("<annotation>indoors: None</annotation>")
+    if screen is not None:
         additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
+    else:
+        additional_info.append("<annotation>screen.interaction_yes: None</annotation>")
+    # Updated end_query string with clear explanation and example
     end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Below is an example:
+    <annotation>indoors: 0</annotation>
+    <annotation>standing: 1</annotation>
+    <annotation>hands.free: None</annotation>
+    <annotation>screen.interaction_yes: None</annotation>
+    """
     final_query = query + " " + " ".join(additional_info)
     final_prompt = final_query + " " + end_query
     # Assuming your describe_video function handles the video processing
     response = describe_video(video, final_prompt)
     final_response = f"<video_name>{video_name}</video_name>" + " " + response
+    return final_response
 def output_to_csv(final_response):