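# Spaces: Running on L4
# NOTE: the line above appears to be a status banner from the hosting page that
# was captured together with this file; it is not part of the module.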
from enum import Enum
class Mode(Enum):
    """Proposition-extraction modes.

    Members are used as the keys of the ``mode_prompts`` and ``mode_outputs``
    tables defined in this module.
    """

    OBJECT_ACTION_ALIGNMENT = 1
    OBJECT_EXISTENCE = 2
    OVERALL_CONSISTENCY = 3
    SPATIAL_RELATIONSHIP = 4
# Per-mode extraction instructions. Each value names the JSON key to produce,
# states what to extract for that mode, and ends with a one-line example.
mode_prompts = {
    Mode.OBJECT_ACTION_ALIGNMENT: (
        '"object_action_alignment":\n'
        "Extract actions and their participating objects. Each proposition must describe an action and its related objects.\n"
        "Example:\n"
        '"object_action_alignment": ["person holds hotdog", "person walks"]'
    ),
    Mode.OBJECT_EXISTENCE: (
        '"object_existence":\n'
        "Extract only the tangible objects mentioned in the prompt.\n"
        "Example:\n"
        '"object_existence": ["person", "hotdog", "car", "truck"]'
    ),
    Mode.OVERALL_CONSISTENCY: (
        '"overall_consistency":\n'
        # "\u2014" is an em dash, written as an escape so the character cannot
        # be mis-decoded again (it had been garbled into a stray Greek letter).
        "Extract all meaningful event propositions that describe the combined semantics of objects, actions, and spatial relationships \u2014 "
        "but avoid TL keywords such as 'and', 'or', 'not', 'until', 'eventually'.\n"
        "Example:\n"
        '"overall_consistency": ["person holds hotdog", "person walks", "car next to truck"]'
    ),
    Mode.SPATIAL_RELATIONSHIP: (
        # The JSON key is plural ("spatial_relationships") even though the
        # enum member is singular; mode_outputs uses the same plural key.
        '"spatial_relationships":\n'
        'Extract only spatial relationships between tangible objects (e.g., "object A next to object B"). Do not infer or hallucinate spatial relationships.\n'
        "Example:\n"
        '"spatial_relationships": ["car next to truck"]'
    ),
}
# Per-mode lines of the expected-output JSON skeleton: the proposition list
# key followed by its "<key>_spec" placeholder.
# NOTE(review): every entry except SPATIAL_RELATIONSHIP ends with a comma, so
# a skeleton whose final entry is any other mode would end in a trailing
# comma - confirm the code that joins these fragments accounts for that.
mode_outputs = {
    Mode.OBJECT_ACTION_ALIGNMENT: (
        ' "object_action_alignment": [...],\n'
        ' "object_action_alignment_spec": "...",'
    ),
    Mode.OBJECT_EXISTENCE: (
        ' "object_existence": [...],\n'
        ' "object_existence_spec": "...",'
    ),
    Mode.OVERALL_CONSISTENCY: (
        ' "overall_consistency": [...],\n'
        ' "overall_consistency_spec": "...",'
    ),
    Mode.SPATIAL_RELATIONSHIP: (
        ' "spatial_relationships": [...],\n'
        ' "spatial_relationships_spec": "..."'
    ),
}
# Opening instruction of the prompt: describes the agent's role and announces
# the two stages (proposition extraction, then TL specification generation).
header = (
    "You are an intelligent agent designed to extract structured representations from video description prompts. "
    "You will operate in two stages: (1) proposition extraction and (2) TL specification generation.\n\n"
)
# Stage 1 heading and instructions.
# NOTE(review): the text hard-codes "four modes", while spec_gen_intro takes
# the list count through its "{n}" field - confirm whether this prompt is ever
# built for a subset of modes.
stage1_intro = (
    "Stage 1: Proposition Extraction\n\n"
    "Given an input prompt summarizing a video, extract atomic propositions in the following four modes. "
    "Return all outputs in JSON format.\n\n"
)
# Stage 2 heading.
stage2_intro = "Stage 2: TL Specification Generation\n\n"

# Stage 2 instructions. Contains a named "{n}" replacement field for the
# number of proposition lists; presumably filled with str.format(n=...) by the
# code that assembles the prompt.
spec_gen_intro = (
    "For each of the {n} list(s) of propositions extracted in Stage 1, generate a separate Temporal Logic (TL) specification "
    "describing the structure or sequence of events in that list.\n\n"
)
# Rules the model must follow when writing a TL formula: use only the
# propositions of one list plus the six listed operators, and add nothing new.
tl_instructions = (
    "Rules for TL specification:\n"
    "- The input is a single list of propositions from one of the extraction modes.\n"
    "- The output is a single TL formula using **only** the propositions from that list and the allowed TL symbols: "
    "['AND', 'OR', 'NOT', 'UNTIL', 'ALWAYS', 'EVENTUALLY']\n"
    "- Do not introduce any new propositions.\n"
    "- Each formula should reflect the temporal or logical relationships between the propositions in a way that makes semantic sense.\n\n"
)
# Wrapper for the user's prompt. The doubled braces and the bare "{}" field
# suggest it is rendered with str.format(prompt_text), which turns "{{"/"}}"
# into literal braces.
input_template = (
    "Input:\n"
    "{{\n"
    ' "prompt": "{}"\n'
    "}}\n"
    "\n"
)

# Opening and closing lines of the expected-output JSON skeleton. These use
# single braces, so they are plain text rather than format templates.
expected_output_header = (
    "Expected Output:\n"
    "{\n"
)
expected_output_footer = '\n}'