Stremly committed on
Commit
f2cd623
·
verified ·
1 Parent(s): 88bf1eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -0
app.py CHANGED
@@ -55,6 +55,15 @@ def navigate(screenshot, task: str, platform: str, history):
55
  else:
56
  messages = history
57
 
 
 
 
 
 
 
 
 
 
58
 
59
 
60
  # ─────────────────────────── model forward ─────────────────────────────
 
55
  else:
56
  messages = history
57
 
58
+ if messages is None:
59
+ prompt_header = (
60
+ "You are a GUI agent. You are given a task and your action history, with screenshots."
61
+ " You need to perform the next action to complete the task. \n\n## Output Format\n```\nThought: ...\nAction: ...\n```\n\n## Action Space\n\nclick(start_box='<|box_start|>(x1, y1)<|box_end|>')\nleft_double(start_box='<|box_start|>(x1, y1)<|box_end|>')\nright_single(start_box='<|box_start|>(x1, y1)<|box_end|>')\ndrag(start_box='<|box_start|>(x1, y1)<|box_end|>', end_box='<|box_start|>(x3, y3)<|box_end|>')\nhotkey(key='')\ntype(content='') #If you want to submit your input, use \"\\n\" at the end of `content`.\nscroll(start_box='<|box_start|>(x1, y1)<|box_end|>', direction='down or up or right or left')\nwait() #Sleep for 5s and take a screenshot to check for any changes.\nfinished(content='xxx') # Use escape characters \\', \\\", and \\n in content part to ensure we can parse the content in normal python string format.\n\n\n## Note\n- Use Chinese in `Thought` part.\n- Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part.\n\n"
62
+ f"## User Instruction\n{task}"
63
+ )
64
+ user_history = [{"role":"user","content":prompt_header},{"role":"user","content":{"type": "image_url", "image_url": {"url": screenshot}}}]
65
+ messages = user_history
66
+
67
 
68
 
69
  # ─────────────────────────── model forward ─────────────────────────────