robinsmits committed
Commit 4483b98 · 1 Parent(s): 629c40b

Code Update: 11 out of 20 correct.

Files changed (3)
  1. agents.py +2 -2
  2. app.py +1 -1
  3. tooling.py +10 -12
agents.py CHANGED

@@ -46,7 +46,7 @@ def create_web_agent():
     tools = [FinalAnswerTool(),
              GoogleSearchTool(),
              DuckDuckGoSearchTool(),
-             VisitWebpageTool(max_output_length = 75000),
+             VisitWebpageTool(max_output_length = 100000),
              WikipediaSearchTool(user_agent = "FinalAssignmentResearchBot ([email protected])",
                                  language = "en",
                                  content_type = "text",

@@ -97,6 +97,6 @@ def create_manager_agent():
         planning_interval = 3,
         verbosity_level = 2,
         stream_outputs = True,
-        max_steps = 20,
+        max_steps = 25,
         provide_run_summary = True,
         managed_agents = [vision_agent, web_agent])
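For context, here is a minimal sketch of how the changed arguments could fit together in smolagents. The tool settings and manager parameters are taken from the diff above; the agent class, the model, the function signatures, and the name/description strings are illustrative assumptions, not the repository's actual code.

```python
# Minimal sketch, not the repository's actual code: tool settings and manager
# parameters come from the diff above; the agent class, model and
# name/description strings are illustrative assumptions.
from smolagents import (CodeAgent, InferenceClientModel, FinalAnswerTool,
                        GoogleSearchTool, DuckDuckGoSearchTool,
                        VisitWebpageTool, WikipediaSearchTool)

def create_web_agent():
    # VisitWebpageTool now keeps up to 100000 characters of a page (was 75000),
    # so less of a long page is lost to truncation.
    tools = [FinalAnswerTool(),
             GoogleSearchTool(),          # requires a SerpAPI key in the environment
             DuckDuckGoSearchTool(),
             VisitWebpageTool(max_output_length = 100000),
             WikipediaSearchTool(user_agent = "FinalAssignmentResearchBot ([email protected])",
                                 language = "en",
                                 content_type = "text")]
    return CodeAgent(tools = tools,
                     model = InferenceClientModel(),   # assumed model
                     name = "web_agent",               # assumed name
                     description = "Searches the web and reads pages.")  # assumed

def create_manager_agent(vision_agent, web_agent):
    # max_steps goes from 20 to 25, giving the manager a few more plan/act cycles.
    return CodeAgent(tools = [FinalAnswerTool()],
                     model = InferenceClientModel(),   # assumed model
                     planning_interval = 3,
                     verbosity_level = 2,
                     stream_outputs = True,
                     max_steps = 25,
                     provide_run_summary = True,
                     managed_agents = [vision_agent, web_agent])
```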
app.py CHANGED

@@ -117,7 +117,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         # Run Manager Agent
         submitted_answer = manager_agent.run(get_manager_agent_prompt(question_text, file_prompt))

-        # Basic verification...convert both to string...
+        # Basic verification...convert both to string...shouldn't happen as output...but occasionally does ;-)
         if type(submitted_answer) is list or type(submitted_answer) is dict:
             submitted_answer = str(submitted_answer)

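The comment change only documents why this guard exists. A self-contained sketch of the same coercion is below; normalize_answer is a hypothetical helper (not in app.py), and isinstance() is the more idiomatic check than comparing type() directly.

```python
# Hypothetical helper (not in app.py) showing the same coercion with isinstance().
def normalize_answer(submitted_answer):
    # The manager agent should return a plain string, but occasionally hands
    # back a list or dict; stringify those before submission.
    if isinstance(submitted_answer, (list, dict)):
        submitted_answer = str(submitted_answer)
    return submitted_answer

print(normalize_answer("Paris"))     # -> Paris
print(normalize_answer(["Paris"]))   # -> ['Paris']
```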
tooling.py CHANGED

@@ -1,5 +1,3 @@
-# https://github.com/huggingface/smolagents/blob/v1.17.0/src/smolagents/default_tools.py#L479
-
 # Import Modules
 import os
 import pandas as pd

@@ -10,11 +8,9 @@ import re
 import torch
 from transformers import AutoProcessor, AutoModelForVision2Seq
 from smolagents import tool, Tool
-from smolagents.tools import PipelineTool
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import librosa
 import numpy as np
-

 gradio_main_instructions = """
 **Instructions:**

@@ -126,12 +122,14 @@ def vision_language_tool(question: str, file_name: str) -> str:
     """

     prompt = f"""
+# Objective:
 You are provided with an image.

-Answer the following question about the image very specifically and in detail:
+Answer the following question about the image very specifically and in detail. Think step by step.

-{question}"""
-    print(f"vlt: {os.listdir('./')}")
+# Question:
+{question}
+"""
     conversation = [
         {
             "role": "user",

@@ -145,13 +143,13 @@ Answer the following question about the image very specifically and in detail:
                                  return_tensors = "pt").to(device)


-    # autoregressively complete prompt
+    # Generate
     model_output = vision_model.generate(**inputs,
-                                         max_new_tokens = 1024,
-                                         temperature = 0.2,
+                                         max_new_tokens = 2048,
+                                         temperature = 0.5,
                                          do_sample = True,
-                                         top_p = 0.975,
-                                         top_k = 75,
+                                         top_p = 0.98,
+                                         top_k = 80,
                                          min_p = 0.05,
                                          repetition_penalty = 1.15)
     answer = vision_processor.decode(model_output[0], skip_special_tokens = True)
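A hedged sketch of the updated generation step follows. The prompt layout and the sampling values (max_new_tokens, temperature, top_p, top_k, min_p, repetition_penalty) come from this commit; the run_vision_question wrapper, its signature, and the chat-template/processor plumbing are assumptions about the unchanged surrounding code in tooling.py.

```python
from PIL import Image

def run_vision_question(vision_model, vision_processor, image: Image.Image,
                        question: str, device: str = "cpu") -> str:
    # Prompt layout as introduced by this commit.
    prompt = f"""
# Objective:
You are provided with an image.

Answer the following question about the image very specifically and in detail. Think step by step.

# Question:
{question}
"""
    # Assumed chat format and processor calls; the diff does not show these lines.
    conversation = [{"role": "user",
                     "content": [{"type": "image"},
                                 {"type": "text", "text": prompt}]}]
    chat_prompt = vision_processor.apply_chat_template(conversation, add_generation_prompt = True)
    inputs = vision_processor(text = chat_prompt,
                              images = [image],
                              return_tensors = "pt").to(device)

    # Generate: longer answers (2048 new tokens) and broader sampling than before
    # (temperature 0.5, top_p 0.98, top_k 80), as set in this commit.
    model_output = vision_model.generate(**inputs,
                                         max_new_tokens = 2048,
                                         temperature = 0.5,
                                         do_sample = True,
                                         top_p = 0.98,
                                         top_k = 80,
                                         min_p = 0.05,
                                         repetition_penalty = 1.15)
    return vision_processor.decode(model_output[0], skip_special_tokens = True)
```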