# Question-answering agent built on LangChain: tool definitions (web search,
# audio transcription, Excel reading, Python execution, text extraction) and
# the BasicAgent wrapper that drives them.
import contextlib
import io
import logging
import os
import re
import tempfile
from typing import Optional

import pandas as pd
import requests
from langchain.agents import initialize_agent, Tool
from langchain.agents.agent_types import AgentType
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_openai import ChatOpenAI
# Audio transcription tool (OpenAI Whisper)
def transcribe_audio_tool(file_url: str) -> str:
    """Download an audio file and return its transcription.

    The file is fetched over HTTP, stored in a temporary ``.mp3`` file and
    sent to the ``whisper-1`` model through ``openai.Audio.transcribe``.

    Args:
        file_url: URL of the audio file to download.

    Returns:
        The transcribed text, or an empty string when the download or the
        transcription fails (the tool is best-effort for the agent).
    """
    import openai

    openai.api_key = os.getenv("OPENAI_API_KEY")
    path = None
    try:
        r = requests.get(file_url, timeout=20)
        # Do not forward an HTTP error page to the transcription API.
        r.raise_for_status()
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(r.content)
            path = f.name
        # Reopen only after the writer is closed so the data is fully on
        # disk; the context manager guarantees the read handle is released.
        with open(path, "rb") as audio:
            transcript = openai.Audio.transcribe("whisper-1", audio)
        return transcript.get("text", "")
    except Exception as e:
        # Best-effort tool: report the failure but hand the agent an empty
        # observation instead of crashing the run.
        logging.getLogger(__name__).warning(
            "transcribe_audio_tool failed for %s: %s", file_url, e
        )
        return ""
    finally:
        # The temp file is created with delete=False, so remove it here.
        if path is not None:
            with contextlib.suppress(OSError):
                os.remove(path)
# Excel reading tool
def read_excel_tool(file_url: str) -> str:
    """Download an Excel workbook and return a total as text.

    When the loaded sheet has both a ``Type`` and a ``Sales`` column, the
    result is the sum of ``Sales`` over the rows whose ``Type`` equals
    "food" (case-insensitive). Otherwise every numeric column is summed.

    Args:
        file_url: URL of the ``.xlsx`` file to download.

    Returns:
        The total rounded to two decimals, as a string, or an empty string
        when the download or parsing fails (best-effort tool).
    """
    try:
        r = requests.get(file_url, timeout=20)
        # Do not try to parse an HTTP error page as a workbook.
        r.raise_for_status()
        # Parse straight from memory so no temporary file is left on disk.
        df = pd.read_excel(io.BytesIO(r.content))
        if 'Type' in df.columns and 'Sales' in df.columns:
            food_rows = df[df['Type'].str.lower() == 'food']
            total = food_rows['Sales'].sum()
            return str(round(total, 2))
        # fallback: sum all numbers
        total = df.select_dtypes(include='number').sum().sum()
        return str(round(total, 2))
    except Exception as e:
        # Best-effort tool: report the failure but hand the agent an empty
        # observation instead of crashing the run.
        logging.getLogger(__name__).warning(
            "read_excel_tool failed for %s: %s", file_url, e
        )
        return ""
# Python code execution tool (CAUTION: sandbox this for production!)
def execute_python_tool(code_url: str) -> str:
    """Download a Python script, run it, and return its final output.

    The script's standard output is captured. The last line of that output
    is reduced to its last number when it contains one; otherwise the line
    itself is returned.

    Args:
        code_url: URL of the Python source file to download and execute.

    Returns:
        The last number on the final output line, the final line itself when
        it holds no number, or an empty string when the download or the
        execution fails (best-effort tool).
    """
    try:
        r = requests.get(code_url, timeout=20)
        # Never execute the body of an HTTP error response.
        r.raise_for_status()
        code = r.content.decode("utf-8")
        buf = io.StringIO()
        # SECURITY: this runs downloaded, untrusted code inside the current
        # process with full privileges. Run it in a sandbox or separate
        # restricted process before exposing this tool to real traffic.
        # suppress(SystemExit): a script ending with sys.exit() would
        # otherwise escape the `except Exception` below; its output so far
        # is still kept.
        with contextlib.redirect_stdout(buf), contextlib.suppress(SystemExit):
            # Run as "__main__" so `if __name__ == "__main__":` blocks in the
            # script actually execute.
            exec(code, {"__name__": "__main__"})
        output = buf.getvalue().strip().split('\n')[-1]
        # Only final numeric output if possible. The sign is kept only when
        # it does not directly follow a word character or dot, so "-7" stays
        # negative while "10-20" still yields "20".
        numbers = re.findall(r'(?:(?<![\w.])[-+])?(?:\d*\.\d+|\d+)', output)
        return numbers[-1] if numbers else output
    except Exception as e:
        # Best-effort tool: report the failure but hand the agent an empty
        # observation instead of crashing the run.
        logging.getLogger(__name__).warning(
            "execute_python_tool failed for %s: %s", code_url, e
        )
        return ""
# Number extraction tool (example of "reasoning" tool)
def extract_numbers(text: str) -> str:
    """Return the standalone digit runs in *text*, joined by ", ".

    A run is reported only when it sits between word boundaries, so digits
    glued to letters are skipped and a decimal such as "3.14" yields two
    entries. An empty string is returned when nothing matches.
    """
    digit_runs = (hit.group(0) for hit in re.finditer(r'\b\d+\b', text))
    return ', '.join(digit_runs)
def extract_names(text: str) -> str:
    """Return capitalized words from *text*, joined by ", ".

    A word qualifies when it is one uppercase ASCII letter followed by at
    least two lowercase ASCII letters and nothing else. An empty string is
    returned when no word qualifies.
    """
    capitalized = re.compile(r'\b[A-Z][a-z]{2,}\b')
    hits = capitalized.findall(text)
    if not hits:
        return ""
    return ', '.join(hits)
# Tools list
# Declared as (name, callable, description) rows and turned into Tool
# objects in the same order as written.
tools = [
    Tool(name=label, func=handler, description=summary)
    for label, handler, summary in (
        (
            "DuckDuckGo Search",
            DuckDuckGoSearchRun().run,
            "Use to find factual information or recent events.",
        ),
        (
            "Transcribe Audio",
            transcribe_audio_tool,
            "Use to transcribe an audio file from a URL (mp3 or wav).",
        ),
        (
            "Read Excel File",
            read_excel_tool,
            "Use to read an Excel spreadsheet file from a URL (xlsx) and sum food sales or extract tables.",
        ),
        (
            "Execute Python",
            execute_python_tool,
            "Use to execute a Python file from a URL and get the final output.",
        ),
        (
            "Extract Numbers",
            extract_numbers,
            "Use to extract all numbers from provided text.",
        ),
        (
            "Extract Names",
            extract_names,
            "Use to extract capitalized names from provided text.",
        ),
    )
]
# Instruction preamble placed ahead of every question; it asks the model to
# close its reply with a "FINAL ANSWER: ..." line in a fixed format.
PROMPT = "".join([
    "You are a general AI assistant. I will ask you a question. ",
    "Reason step by step, and use tools as needed. Only after you are sure, answer with the template: ",
    "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. ",
    "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. ",
    "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. ",
    "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
])
# Chat model handed to the agent; built once when the module is imported,
# using the "gpt-4o" model with temperature 0.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
class BasicAgent:
    """Callable wrapper that answers a question with a tool-using agent.

    Calling an instance prepends the instruction prompt to the question,
    mentions the task's file URL when one is found, runs the agent and
    returns the text that follows the "FINAL ANSWER:" marker.
    """

    # Base URL used to look up the file attached to a task.
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

    # Case-insensitive marker; group 1 is the rest of the line after it.
    _FINAL_ANSWER_RE = re.compile(r"final answer:(.*)", re.IGNORECASE)

    def __init__(self):
        """Build the ReAct agent from the module-level tools and model."""
        self.agent = initialize_agent(
            tools=tools,
            llm=llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=False,
            handle_parsing_errors=True
        )
        self.prompt = PROMPT

    def fetch_file_url(self, task_id):
        """Return the file URL for *task_id* if the server reports it exists.

        A HEAD request is sent to ``<DEFAULT_API_URL>/files/<task_id>``.

        Returns:
            The URL when the response status is 200, otherwise ``None``
            (including when the request itself fails).
        """
        url = f"{self.DEFAULT_API_URL}/files/{task_id}"
        try:
            r = requests.head(url, timeout=5)
        except requests.RequestException:
            # Best-effort probe: a network failure means "no file", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return None
        return url if r.status_code == 200 else None

    @classmethod
    def _extract_final_answer(cls, text: str) -> str:
        """Return the answer after the first "FINAL ANSWER:" marker.

        The marker is matched case-insensitively anywhere on a line, so a
        reply like "Therefore, FINAL ANSWER: 42" is handled too. Spaces,
        periods and quotes around the answer are trimmed. When no line has
        the marker, *text* is returned unchanged.
        """
        for line in text.splitlines():
            match = cls._FINAL_ANSWER_RE.search(line)
            if match:
                return match.group(1).strip(" .\"'")
        return text

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Answer *question*, optionally pointing the agent at a task file.

        Args:
            question: The question text.
            task_id: Optional task identifier used to look up an attached
                file; skipped when empty or ``None``.

        Returns:
            The extracted final answer, or the agent's raw reply when it
            contains no "FINAL ANSWER:" marker.
        """
        file_url = self.fetch_file_url(task_id) if task_id else None
        if file_url:
            # If file is attached, add the info for the agent
            question_aug = f"{question}\nThis task has assigned file at this URL: {file_url}"
        else:
            question_aug = question
        # Add instruction prompt
        full_prompt = self.prompt + "\n" + question_aug
        result = self.agent.run(full_prompt)
        # Extract only FINAL ANSWER
        return self._extract_final_answer(result)