Final_Assignment_Template

Sleeping

EtienneB

Update tools.py

eaad534 22 days ago

26.4 kB

	import base64
	import datetime
	import math
	import os
	import urllib.parse
	from pathlib import Path
	from typing import Dict, Union
	# from dotenv import load_dotenv


	import pandas
	import pytz
	import requests
	from bs4 import BeautifulSoup
	from langchain_community.document_loaders import (
	ArxivLoader, AssemblyAIAudioTranscriptLoader, WikipediaLoader)
	from langchain_community.document_loaders.generic import GenericLoader
	from langchain_community.document_loaders.parsers import LanguageParser
	from langchain_core.messages import HumanMessage
	# from langchain_community.tools import DuckDuckGoSearchRun
	# from langchain_community.tools import DuckDuckGoSearchRun
	from langchain_core.tools import tool
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain_openai import ChatOpenAI
	from langchain_tavily import TavilySearch

	# load_dotenv()
	# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
	# os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY


	@tool
	def download_file(url: str, filename: Union[str, None] = None) -> str:
	    """
	    Download a file from a URL and save it locally for analysis.
	    Analysis can be done using one or more other tools.

	    Args:
	        url: The HTTP(S) URL of the file to download.
	        filename: Optional custom filename. Only its final path component is
	            used. If not provided, the name is taken from the URL path, or
	            derived from a digest of the URL when the path has no usable name.
	            A filename advertised by the server in Content-Disposition takes
	            precedence over both.

	    Returns:
	        The local file path where the file was saved, or a string starting
	        with "Error" when the download or the write failed.
	    """
	    import hashlib
	    import re

	    def _leaf_name(name: str) -> str:
	        # Keep only the last path component so that neither the caller nor the
	        # remote server can make us write outside the download directory.
	        leaf = os.path.basename(str(name).replace("\\", "/")).strip()
	        return "" if leaf in ("", ".", "..") else leaf

	    try:
	        # Clean and validate URL
	        url = url.strip()
	        if not url.startswith(('http://', 'https://')):
	            return f"Error: Invalid URL format: {url}"

	        # Create downloads directory if it doesn't exist
	        download_dir = Path("downloads")
	        download_dir.mkdir(exist_ok=True)

	        # Determine filename: caller's choice first, then the URL path
	        if filename:
	            filename = _leaf_name(filename)
	        if not filename:
	            filename = _leaf_name(urllib.parse.urlparse(url).path)
	            if not filename or '.' not in filename:
	                # hash() is salted per process; a SHA-256 prefix gives the same
	                # name for the same URL on every run.
	                url_digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:8]
	                filename = f"downloaded_file_{url_digest}"

	        headers = {
	            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
	        }

	        print(f"Downloading file from: {url}")
	        # The context manager releases the streamed connection even on errors.
	        with requests.get(url, headers=headers, timeout=30, stream=True) as response:
	            response.raise_for_status()

	            # Prefer the server-suggested filename when one is advertised
	            disposition = response.headers.get('content-disposition', '')
	            match = re.search(r'filename="?([^";]+)', disposition, flags=re.IGNORECASE)
	            if match:
	                suggested_filename = _leaf_name(match.group(1))
	                if suggested_filename:
	                    filename = suggested_filename

	            filepath = download_dir / filename

	            # Write file to disk
	            with open(filepath, 'wb') as f:
	                for chunk in response.iter_content(chunk_size=8192):
	                    f.write(chunk)

	        file_size = os.path.getsize(filepath)
	        print(f"Successfully downloaded {filepath.name} ({file_size} bytes)")

	        return str(filepath)

	    except requests.exceptions.RequestException as e:
	        return f"Error downloading file: {str(e)}"
	    except Exception as e:
	        return f"Error saving file: {str(e)}"


	@tool
	def multiply(a: Union[int, float], b: Union[int, float]) -> Union[int, float]:
	    """Compute the product of two numbers.

	    Args:
	        a: The left-hand factor.
	        b: The right-hand factor.

	    Returns:
	        ``a * b``, passed through ``int()`` when both inputs are ints. If the
	        operation raises, a string describing the error is returned instead.
	    """
	    try:
	        product = a * b
	        if isinstance(a, int) and isinstance(b, int):
	            return int(product)
	        return product
	    except Exception as e:
	        return f"Error in multiplication: {str(e)}"


	@tool
	def add(a: Union[int, float], b: Union[int, float]) -> Union[int, float]:
	    """Compute the sum of two numbers.

	    Args:
	        a: The first addend.
	        b: The second addend.

	    Returns:
	        ``a + b``, passed through ``int()`` when both inputs are ints. If the
	        operation raises, a string describing the error is returned instead.
	    """
	    try:
	        total = a + b
	        if isinstance(a, int) and isinstance(b, int):
	            return int(total)
	        return total
	    except Exception as e:
	        return f"Error in addition: {str(e)}"


	@tool
	def power(a: Union[int, float], b: Union[int, float]) -> Union[int, float, str]:
	    """Raises a number to the power of another.

	    Args:
	        a: The base number.
	        b: The exponent.

	    Returns:
	        The result of raising `a` to the power of `b`, or a string starting
	        with "Error" when the result is undefined, not a real number, or too
	        large to represent as a float.
	    """
	    try:
	        if a == 0 and b < 0:
	            return "Error: Cannot raise 0 to a negative power"
	        result = a ** b
	        # In Python 3 a negative base with a fractional exponent yields a
	        # complex number; report it instead of leaking a complex value.
	        if isinstance(result, complex):
	            return "Error: Result is not a real number (negative base with fractional exponent)"
	        return result
	    except OverflowError:
	        return "Error: Result too large to compute"
	    except Exception as e:
	        return f"Error in power calculation: {str(e)}"


	@tool
	def subtract(a: Union[int, float], b: Union[int, float]) -> Union[int, float]:
	    """Compute the difference of two numbers.

	    Args:
	        a: The minuend (value to subtract from).
	        b: The subtrahend (value to take away).

	    Returns:
	        ``a - b``, passed through ``int()`` when both inputs are ints. If the
	        operation raises, a string describing the error is returned instead.
	    """
	    try:
	        difference = a - b
	        if isinstance(a, int) and isinstance(b, int):
	            return int(difference)
	        return difference
	    except Exception as e:
	        return f"Error in subtraction: {str(e)}"


	@tool
	def divide(a: Union[int, float], b: Union[int, float]) -> float:
	    """Compute the true quotient of two numbers.

	    Args:
	        a: The numerator.
	        b: The denominator.

	    Returns:
	        ``a / b``. A zero denominator, or any error raised by the division,
	        yields a descriptive error string instead.
	    """
	    try:
	        denominator_is_zero = b == 0
	        if not denominator_is_zero:
	            return a / b
	        return "Error: Division by zero is not allowed"
	    except Exception as e:
	        return f"Error in division: {str(e)}"


	@tool
	def modulus(a: int, b: int) -> Union[int, str]:
	    """Compute the remainder of dividing one integer by another.

	    Args:
	        a: The dividend.
	        b: The divisor.

	    Returns:
	        ``a % b``. A zero divisor, or any error raised by the operation,
	        yields a descriptive error string instead.
	    """
	    try:
	        divisor_is_zero = b == 0
	        if not divisor_is_zero:
	            return a % b
	        return "Error: Modulus by zero is not allowed"
	    except Exception as e:
	        return f"Error in modulus operation: {str(e)}"


	@tool
	def square_root(x: Union[int, float]) -> Union[float, str]:
	    """Compute the square root of a non-negative number.

	    Args:
	        x: The value whose root is wanted. Must be non-negative.

	    Returns:
	        ``math.sqrt(x)``. A negative input, or any error raised by the
	        computation, yields a descriptive error string instead.
	    """
	    try:
	        is_negative = x < 0
	        if not is_negative:
	            return math.sqrt(x)
	        return "Error: Square root of negative number is not allowed"
	    except Exception as e:
	        return f"Error in square root calculation: {str(e)}"


	@tool
	def floor_divide(a: int, b: int) -> Union[int, str]:
	    """Compute the floored quotient of two numbers.

	    Args:
	        a: The dividend.
	        b: The divisor.

	    Returns:
	        ``a // b``. A zero divisor, or any error raised by the operation,
	        yields a descriptive error string instead.
	    """
	    try:
	        divisor_is_zero = b == 0
	        if not divisor_is_zero:
	            return a // b
	        return "Error: Division by zero is not allowed"
	    except Exception as e:
	        return f"Error in floor division: {str(e)}"


	@tool
	def absolute(x: Union[int, float]) -> Union[int, float]:
	    """Compute the absolute value of a number.

	    Args:
	        x: The input number.

	    Returns:
	        ``abs(x)``, passed through ``int()`` when the input is an int. If the
	        operation raises, a string describing the error is returned instead.
	    """
	    try:
	        magnitude = abs(x)
	        if isinstance(x, int):
	            return int(magnitude)
	        return magnitude
	    except Exception as e:
	        return f"Error in absolute value calculation: {str(e)}"


	@tool
	def logarithm(x: Union[int, float], base: Union[int, float] = math.e) -> Union[float, str]:
	    """Returns the logarithm of a number with a given base.

	    Args:
	        x: The number to take the logarithm of. Must be positive.
	        base: The logarithmic base. Must be positive and not equal to 1.
	            Defaults to e (natural logarithm).

	    Returns:
	        The logarithm of `x` to the given base, or a string starting with
	        "Error" when an argument is out of range or the computation fails.
	    """
	    try:
	        if x <= 0:
	            return "Error: Logarithm input must be positive"
	        if base <= 0 or base == 1:
	            return "Error: Logarithm base must be positive and not equal to 1"
	        # math.log(x, base) computes log(x) / log(base), which is inexact for
	        # common bases (log(1000, 10) gives 2.9999999999999996); the dedicated
	        # functions are more accurate.
	        if base == 10:
	            return math.log10(x)
	        if base == 2:
	            return math.log2(x)
	        return math.log(x, base)
	    except Exception as e:
	        return f"Error in logarithm calculation: {str(e)}"


	@tool
	def exponential(x: Union[int, float]) -> Union[float, str]:
	    """Returns e raised to the power of `x`.

	    Args:
	        x: The exponent.

	    Returns:
	        The value of e^x, or a string starting with "Error" when the result
	        does not fit in a float or the computation fails.
	    """
	    try:
	        # No fixed cutoff: math.exp raises OverflowError exactly when the
	        # result stops fitting in a float (just above x = 709.78), so valid
	        # inputs between 700 and that limit are no longer rejected.
	        return math.exp(x)
	    except OverflowError:
	        return "Error: Exponent too large, would cause overflow"
	    except Exception as e:
	        return f"Error in exponential calculation: {str(e)}"


	@tool
	def roman_calculator_converter(value1: int, value2: int, oper: str) -> str:
	    """Performs an operation on 2 numbers and returns the result as a Roman numeral.

	    Args:
	        value1: The first value
	        value2: The second value
	        oper: Operator for the calculation ("add", "subtract", "multiply", "divide")

	    Returns:
	        A sentence containing the result as a Roman numeral, or a string
	        starting with "Error" when the inputs are invalid or the result is
	        outside 1..3999. Division truncates toward zero.
	    """
	    try:
	        # Input validation
	        if not isinstance(value1, int) or not isinstance(value2, int):
	            return "Error: Both values must be integers"

	        if oper not in ("add", "subtract", "multiply", "divide"):
	            return "Error: Operator must be 'add', 'subtract', 'multiply', or 'divide'"

	        # Appending "ing" to the operator name produced "divideing"; spell the
	        # gerunds out instead.
	        gerunds = {
	            "add": "adding",
	            "subtract": "subtracting",
	            "multiply": "multiplying",
	            "divide": "dividing",
	        }

	        # Roman numeral mapping, largest value first
	        roman_numerals = [
	            (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
	            (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
	            (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I"),
	        ]

	        # Perform calculation
	        if oper == "add":
	            result = value1 + value2
	        elif oper == "subtract":
	            result = value1 - value2
	        elif oper == "multiply":
	            result = value1 * value2
	        else:
	            if value2 == 0:
	                return "Error: Division by zero is not allowed"
	            # Truncate toward zero with exact integer arithmetic; going through
	            # float division loses precision (or overflows) for large ints.
	            quotient = abs(value1) // abs(value2)
	            result = quotient if (value1 < 0) == (value2 < 0) else -quotient

	        # Handle invalid results for Roman numerals
	        if result <= 0:
	            return f"Error: Roman numerals cannot represent zero or negative numbers. Result was: {result}"

	        if result > 3999:  # Roman numerals traditionally don't go beyond this
	            return f"Error: Result ({result}) is too large for standard Roman numeral representation"

	        # Convert to Roman numeral by greedily consuming the largest symbols
	        remaining = result
	        symbols = []
	        for value, numeral in roman_numerals:
	            count, remaining = divmod(remaining, value)
	            symbols.append(numeral * count)
	        roman_string = "".join(symbols)

	        return f"The result of {gerunds[oper]} {value1} and {value2} is: {roman_string}"

	    except Exception as e:
	        return f"Error in Roman calculator: {str(e)}"


	@tool
	def get_current_time_in_timezone(timezone: str) -> str:
	    """Report the current local time for a named timezone.

	    Args:
	        timezone: A timezone name such as 'America/New_York' or 'Europe/London'.
	            A few abbreviations (EST, PST, MST, CST, GMT, UTC, CET, JST) are
	            mapped to a timezone name, case-insensitively.

	    Returns:
	        A sentence with the current time formatted as 'YYYY-MM-DD HH:MM:SS TZ',
	        or a string starting with "Error" for empty or unknown timezones.
	    """
	    try:
	        if not timezone or not timezone.strip():
	            return "Error: Timezone cannot be empty"

	        timezone = timezone.strip()

	        # Abbreviation -> timezone name; anything else is passed through as-is
	        alias_table = {
	            'EST': 'America/New_York',
	            'PST': 'America/Los_Angeles',
	            'MST': 'America/Denver',
	            'CST': 'America/Chicago',
	            'GMT': 'GMT',
	            'UTC': 'UTC',
	            'CET': 'Europe/Berlin',
	            'JST': 'Asia/Tokyo',
	        }
	        timezone = alias_table.get(timezone.upper(), timezone)

	        now_there = datetime.datetime.now(pytz.timezone(timezone))
	        formatted_time = now_there.strftime("%Y-%m-%d %H:%M:%S %Z")

	        return f"The current local time in {timezone} is: {formatted_time}"

	    except pytz.exceptions.UnknownTimeZoneError:
	        return f"Error: Unknown timezone '{timezone}'. Please use a valid timezone like 'America/New_York' or 'Europe/London'"
	    except Exception as e:
	        return f"Error fetching time for timezone '{timezone}': {str(e)}"


	@tool
	def factorial(n: int) -> Union[int, str]:
	    """Compute n! for an integer n between 0 and 170 inclusive.

	    Args:
	        n: A non-negative integer no greater than 170.

	    Returns:
	        ``math.factorial(n)``, or a string starting with "Error" when the
	        input is not an int, is negative, or exceeds 170.
	    """
	    try:
	        if not isinstance(n, int):
	            return "Error: Input must be an integer"
	        elif n < 0:
	            return "Error: Factorial is not defined for negative numbers"
	        elif n > 170:  # upper bound accepted by this tool
	            return "Error: Number too large for factorial calculation"
	        return math.factorial(n)
	    except Exception as e:
	        return f"Error calculating factorial: {str(e)}"


	@tool
	def greatest_common_divisor(a: int, b: int) -> Union[int, str]:
	    """Compute the greatest common divisor of two integers.

	    Args:
	        a: First integer.
	        b: Second integer.

	    Returns:
	        The non-negative GCD of the absolute values of a and b, or a string
	        starting with "Error" when either input is not an int.
	    """
	    try:
	        both_ints = isinstance(a, int) and isinstance(b, int)
	        if not both_ints:
	            return "Error: Both inputs must be integers"
	        magnitude_a, magnitude_b = abs(a), abs(b)
	        return math.gcd(magnitude_a, magnitude_b)
	    except Exception as e:
	        return f"Error calculating GCD: {str(e)}"


	@tool
	def least_common_multiple(a: int, b: int) -> Union[int, str]:
	    """Compute the least common multiple of two integers.

	    Args:
	        a: First integer.
	        b: Second integer.

	    Returns:
	        The non-negative LCM of a and b (0 when either is 0), or a string
	        starting with "Error" when either input is not an int.
	    """
	    try:
	        both_ints = isinstance(a, int) and isinstance(b, int)
	        if not both_ints:
	            return "Error: Both inputs must be integers"
	        if a == 0 or b == 0:
	            return 0
	        common_divisor = math.gcd(abs(a), abs(b))
	        return abs(a * b) // common_divisor
	    except Exception as e:
	        return f"Error calculating LCM: {str(e)}"


	@tool
	def is_prime(n: int) -> Union[bool, str]:
	    """Checks if a number is prime.

	    Args:
	        n: The number to check.

	    Returns:
	        True if n is prime, False otherwise, or a string starting with
	        "Error" when the input is not an int.
	    """
	    try:
	        if not isinstance(n, int):
	            return "Error: Input must be an integer"
	        if n < 2:
	            return False
	        if n == 2:
	            return True
	        if n % 2 == 0:
	            return False

	        # math.isqrt is exact for arbitrarily large ints; int(math.sqrt(n))
	        # can be off by one above 2**53 and overflows for very large n.
	        limit = math.isqrt(n)

	        # Trial division by odd candidates up to the integer square root
	        return all(n % divisor for divisor in range(3, limit + 1, 2))
	    except Exception as e:
	        return f"Error checking if prime: {str(e)}"


	@tool
	def percentage_calculator(part: Union[int, float], whole: Union[int, float]) -> Union[float, str]:
	    """Express 'part' as a percentage of 'whole'.

	    Args:
	        part: The part value.
	        whole: The whole value; must not be zero.

	    Returns:
	        The percentage rounded to two decimals, or a string starting with
	        "Error" when 'whole' is zero or the computation fails.
	    """
	    try:
	        whole_is_zero = whole == 0
	        if whole_is_zero:
	            return "Error: Cannot calculate percentage when whole is zero"
	        ratio = part / whole
	        return round(ratio * 100, 2)
	    except Exception as e:
	        return f"Error calculating percentage: {str(e)}"


	@tool
	def compound_interest(principal: Union[int, float], rate: Union[int, float],
	                      time: Union[int, float], compound_frequency: int = 1) -> Union[float, str]:
	    """Compute the final amount of an investment under compound interest.

	    Args:
	        principal: The initial amount of money; must be positive.
	        rate: The annual interest rate as a percentage (e.g., 5 for 5%).
	        time: The time period in years; must not be negative.
	        compound_frequency: Compounding periods per year (default: 1).

	    Returns:
	        The final amount A = P(1 + r/n)^(nt) rounded to two decimals, or a
	        string starting with "Error" when an argument is out of range.
	    """
	    try:
	        # Validate in a fixed order so the first failing argument is reported
	        if principal <= 0:
	            return "Error: Principal must be positive"
	        elif rate < 0:
	            return "Error: Interest rate cannot be negative"
	        elif time < 0:
	            return "Error: Time cannot be negative"
	        elif compound_frequency <= 0:
	            return "Error: Compound frequency must be positive"

	        rate_decimal = rate / 100  # percentage -> fraction
	        growth_per_period = 1 + rate_decimal / compound_frequency
	        period_count = compound_frequency * time
	        amount = principal * growth_per_period ** period_count

	        return round(amount, 2)
	    except Exception as e:
	        return f"Error calculating compound interest: {str(e)}"


	@tool
	def convert_temperature(value: Union[int, float], from_unit: str, to_unit: str) -> Union[float, str]:
	    """Convert a temperature between Celsius, Fahrenheit, and Kelvin.

	    Args:
	        value: The temperature value to convert.
	        from_unit: The source unit ('C', 'F', 'K' or the full unit name).
	        to_unit: The target unit ('C', 'F', 'K' or the full unit name).

	    Returns:
	        The converted value rounded to two decimals (or ``float(value)`` when
	        both units are the same), or a string starting with "Error".
	    """
	    try:
	        # Unit names are matched case-insensitively; full names collapse to letters
	        long_names = {'CELSIUS': 'C', 'FAHRENHEIT': 'F', 'KELVIN': 'K'}
	        from_unit = from_unit.upper().strip()
	        to_unit = to_unit.upper().strip()
	        from_unit = long_names.get(from_unit, from_unit)
	        to_unit = long_names.get(to_unit, to_unit)

	        known = ['C', 'F', 'K']
	        if not (from_unit in known and to_unit in known):
	            return "Error: Units must be 'C' (Celsius), 'F' (Fahrenheit), or 'K' (Kelvin)"

	        if from_unit == to_unit:
	            return float(value)

	        # Celsius is the pivot unit: source -> Celsius -> target
	        to_celsius = {
	            'F': lambda v: (v - 32) * 5/9,
	            'K': lambda v: v - 273.15,
	            'C': lambda v: v,
	        }
	        from_celsius = {
	            'F': lambda c: c * 9/5 + 32,
	            'K': lambda c: c + 273.15,
	            'C': lambda c: c,
	        }
	        celsius = to_celsius[from_unit](value)
	        result = from_celsius[to_unit](celsius)

	        return round(result, 2)
	    except Exception as e:
	        return f"Error converting temperature: {str(e)}"

	@tool
	def wikipedia_search(query: str) -> str:
	    """
	    Look up a query on Wikipedia and return at most 2 matching articles.

	    Args:
	        query: The search query.

	    Returns:
	        A dict with key "wiki_results" whose value is the loaded documents,
	        each wrapped in a <Document> tag and separated by "---" lines.
	    """
	    loader = WikipediaLoader(query=query, load_max_docs=2)
	    rendered = []
	    for doc in loader.load():
	        rendered.append(
	            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
	        )
	    return {"wiki_results": "\n\n---\n\n".join(rendered)}

	@tool
	def arxiv_search(query: str) -> str:
	    """
	    Search Arxiv for a query and return at most 3 results.

	    Args:
	        query: The search query.

	    Returns:
	        A dict with key "arxiv_results" whose value is the loaded documents
	        (first 1000 characters each), wrapped in <Document> tags and
	        separated by "---" lines.
	    """
	    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

	    def _source_of(metadata: dict) -> str:
	        # ArxivLoader's default metadata (Published, Title, Authors, Summary)
	        # has no "source" key, so indexing it directly raised KeyError. Fall
	        # back to the entry id, then the title.
	        if "source" in metadata:
	            return metadata["source"]
	        return metadata.get("entry_id", metadata.get("Title", ""))

	    formatted_search_docs = "\n\n---\n\n".join(
	        f'<Document source="{_source_of(doc.metadata)}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
	        for doc in search_docs
	    )
	    return {"arxiv_results": formatted_search_docs}


	@tool
	def analyze_excel_file(file_path: str, query: str) -> str:
	    """
	    Load an Excel file with pandas and return an overview of its contents.

	    Args:
	        file_path (str): the path to the Excel file.
	        query (str): Question about the data. Currently not used by the
	            summary; kept so the tool signature stays stable.

	    Returns:
	        A text report with the row/column counts, the column names and the
	        output of DataFrame.describe(), or a string starting with
	        "Error analyzing Excel file:" when loading or summarising fails.
	    """
	    try:
	        file = pandas.read_excel(file_path)

	        result = (
	            f"Excel file loaded with {len(file)} rows and {len(file.columns)} columns.\n"
	        )
	        # Excel headers are often numbers or dates; str.join only accepts
	        # strings, so convert each label before joining.
	        result += f"Columns: {', '.join(str(column) for column in file.columns)}\n\n"

	        result += "Summary statistics:\n"
	        result += str(file.describe())

	        return result

	    except Exception as e:
	        return f"Error analyzing Excel file: {str(e)}"


	@tool
	def python_code_parser(file_path: str) -> str:
	    """
	    Load Python source from a path and return it as formatted documents.

	    Args:
	        file_path: The path handed to the filesystem loader.

	    Returns:
	        The string "0" when the path does not exist; otherwise a dict with
	        key "python_results" whose value is the parsed documents, each
	        wrapped in a <Document> tag and separated by "---" lines.
	    """
	    path_is_missing = not os.path.exists(file_path)
	    if path_is_missing:
	        return "0"

	    parsed_docs = GenericLoader.from_filesystem(
	        file_path,
	        glob="*/",
	        suffixes=[".py"],
	        parser=LanguageParser()
	    ).load()

	    rendered = []
	    for doc in parsed_docs:
	        rendered.append(
	            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
	        )

	    return {"python_results": "\n\n---\n\n".join(rendered)}


	@tool
	def audio_transcription(file_path: str) -> str:
	    """
	    Transcribe an audio file to text using AssemblyAI.

	    Args:
	        file_path: The path to the audio file.

	    Returns:
	        A dict with key "audio_results" whose value is the transcript
	        documents, each wrapped in a <Document> tag and separated by "---"
	        lines.
	    """
	    search_docs = AssemblyAIAudioTranscriptLoader(file_path=file_path).load()

	    def _source_of(metadata: dict) -> str:
	        # The loader stores AssemblyAI's transcript JSON as metadata, which has
	        # no "source" key, so indexing it directly raised KeyError. Fall back
	        # to the uploaded audio URL, then to the local path.
	        if "source" in metadata:
	            return metadata["source"]
	        return metadata.get("audio_url", file_path)

	    formatted_search_docs = "\n\n---\n\n".join(
	        f'<Document source="{_source_of(doc.metadata)}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
	        for doc in search_docs
	    )

	    return {"audio_results": formatted_search_docs}


	@tool
	def analyze_csv_file(file_path: str, query: str) -> str:
	    """
	    Load a CSV file with pandas and return an overview of its contents.

	    Args:
	        file_path (str): the path to the CSV file.
	        query (str): Question about the data (not used by the summary).

	    Returns:
	        A text report with the row/column counts, the column names and the
	        output of DataFrame.describe(), or a string starting with
	        "Error analyzing CSV file:" when loading or summarising fails.
	    """
	    try:
	        frame = pandas.read_csv(file_path)

	        report_sections = [
	            f"CSV file loaded with {len(frame)} rows and {len(frame.columns)} columns.\n",
	            f"Columns: {', '.join(frame.columns)}\n\n",
	            "Summary statistics:\n",
	            str(frame.describe()),
	        ]
	        return "".join(report_sections)

	    except Exception as e:
	        return f"Error analyzing CSV file: {str(e)}"


	@tool
	def extract_text(img_path: str) -> str:
	    """
	    Extract text from an image file using a multimodal model.

	    This allows the contents of the image to be analyzed properly.

	    Args:
	        img_path: Path to the image file on disk.

	    Returns:
	        The text reported by the model, with surrounding whitespace removed.

	    Raises:
	        OSError: If the image file cannot be opened or read.
	    """
	    import mimetypes

	    # Read image and encode as base64. Done before creating the model client
	    # so an unreadable path fails fast.
	    with open(img_path, "rb") as image_file:
	        image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

	    # Label the payload with the file's real image type instead of always
	    # claiming PNG; fall back to PNG when the extension is unknown.
	    mime_type, _ = mimetypes.guess_type(img_path)
	    if not mime_type or not mime_type.startswith("image/"):
	        mime_type = "image/png"

	    vision_llm = ChatOpenAI(model="gpt-4o")

	    # Prepare the prompt including the base64 image data
	    message = [
	        HumanMessage(
	            content=[
	                {
	                    "type": "text",
	                    "text": (
	                        "Extract all the text from this image. "
	                        "Return only the extracted text, no explanations."
	                    ),
	                },
	                {
	                    "type": "image_url",
	                    "image_url": {
	                        "url": f"data:{mime_type};base64,{image_base64}"
	                    },
	                },
	            ]
	        )
	    ]

	    # Call the vision-capable model
	    response = vision_llm.invoke(message)

	    return response.content.strip()


	@tool
	def reverse_sentence(text: str) -> str:
	    """
	    Return the input text with its characters in reverse order. Useful when a
	    question arrives written backwards and must be restored before answering.

	    Args:
	        text (str): The input string to be reversed.

	    Returns:
	        str: The reversed string.
	    """
	    back_to_front = slice(None, None, -1)
	    return text[back_to_front]

	@tool
	def web_search(query: str) -> str:
	    """
	    Searches the web and returns a numbered list of the most relevant URLs,
	    each with its title and a short content snippet.
	    Use this FIRST for complex queries, metadata questions, or to find the right sources.
	    Then follow up with web_content_extract on the most promising URL.
	    """
	    try:
	        searcher = TavilySearch(
	            max_results=5,
	            topic="general",
	            search_depth="advanced",
	            include_raw_content=False,  # Just URLs and snippets
	        )
	        results = searcher.invoke(query)

	        # One entry per hit: index, title, URL and the first 150 characters
	        entries = [
	            f"{i}. {result['title']}: {result['url']}\n {result['content'][:150]}...\n\n"
	            for i, result in enumerate(results["results"], 1)
	        ]
	        return "Search Results:\n" + "".join(entries)
	    except Exception as e:
	        return f"web_search tool error: {str(e)}"

	@tool
	def web_content_extract(url: str) -> str:
	    """
	    Extracts the readable text content from a URL using BeautifulSoup.
	    Particularly effective for Wikipedia metadata pages, discussion pages,
	    and structured web content.
	    Can be used after web_search to get detailed information.

	    Args:
	        url: The page to fetch.

	    Returns:
	        "Content extracted from <url>:" followed by the page text, cut to
	        10000 characters with a trailing "..." when longer, or a string
	        starting with "web_content_extract tool error:" on failure.
	    """
	    max_chars = 10000
	    try:
	        headers = {
	            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
	        }

	        response = requests.get(url, headers=headers, timeout=10)
	        response.raise_for_status()  # Raise exception for 4XX/5XX responses

	        soup = BeautifulSoup(response.text, 'html.parser')
	        # Drop non-content markup before extracting text
	        for element in soup.select('script, style, footer, nav, header'):
	            element.decompose()
	        root = soup.body if soup.body else soup
	        text = root.get_text(separator='\n', strip=True)

	        # Previously the conditional expression covered the whole f-string, so
	        # short pages came back without the header; now the header is always
	        # present and only the truncation marker is conditional.
	        body = f"{text[:max_chars]}..." if len(text) > max_chars else text
	        return f"Content extracted from {url}:\n\n{body}"

	    except Exception as e:
	        return f"web_content_extract tool error: {str(e)}"