import os
import base64
import mimetypes
import re

import pandas as pd
import requests
import whisper
from bs4 import BeautifulSoup, Tag, Comment
from markdownify import markdownify as md
from smolagents import tool, LiteLLMModel
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter
# import wikipediaapi  # only needed if the commented-out get_wikipedia_page_content tool below is revived

# This could be done more cleanly via a managed agent, but it is a quick hack to get it working.
def describe_image_file(local_image_path: str) -> str:
    """
    Describe the contents of a local image file in detail and return the description as text.

    Args:
        local_image_path (str): The path to the local image file to be described.

    Returns:
        str: A detailed description of the image contents.
    """
    model = LiteLLMModel(
        model_id='ollama/gemma3:27b',
        api_base="https://192.168.5.217:8000",  # replace with a remote OpenAI-compatible server if necessary
        api_key=os.getenv("OLLAMA_REVPROXY_SRVML"),
        num_ctx=16384,  # ollama's default is 2048, which will often fail horribly. 8192 works for easy tasks; more is better. Check https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator to estimate how much VRAM this needs for the selected model.
        ssl_verify=False,  # explicitly disable SSL verification
        extra_headers={
            "Authorization": f"Bearer {os.getenv('OLLAMA_REVPROXY_SRVML')}",  # explicitly set auth header
        },
        flatten_messages_as_text=False
    )
    text_prompt = "What is in this image? Describe it in detail."
    try:
        if not os.path.exists(local_image_path):
            raise FileNotFoundError(f"Image file not found at {local_image_path}. Please ensure it was downloaded correctly.")

        # 1. Read the image content from the local file
        with open(local_image_path, "rb") as image_file:
            image_content_bytes = image_file.read()

        # 2. Base64-encode the image content
        base64_image_bytes = base64.b64encode(image_content_bytes)
        base64_image_string = base64_image_bytes.decode('utf-8')

        # 3. Determine the MIME type from the file extension
        if local_image_path.lower().endswith('.png'):
            content_type = 'image/png'
        elif local_image_path.lower().endswith(('.jpg', '.jpeg')):
            content_type = 'image/jpeg'
        elif local_image_path.lower().endswith('.gif'):
            content_type = 'image/gif'
        elif local_image_path.lower().endswith('.bmp'):
            content_type = 'image/bmp'
        elif local_image_path.lower().endswith('.webp'):
            content_type = 'image/webp'
        else:
            content_type = mimetypes.guess_type(local_image_path)[0] or 'application/octet-stream'
        print(f"Using MIME type: {content_type}")

        # 4. Construct the data URI
        data_uri = f"data:{content_type};base64,{base64_image_string}"

        # Construct the messages payload
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": data_uri  # use the base64 data URI here
                        }
                    }
                ]
            }
        ]
        response = model.generate(messages)
        return response.content  # return the text content of the model's reply
    except FileNotFoundError as fnf_err:
        print(f"File error: {fnf_err}")
        return f"File error: {fnf_err}"
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"An error occurred while describing the image: {e}"
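
# Example usage (a minimal sketch; "question_image.png" is a hypothetical local file,
# and the Ollama endpoint configured above must be reachable):
#
#     description = describe_image_file("question_image.png")
#     print(description)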

def get_youtube_video_transcript(video_id: str) -> str:
    """
    Fetches the transcript of a YouTube video by its ID and returns it in JSON format.
    The video ID can be found in the YouTube video URL:
    https://www.youtube.com/watch?v=VIDEO_ID, where VIDEO_ID is the part after "v=".
    Example: for the URL https://www.youtube.com/watch?v=L1vXCYZAYYM the video_id is "L1vXCYZAYYM".

    Args:
        video_id (str): The YouTube video ID.

    Returns:
        str: The transcript in JSON format.
    """
    ytt_api = YouTubeTranscriptApi()
    transcript = ytt_api.fetch(video_id)
    formatter = JSONFormatter()
    # .format_transcript(transcript) turns the transcript into a JSON string.
    json_formatted = formatter.format_transcript(transcript)
    return json_formatted
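
# Example usage (a minimal sketch; the video ID is the one from the docstring example):
#
#     transcript_json = get_youtube_video_transcript("L1vXCYZAYYM")
#     print(transcript_json[:500])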

def transcribe_mp3(mp3_path: str, model_size: str = "base") -> str:
    """
    Transcribe an MP3 file to text using Whisper.

    Args:
        mp3_path (str): Path to the MP3 file.
        model_size (str): Whisper model size (tiny, base, small, medium, large).

    Returns:
        str: Transcribed text.
    """
    transcription_path = mp3_path.replace(".mp3", "_transcript.txt")
    # Return a cached transcription if one already exists next to the MP3
    if os.path.exists(transcription_path):
        with open(transcription_path, 'r', encoding='utf-8') as f:
            return f.read()
    # Load the Whisper model
    model = whisper.load_model(model_size)
    # Transcribe the audio file
    result = model.transcribe(mp3_path)
    transcription = result["text"]
    # Save the transcription to a sidecar file for reuse
    with open(transcription_path, 'w', encoding='utf-8') as f:
        f.write(transcription)
    # Return the text
    return transcription
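
# Example usage (a minimal sketch; "interview.mp3" is a hypothetical local file,
# and the chosen Whisper model is downloaded on first use):
#
#     text = transcribe_mp3("interview.mp3", model_size="base")
#     print(text)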

def get_text_from_ascii_file(filepath: str) -> str:
    """
    Reads the content of an ASCII text file and returns it as a string.

    Args:
        filepath (str): The path to the ASCII text file.

    Returns:
        str: The content of the file as a string.
    """
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"The file at {filepath} does not exist.")
    with open(filepath, "r") as f:
        return f.read()
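
# Example usage (a minimal sketch; "notes.txt" is a hypothetical local text file):
#
#     print(get_text_from_ascii_file("notes.txt"))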

# @tool
# def get_wikipedia_page_content(page_title: str, lang: str = 'en') -> str:
#     """
#     This function uses the `wikipediaapi` library to retrieve the content of a specified Wikipedia page in a given language.
#     For example: for the URL 'https://en.wikipedia.org/wiki/Python_(programming_language)' the page_title would be 'Python_(programming_language)' and the lang would be 'en'.
#     It returns the content of the page as a Markdown-formatted string.
#     Args:
#         page_title (str): The title of the Wikipedia page to fetch.
#         lang (str): The language of the Wikipedia page (default is 'en' for English).
#     Returns:
#         str: The content of the Wikipedia page.
#     """
#     MY_EMAIL = os.getenv("MY_EMAIL", None)
#     if MY_EMAIL is None:
#         raise ValueError("MY_EMAIL environment variable is not set. Please set it to your email address.")
#     wiki_wiki = wikipediaapi.Wikipedia(user_agent=f'Wiki Agent ({MY_EMAIL})', language=lang)
#     page = wiki_wiki.page(page_title)
#     if not page.exists():
#         raise ValueError(f"The Wikipedia page '{page_title}' does not exist.")
#     return md(page.text)

def get_wikipedia_markdown(
    title: str,
    lang: str = 'en',
    ignore_references: bool = True,
    ignore_links: bool = True
) -> str:
    """
    Fetches the main content of a Wikipedia page and returns it as Markdown,
    excluding infoboxes, navigation templates, images, and, if requested, the
    References, Further reading, and External links sections. It is recommended
    to start with ignore_references=True and ignore_links=True to reduce the
    output to the essential information.

    Args:
        title (str): Wikipedia page title (e.g., "Mercedes_Sosa").
        lang (str): Language code (default 'en').
        ignore_references (bool): If True, drop the "References", "Further reading",
            and "External links" sections entirely.
        ignore_links (bool): If True, strip out all <a> tags entirely.

    Returns:
        str: Markdown-formatted content of the main article body.
    """
    # 1. Fetch raw HTML
    url = f"https://{lang}.wikipedia.org/wiki/{title}"
    try:
        response = requests.get(url)
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # Use Wikipedia's search API to suggest existing pages with a similar title
        api_url = f"https://{lang}.wikipedia.org/w/api.php"
        search_params = {
            'list': 'search',
            'srprop': '',
            'srlimit': 10,
            'limit': 10,
            'srsearch': title.replace("_", " "),
            'srinfo': 'suggestion',
            'format': 'json',
            'action': 'query'
        }
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        }
        r = requests.get(api_url, params=search_params, headers=headers)
        raw_results = r.json()
        search_results = [d['title'].replace(" ", "_") for d in raw_results['query']['search']]
        if ('searchinfo' in raw_results['query']) and ('suggestion' in raw_results['query']['searchinfo']):
            search_results.insert(0, raw_results['query']['searchinfo']['suggestion'].replace(" ", "_"))
        errorMsg = f"Could not fetch page '{title}' for language '{lang}' (HTTP {response.status_code})."
        if search_results:
            errorMsg += f" Did you mean one of these pages? {', '.join(search_results)}"
        raise ValueError(errorMsg) from e
    html = response.text

    # 2. Parse with BeautifulSoup and isolate the article's main <div>
    soup = BeautifulSoup(html, "lxml")
    content_div = soup.find("div", class_="mw-parser-output")
    if content_div is None:
        raise ValueError(f"Could not find main content for page '{title}'")

    # 2a. Remove all "[edit]" links (<span class="mw-editsection">...)
    for edit_span in content_div.find_all("span", class_="mw-editsection"):
        edit_span.decompose()

    # 2b. Remove any superscript footnote markers (<sup class="reference">...)
    for sup in content_div.find_all("sup", class_="reference"):
        sup.decompose()

    # 2c. Remove any parser-debug comments (e.g., "NewPP limit report...", "Transclusion expansion time report...")
    for comment in content_div.find_all(string=lambda text: isinstance(text, Comment)):
        comment_text = str(comment)
        # If the comment contains debug keywords, extract it
        if (
            "NewPP limit report" in comment_text
            or "Transclusion expansion time report" in comment_text
            or "Saved in parser cache" in comment_text
        ):
            comment.extract()

    # 3. Remove unwanted "boilerplate" elements:
    # a) Infoboxes (sidebars)
    for infobox in content_div.find_all("table", class_=re.compile(r"infobox")):
        infobox.decompose()
    # b) Table of contents
    toc = content_div.find("div", id="toc")
    if toc:
        toc.decompose()
    # c) Navigation templates (navbox/vertical-navbox/metadata)
    for nav in content_div.find_all(
        ["div", "table"],
        class_=re.compile(r"navbox|vertical-navbox|metadata")
    ):
        nav.decompose()
    # d) Thumbnails / image wrappers
    for thumb in content_div.find_all("div", class_=re.compile(r"thumb")):
        thumb.decompose()
    # e) Raw <img> tags
    for img in content_div.find_all("img"):
        img.decompose()

    # 4. Convert any remaining <table> into a Markdown table in-place
    def table_to_markdown(table_tag: Tag) -> str:
        """
        Converts a <table> into a Markdown-formatted table, preserving <th> headers.
        """
        headers = []
        header_row = table_tag.find("tr")
        if header_row:
            for th in header_row.find_all("th"):
                headers.append(th.get_text(strip=True))
        md_table = ""
        if headers:
            md_table += "| " + " | ".join(headers) + " |\n"
            md_table += "| " + " | ".join("---" for _ in headers) + " |\n"
        # Now process data rows (skip the first <tr> if it was the header row)
        for row in table_tag.find_all("tr")[1:]:
            cells = row.find_all(["td", "th"])
            if not cells:
                continue
            row_texts = [cell.get_text(strip=True) for cell in cells]
            md_table += "| " + " | ".join(row_texts) + " |\n"
        return md_table.rstrip()

    for table in content_div.find_all("table"):
        # Skip infobox/navigation tables (already removed above)
        if "infobox" in table.get("class", []) or table.get("role") == "navigation":
            continue
        markdown_table = table_to_markdown(table)
        new_node = soup.new_string("\n\n" + markdown_table + "\n\n")
        table.replace_with(new_node)

    # 5. Remove the "References", "Further reading" & "External links" sections if requested
    if ignore_references:
        section_ids = {"references", "further_reading", "external_links"}
        # Look for the wrapper <div class="mw-heading mw-heading2"> or mw-heading3
        for wrapper in content_div.find_all("div", class_=re.compile(r"mw-heading mw-heading[23]")):
            heading_tag = wrapper.find(re.compile(r"^h[2-3]$"))
            if heading_tag and heading_tag.get("id", "").strip().lower() in section_ids:
                # Collect every sibling until the next wrapper of the same form
                siblings_to_remove = []
                for sib in wrapper.find_next_siblings():
                    if (
                        sib.name == "div"
                        and "mw-heading" in (sib.get("class") or [])
                        and re.match(r"mw-heading mw-heading[23]", " ".join(sib.get("class") or []))
                    ):
                        break
                    siblings_to_remove.append(sib)
                # First delete those siblings
                for node in siblings_to_remove:
                    node.decompose()
                # Finally delete the wrapper itself
                wrapper.decompose()

    # 6. Convert the cleaned HTML into Markdown
    markdown_options = {}
    if ignore_links:
        markdown_options["strip"] = ["a"]  # strip all <a> tags (keep only their text)
    raw_html = "".join(str(child) for child in content_div.children)
    markdown_text = md(raw_html, **markdown_options)

    # 7. Collapse 3+ blank lines into exactly two
    markdown_text = re.sub(r"\n{3,}", "\n\n", markdown_text).strip()
    return markdown_text
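
# Example usage (a minimal sketch; "Mercedes_Sosa" is the title from the docstring example):
#
#     article_md = get_wikipedia_markdown("Mercedes_Sosa", lang="en",
#                                         ignore_references=True, ignore_links=True)
#     print(article_md[:1000])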

def read_xls_File(file_path: str) -> object:
    """This tool loads an Excel (.xls/.xlsx) file into pandas and returns it.

    Args:
        file_path (str): File path to the Excel file.

    Returns:
        object: The loaded spreadsheet as a pandas DataFrame.
    """
    return pd.read_excel(file_path)
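
# Example usage (a minimal sketch; "sales.xlsx" is a hypothetical local file, and
# pandas needs an Excel engine such as openpyxl installed):
#
#     df = read_xls_File("sales.xlsx")
#     print(df.head())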