robinsmits committed on
Commit
b800e08
Β·
1 Parent(s): 401718e

Multi Agent Setup

Browse files
Files changed (5) hide show
  1. README.md +9 -7
  2. agents.py +100 -0
  3. app.py +234 -0
  4. requirements.txt +14 -0
  5. tooling.py +302 -0
README.md CHANGED
@@ -1,13 +1,15 @@
1
  ---
2
- title: Agents Course Final Project
3
- emoji: 🏒
4
- colorFrom: red
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 5.33.2
8
  app_file: app.py
9
  pinned: false
10
- license: mit
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Template Final Assignment
3
+ emoji: πŸ•΅πŸ»β€β™‚οΈ
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 5.25.2
8
  app_file: app.py
9
  pinned: false
10
+ hf_oauth: true
11
+ # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
12
+ hf_oauth_expiration_minutes: 480
13
  ---
14
 
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agents.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import Modules
2
+ import os
3
+ import pandas as pd
4
+ import torch
5
+ from smolagents import LiteLLMModel, OpenAIServerModel
6
+ from smolagents import (ToolCallingAgent,
7
+ CodeAgent,
8
+ DuckDuckGoSearchTool,
9
+ VisitWebpageTool,
10
+ WikipediaSearchTool,
11
+ FinalAnswerTool,
12
+ PythonInterpreterTool)
13
+
14
+ # Custom Modules
15
+ from tooling import (vision_language_tool,
16
+ read_excel_tool,
17
+ speech_to_text_tool,
18
+ youtube_captions_tool)
19
+
20
# Agent Model
# Single shared LLM instance reused by every agent created below.
# NOTE(review): requires the OPENAI_KEY environment variable (Space secret);
# os.getenv returns None when it is missing — confirm it is configured.
model = OpenAIServerModel(model_id = "gpt-4.1",
                          api_key = os.getenv('OPENAI_KEY'))
23
+
24
# Create Vision Agent
def create_vision_agent():
    """Build the ToolCallingAgent that answers questions about images.

    Returns:
        ToolCallingAgent: agent wrapping vision_language_tool, intended to be
        attached to the manager agent as a managed agent.
    """
    # The description below is read by the manager LLM when deciding whether
    # to delegate to this agent; grammar fixed so the instruction is clear.
    return ToolCallingAgent(model = model,
                            tools = [FinalAnswerTool(),
                                     vision_language_tool],
                            name = 'vision_agent',
                            planning_interval = 2,
                            verbosity_level = 2,
                            max_steps = 4,
                            provide_run_summary = True,
                            description = """
                            A team member that will use a vision language model to answer a question about an image.
                            Ask it for all your questions that require answering a question about a picture or image.
                            Provide the file name of the image and the specific question that you want it to answer.
                            """)
40
+
41
# Create Web Agent
def create_web_agent():
    """Return the CodeAgent specialised in web search and page reading."""
    # Tools for searching, visiting and parsing web content.
    web_tools = [
        FinalAnswerTool(),
        DuckDuckGoSearchTool(max_results = 15),
        VisitWebpageTool(max_output_length = 75000),
        WikipediaSearchTool(user_agent = "FinalAssignmentResearchBot ([email protected])",
                            language = "en",
                            content_type = "text",
                            extract_format = "WIKI"),
    ]

    # Modules the generated code snippets are allowed to import.
    allowed_imports = ["json", "pandas", "re", "bs4", "requests",
                       "numpy", "math", "xml", "scikit-learn"]

    # Read by the manager agent when deciding to delegate to this agent.
    agent_description = """
            A team member that will use various tools to search for websites, to visit websites and to parse and read information from websites.
            Every question that requires to retrieve information from the internet to be answered must be answered by using the web_agent.
            The gathered information to create the final answer will be reported back to the manager_agent.
            """

    return CodeAgent(model = model,
                     tools = web_tools,
                     additional_authorized_imports = allowed_imports,
                     name = 'web_agent',
                     planning_interval = 3,
                     verbosity_level = 2,
                     max_steps = 12,
                     provide_run_summary = True,
                     description = agent_description)
71
+
72
# Create Manager Agent
def create_manager_agent():
    """Assemble the top-level CodeAgent that orchestrates the managed agents."""
    # Delegate agents the manager can hand work to.
    vision_agent = create_vision_agent()
    web_agent = create_web_agent()

    # Tools the manager can call directly itself.
    manager_tools = [FinalAnswerTool(),
                     PythonInterpreterTool(),
                     speech_to_text_tool,
                     youtube_captions_tool,
                     read_excel_tool]

    # Modules the generated code snippets are allowed to import.
    allowed_imports = ['json', 'pandas', 're', 'bs4', 'requests',
                       'numpy', 'math', 'xml', 'scikit-learn']

    # Return Manager Agent
    return CodeAgent(model = model,
                     tools = manager_tools,
                     name = 'manager_agent',
                     additional_authorized_imports = allowed_imports,
                     planning_interval = 3,
                     verbosity_level = 2,
                     stream_outputs = True,
                     max_steps = 12,
                     provide_run_summary = True,
                     managed_agents = [vision_agent, web_agent])
app.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import pandas as pd
6
+ import gc
7
+ import json
8
+
9
+ # Custom
10
+ from tooling import (check_for_file_name_and_return_prompt,
11
+ get_manager_agent_prompt,
12
+ gradio_main_instructions)
13
+ from agents import create_manager_agent
14
+
15
+
16
+ # --- Constants ---
17
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
+
19
def run_and_submit_all( profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the manager agent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        tuple: (status message str, pandas DataFrame of per-question results,
        or None for the DataFrame when the run aborted early).
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        # Create Manager Agent
        manager_agent = create_manager_agent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # In the case of an app running as a Hugging Face space, this link points
    # toward your codebase (useful for others so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for index, item in enumerate(questions_data):
        print(f"Running question {index} {item.get('question')}")

        task_id = item.get("task_id")
        question_text = item.get("question")
        # BUGFIX: a missing "file_name" key yields None, which previously
        # passed the `!= ''` check and crashed the prompt helper; normalise
        # it to the empty string.
        file_name = item.get("file_name") or ''

        # BUGFIX: validate the item BEFORE downloading its file, so invalid
        # items do not trigger a pointless download.
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        # File Check
        file_prompt = check_for_file_name_and_return_prompt(file_name)

        # File Download
        if file_name != '':
            # GET /files/{task_id}: Download a specific file associated with a given task ID.
            files_url = f"{api_url}/files/{task_id}"
            print(f"Fetching files for task_id: {task_id}")
            try:
                response = requests.get(files_url, stream=True, timeout=30)
                response.raise_for_status()

                # Save file to disk in chunks so large files do not need to
                # fit in memory at once.
                with open(file_name, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive chunks
                            f.write(chunk)
                print(f"File '{file_name}' downloaded and saved successfully.")

            except requests.exceptions.RequestException as e:
                print(f"Request error while fetching files: {e}")
                return f"Request error while fetching files: {e}", None
            except Exception as e:
                print(f"An unexpected error occurred while saving the file: {e}")
                return f"An unexpected error occurred while saving the file: {e}", None

        ################################################################################
        ###### RUN MANAGER AGENT
        ################################################################################
        try:
            # Run Manager Agent
            submitted_answer = manager_agent.run(get_manager_agent_prompt(question_text, file_prompt))

            # The scoring API only accepts str/int/float answers (a list is
            # rejected with HTTP 422 — see the log at the bottom of this
            # file), so stringify container results.
            if isinstance(submitted_answer, (list, dict)):
                submitted_answer = str(submitted_answer)

            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    #################################################################################
    # Writing the list of dictionaries to a plain text file (overwriting the existing file)
    with open('results_log.txt', 'w') as file:
        json.dump(results_log, file, indent=4)

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
189
+
190
+
191
# --- Build Gradio Interface using Blocks ---
# Simple one-button UI: log in, run the agent over all questions, show results.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(gradio_main_instructions)
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)

    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs=` needed: gr.LoginButton makes the OAuth profile available
    # to run_and_submit_all via its gr.OAuthProfile parameter.
    run_button.click(fn = run_and_submit_all,
                     outputs = [status_output, results_table])

if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup: # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)


# NOTE(review): leftover log of a real failed submission, kept for reference —
# it documents why run_and_submit_all stringifies list/dict answers before
# submitting (the scoring API rejects non-scalar answers with HTTP 422).
"""
Submission Failed: Server responded with status 422. Detail: [{'type': 'string_type', 'loc': ['body', 'answers', 13, 'submitted_answer', 'str'], 'msg': 'Input should be a valid string', 'input': ['45', '50', '67', '89']}, {'type': 'int_type', 'loc': ['body', 'answers', 13, 'submitted_answer', 'int'], 'msg': 'Input should be a valid integer', 'input': ['45', '50', '67', '89']}, {'type': 'float_type', 'loc': ['body', 'answers', 13, 'submitted_answer', 'float'], 'msg': 'Input should be a valid number', 'input': ['45', '50', '67', '89']}]
"""
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio[oauth]
2
+ numpy
3
+ openpyxl
4
+ pandas
5
+ requests
6
+ smolagents[all]
7
+ autoawq
8
+ transformers==4.51.3
9
+ scikit-learn
10
+ wikipedia-api
11
+ num2words==0.5.14
12
+ yt-dlp
13
+ librosa
14
+ soundfile
tooling.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://github.com/huggingface/smolagents/blob/v1.17.0/src/smolagents/default_tools.py#L479
2
+
3
+ # Import Modules
4
+ import os
5
+ import pandas as pd
6
+ import yt_dlp
7
+ import re
8
+
9
+ # Smolagents
10
+ import torch
11
+ from transformers import AutoProcessor, AutoModelForVision2Seq
12
+ from smolagents import (tool)
13
+ from smolagents.tools import PipelineTool
14
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
15
+ import librosa
16
+ import numpy as np
17
+
18
+
19
# Markdown shown at the top of the Gradio app (user-facing text).
# Typos fixed: broken quoting around "Submit" and "seperate" -> "separate".
gradio_main_instructions = """
**Instructions:**

1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

---
**Disclaimers:**
Once you click the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
"""
31
+
32
def get_manager_agent_prompt(question_text, file_prompt):
    """
    Assemble the full prompt handed to the manager agent for one question.

    Args:
        question_text: The benchmark question to answer.
        file_prompt: File-information section produced by
            check_for_file_name_and_return_prompt (or the no-file notice).

    Returns:
        The formatted prompt string (typos in the answer-format rules fixed:
        "depending of" -> "depending on", "than make" -> "then make").
    """
    return f"""
    # Objective:
    Your task is to analyze the following question and to provide a final answer.

    {file_prompt}

    # Question:
    {question_text}

    # Final Answer requirements:
    The final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
    If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
    If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
    If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.

    !! Note !! If the question itself mentions specific instructions for how the answer should be formatted then make absolutely sure those are also applied to the answer!!
    """
50
+
51
def check_for_file_name_and_return_prompt(file_name):
    """
    Build the file-information section of the manager agent prompt.

    Args:
        file_name: Name of the file attached to the question; '' or None when
            the question has no attachment.

    Returns:
        str: a prompt fragment describing the attached file and how to use it,
        or a no-file notice when there is no attachment. Always returns a
        string (previously an unrecognised extension fell through and returned
        None, which rendered as "None" inside the prompt).
    """
    # BUGFIX: treat None like '' so a missing "file_name" key cannot crash
    # the substring checks below.
    if not file_name:
        return 'For this question there is no file with additional information available.'

    # Detect File Type
    if '.xlsx' in file_name:
        file_type = 'Excel Sheet'
        return f"""
        # File Information
        For this question there is a file named "{file_name}" with additional information related to the question available.
        The specific file is of type: {file_type}.
        The file is already downloaded and available for use.
        Load the file based on the file name with the pandas python library or use the read_excel_tool. Choose what works best for you.
        Carefully load the file and use its content in the best and correct way possible to help you answer the question."""
    elif '.csv' in file_name:
        file_type = 'CSV File'
        return f"""
        # File Information
        For this question there is a file named "{file_name}" with additional information related to the question available.
        The specific file is of type: {file_type}.
        The file is already downloaded and available for use.
        Load the file based on the file name with the pandas python library.
        Carefully load the file and use its content in the best and correct way possible to help you answer the question."""
    elif '.mp3' in file_name:
        file_type = 'MP3 Audio File'
        return f"""
        # File Information
        For this question there is a file named '{file_name}' with additional information related to the question available.
        The specific file is of type: {file_type}.
        The file is already downloaded and available for use with the available tools to load the specific file.
        Carefully load the file and use its content in the best and correct way possible to help you answer the question.
        If the file name mentioned specifically in the question is different from the following file name '{file_name}' then keep using the following file name: '{file_name}'.
        """
    elif '.png' in file_name:
        file_type = 'PNG Image File'
        return f"""
        # File Information
        For this question there is a file named "{file_name}" with additional information related to the question available.
        The specific file is of type: {file_type}.
        The file is already downloaded and available for use. Use the 'vision_agent' to load the file and answer the question.
        Make sure to pass the file name and question!!"""
    elif '.py' in file_name:
        file_type = 'Python Script File'
        # Inline the script so the model can read it without tooling.
        with open(file_name, "r") as py_file:
            python_script_contents = py_file.read()
        return f"""
        # File Information
        For this question there is a file named '{file_name}' with additional information related to the question available.
        The specific file is of type: {file_type}.
        The file is already downloaded and available for use with the available tools to load the specific file.

        As an extra service below is the content of the Python Script File also visible.

        # Python Script File Content
        ```
        {python_script_contents}
        ```
        """

    # BUGFIX: generic fallback for any other extension instead of implicitly
    # returning None.
    return f"""
    # File Information
    For this question there is a file named "{file_name}" with additional information related to the question available.
    The file is already downloaded and available for use with the available tools to load the specific file.
    Carefully load the file and use its content in the best and correct way possible to help you answer the question."""
109
+
110
# Create Models for Vision Tool
# Loaded once at import time so every vision_language_tool call reuses them.
# NOTE(review): device is hard-coded to "cuda" — .to(device) will fail on a
# CPU-only machine; confirm the Space actually runs on GPU hardware.
device = "cuda"
vision_model_path = "ibm-granite/granite-vision-3.2-2b"
vision_processor = AutoProcessor.from_pretrained(vision_model_path)
vision_model = AutoModelForVision2Seq.from_pretrained(vision_model_path,
                                                      torch_dtype = torch.bfloat16).to(device)
116
+
117
@tool
def vision_language_tool(question: str, file_name: str) -> str:
    """
    This vision language tool will load any image based on the provided file_name and will answer the question that is provided.
    Args:
        question: A string that contains the question that we need to answer about the image.
        file_name: A string containing the image file name.
    Returns:
        A string containing the answer to the question.
    """

    prompt = f"""
    You are provided with an image.

    Answer the following question about the image very specifically and in detail:

    {question}"""
    # Debug: show which files are present in the working directory.
    print(f"vlt: {os.listdir('./')}")
    conversation = [
        {
            "role": "user",
            "content": [{"type": "image", "url": file_name}, {"type": "text", "text": prompt}],
        },
    ]
    inputs = vision_processor.apply_chat_template(conversation,
                                                  add_generation_prompt = True,
                                                  tokenize = True,
                                                  return_dict = True,
                                                  return_tensors = "pt").to(device)

    # Autoregressively complete the prompt.
    model_output = vision_model.generate(**inputs,
                                         max_new_tokens = 1024,
                                         temperature = 0.2,
                                         do_sample = True,
                                         top_p = 0.975,
                                         top_k = 75,
                                         min_p = 0.05,
                                         repetition_penalty = 1.15)
    # BUGFIX: generate() on this decoder-style VLM returns prompt + completion
    # in one sequence; decoding model_output[0] in full echoed the prompt back
    # as part of the answer. Decode only the newly generated tokens.
    generated_tokens = model_output[0][inputs["input_ids"].shape[-1]:]
    answer = vision_processor.decode(generated_tokens, skip_special_tokens = True)

    return answer
160
+
161
@tool
def speech_to_text_tool(file_name: str) -> str:
    """
    This speech to text tool will use the provided file name to load an mp3 audio file and output a transcription of the audio file as a text string.
    Args:
        file_name: A string containing the audio file name.
    Returns:
        A string containing the transcribed text of the audio file.
    """

    # Load model and processor (whisper-small runs acceptably on CPU here).
    model_name = "openai/whisper-small"
    processor = WhisperProcessor.from_pretrained(model_name)
    model = WhisperForConditionalGeneration.from_pretrained(model_name).to('cpu')
    model.config.forced_decoder_ids = None

    # Load and resample audio to 16kHz mono — Whisper's expected input format.
    speech_array, sampling_rate = librosa.load(file_name, sr = 16000, mono=True)

    # BUGFIX: an empty audio file previously crashed on chunks[-1] below.
    if len(speech_array) == 0:
        return ""

    # Define chunk size: 30 seconds at 16kHz = 480000 samples (Whisper window)
    chunk_size = 30 * 16000  # 480000

    # Split into 30-second chunks
    chunks = [
        speech_array[i:i+chunk_size]
        for i in range(0, len(speech_array), chunk_size)
    ]

    # Pad last chunk if it's shorter
    if len(chunks[-1]) < chunk_size:
        chunks[-1] = np.pad(chunks[-1], (0, chunk_size - len(chunks[-1])))

    # Prepare input features in batch
    input_features = processor(chunks, sampling_rate=16000, return_tensors="pt").input_features

    # Generate predictions in batch
    predicted_ids = model.generate(input_features)

    # Decode all chunks and concatenate
    transcribed_texts = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    full_transcription = " ".join([t.strip() for t in transcribed_texts])

    return full_transcription
204
+
205
@tool
def youtube_captions_tool(youtube_video_url: str) -> str:
    """
    This youtube captions tool will use a youtube video url to retrieve the captions and output them as a string containing the conversations in the video.
    Args:
        youtube_video_url: A string containing the url for a youtube video from which the captions will be retrieved.
    Returns:
        A string containing the captions of the youtube video url.
    """

    # yt-dlp writes the caption track to caption.<lang>.vtt; the media itself
    # is not downloaded (skip_download).
    outtmpl = "caption.%(ext)s"
    ydl_opts = {
        'writesubtitles': True,        # manually uploaded captions
        'writeautomaticsub': True,     # fall back to auto-generated captions
        'subtitleslangs': ['en'],
        'skip_download': True,
        'outtmpl': outtmpl,
        'quiet': True
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(youtube_video_url, download=True)
    # The language suffix depends on the video's caption track
    # (e.g. "en" vs "en-US"), so probe both known names.
    vtt_filename = None
    for ext in ('en.vtt', 'en-US.vtt'):
        if os.path.isfile(f'caption.{ext}'):
            vtt_filename = f'caption.{ext}'
            break
    if not vtt_filename:
        raise FileNotFoundError("Could not find English captions (.vtt) after download.")
    with open(vtt_filename, encoding='utf-8') as f:
        vtt_content = f.read()
    os.remove(vtt_filename)  # clean up the temporary caption file

    # Remove headers and unnecessary metadata
    vtt_content = re.sub(r'WEBVTT.*?\n', '', vtt_content, flags=re.DOTALL)
    vtt_content = re.sub(r'^Kind:.*\n?', '', vtt_content, flags=re.MULTILINE)
    vtt_content = re.sub(r'^Language:.*\n?', '', vtt_content, flags=re.MULTILINE)
    vtt_content = re.sub(r'^NOTE.*\n?', '', vtt_content, flags=re.MULTILINE)
    vtt_content = re.sub(r'X-TIMESTAMP.*', '', vtt_content)
    vtt_content = re.sub(r'\[.*?\]', '', vtt_content)  # e.g. [Music], [Applause]
    vtt_content = re.sub(r'<.*?>', '', vtt_content) # Remove tags like <c> and <00:00:01.000>

    # Split by lines, remove lines that are timestamps, metadata, or blank
    cleaned_lines = []
    last_line = None
    for line in vtt_content.splitlines():
        line = line.strip()
        if not line:
            continue # Skip blank lines
        if re.match(r'^\d{2}:\d{2}:\d{2}\.\d{3} -->', line):
            continue # Skip timestamps
        if re.match(r'^\d+$', line):
            continue # Skip sequence numbers
        if 'align:' in line or 'position:' in line:
            # Remove align/position metadata but keep the actual text
            line = re.sub(r'align:[^\s]+', '', line)
            line = re.sub(r'position:[^\s]+', '', line)
            line = line.strip()
            if not line:
                continue
        if line == last_line:
            continue # Deduplicate consecutive lines (auto-captions repeat text)
        cleaned_lines.append(line)
        last_line = line
    captions = '\n'.join(cleaned_lines).strip()

    return captions
271
+
272
@tool
def read_excel_tool(file_name: str) -> str:
    """
    This read excel tool will use the provided file name to load an Excel file into a Pandas DataFrame and output the various information as a text string.
    Args:
        file_name: A string containing the Excel file name.
    Returns:
        A string containing the structured output from a Pandas DataFrame after reading the Excel file.
    """
    # Read Excel File
    df = pd.read_excel(file_name)

    # Build the report string handed back to the agent.
    # BUGFIX: df.describe was missing its call parentheses, so the report
    # embedded the bound-method repr instead of the summary statistics.
    excel_string = f"""
    # Summary
    The text below contains the information from the Excel File that has been loaded into a Pandas DataFrame.

    ## DataFrame Shape
    {df.shape}

    ## DataFrame Columns
    {df.columns}

    ## DataFrame Describe
    {df.describe()}

    ## DataFrame Head
    {df.head(25)}
    """

    return excel_string