Final_Assignment_Agent

Paused

App Files Files Community

Martin Bär committed on May 10

Commit

6fec0c8

1 Parent(s): 2fa94b3

Add multimodality tools

Browse files

Files changed (4) hide show

app.py +2 -1
basic_agent.py +16 -22
multimodality_tools.py +155 -0
requirements.txt +8 -1

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ from basic_agent import BasicAgent
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 Settings.llm = None # disable LLM for Index Retrieval
 Settings.chunk_size = 512 # Smaller chunk size for retrieval
@@ -78,7 +79,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# For Llamaindex's LoadAndSearchTool
 Settings.llm = None # disable LLM for Index Retrieval
 Settings.chunk_size = 512 # Smaller chunk size for retrieval
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = agent(question_text, task_id)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:

basic_agent.py CHANGED Viewed

@@ -1,18 +1,16 @@
-from llama_index.core.agent.workflow import AgentWorkflow
-from llama_index.core.workflow import Context
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
 from llama_index.tools.wikipedia import WikipediaToolSpec
-from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
-from llama_index.readers.web import SimpleWebPageReader
-from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
 from langfuse.llama_index import LlamaIndexInstrumentor
 from llama_index.llms.ollama import Ollama
-from llama_index.core.agent.workflow import ReActAgent, FunctionAgent
 class BasicAgent:
-    def __init__(self, ollama=False, langfuse=True):
         if not ollama:
             llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
         else:
@@ -28,9 +26,6 @@ class BasicAgent:
         tool_spec = DuckDuckGoSearchToolSpec()
         search_tool = FunctionTool.from_defaults(tool_spec.duckduckgo_full_search)
-        wiki_spec = WikipediaToolSpec()
-        wiki_search_tool = wiki_spec.to_tool_list()[1]
         # Convert into a LoadAndSearchToolSpec because the wikipedia search tool returns
         # entire Wikipedia pages and this can pollute the context window of the LLM
         wiki_spec = WikipediaToolSpec()
@@ -38,18 +33,13 @@ class BasicAgent:
         # Convert into a LoadAndSearchToolSpec because the wikipedia search tool returns
         # entire Wikipedia pages and this can pollute the context window of the LLM
         # TODO this does not work so well. We need to make the retriever return the top 5 chunks or sth.
-        wiki_search_tool_las = LoadAndSearchToolSpec.from_defaults(wiki_search_tool).to_tool_list()
-        webpage_tool = OnDemandLoaderTool.from_defaults(
-            SimpleWebPageReader(html_to_text=True),
-            name="Webpage search tool",
-            description="A tool for loading the content of a webpage and querying it for information",
-        )
-        self.agent = AgentWorkflow.from_tools_or_functions( # ReActAgent(
-            tools=[search_tool], # webpage_tool does not work properly - cookies etc
             llm=llm,
             verbose=True,
             system_prompt = (
@@ -68,8 +58,12 @@ class BasicAgent:
         # self.ctx = Context(self.agent)
-    async def __call__(self, question: str) -> str:
-        response = await self.agent.run(user_msg=question) # ctx=self.ctx)
         if self.langfuse:
             self.instrumentor.flush()

 from llama_index.core.tools import FunctionTool
 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
 from llama_index.tools.wikipedia import WikipediaToolSpec
 from langfuse.llama_index import LlamaIndexInstrumentor
 from llama_index.llms.ollama import Ollama
+from llama_index.core.agent.workflow import FunctionAgent
+from multimodality_tools import get_image_qa_tool, get_transcription_tool, \
+    get_excel_analysis_tool, get_excel_tool, get_csv_analysis_tool, get_csv_tool
 class BasicAgent:
+    def __init__(self, ollama=False, langfuse=False):
         if not ollama:
             llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
         else:
         tool_spec = DuckDuckGoSearchToolSpec()
         search_tool = FunctionTool.from_defaults(tool_spec.duckduckgo_full_search)
         # Convert into a LoadAndSearchToolSpec because the wikipedia search tool returns
         # entire Wikipedia pages and this can pollute the context window of the LLM
         wiki_spec = WikipediaToolSpec()
         # Convert into a LoadAndSearchToolSpec because the wikipedia search tool returns
         # entire Wikipedia pages and this can pollute the context window of the LLM
         # TODO this does not work so well. We need to make the retriever return the top 5 chunks or sth.
+        # wiki_search_tool_las = LoadAndSearchToolSpec.from_defaults(wiki_search_tool).to_tool_list()
+        self.agent = FunctionAgent(
+            tools=[search_tool, wiki_search_tool, get_image_qa_tool(),
+                   get_transcription_tool(), get_excel_analysis_tool(), get_excel_tool(),
+                   get_csv_analysis_tool(), get_csv_tool()],
             llm=llm,
             verbose=True,
             system_prompt = (
         # self.ctx = Context(self.agent)
+    async def __call__(self, question: str, task_id: str = None) -> str:
+        file_str = ""
+        if task_id:
+            file_str = f'\nIf you need to load a file, do so by providing the id "{task_id}".'
+        response = await self.agent.run(user_msg=question + file_str) # ctx=self.ctx)
         if self.langfuse:
             self.instrumentor.flush()

multimodality_tools.py ADDED Viewed

	@@ -0,0 +1,155 @@

+"""Tools to handle multimodal understanding."""
+import os
+import io
+import requests
+import librosa
+import soundfile as sf
+import pandas as pd
+from llama_index.core.tools import FunctionTool
+from huggingface_hub import InferenceClient
+from transformers import pipeline
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+def transcribe_audio(file_id: str) -> str:
+    """
+    Transcribes an English audio file identified by its id.
+
+    The file is downloaded with ``_get_file``, decoded with soundfile,
+    reduced to a single channel, resampled to 16 kHz if necessary and then
+    passed to a local ``openai/whisper-tiny`` speech-recognition pipeline.
+
+    Args:
+        file_id: Id of the file to download.
+
+    Returns:
+        The stripped transcription text, or an error message string when the
+        file cannot be downloaded or decoded as audio.
+    """
+    try:
+        audio, sr = sf.read(_get_file(file_id))
+        # soundfile returns a (frames, channels) array for multi-channel
+        # files; average the channels so resampling and the ASR pipeline
+        # both receive 1-D mono audio.
+        if audio.ndim > 1:
+            audio = audio.mean(axis=1)
+        if sr != 16000:
+            audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
+    except Exception:
+        # Deliberately broad (download, decode and resample can all fail), but
+        # no longer a bare ``except`` so KeyboardInterrupt/SystemExit propagate.
+        return "Error: Invalid file. This file is either not an audio file or the id does not exist."
+    asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+    # Generation options such as the language are forwarded to the model
+    # through ``generate_kwargs`` rather than as direct pipeline arguments.
+    output = asr(audio, generate_kwargs={"language": "en"})
+    return output["text"].strip()
+def transcribe_audio_hf(file_id: str) -> str:
+    """
+    Transcribes an audio file identified by its id via the HF Inference API.
+
+    Args:
+        file_id: Id of the file to download with ``_get_file``.
+
+    Returns:
+        The transcription text produced by ``openai/whisper-small``, or an
+        error message string when the file cannot be downloaded.
+    """
+    try:
+        audio_bytes = _get_file(file_id).read()
+    except Exception:
+        # Not a bare ``except``: KeyboardInterrupt/SystemExit still propagate.
+        return "Error: Invalid file. This file is either not an audio file or the id does not exist."
+    client = InferenceClient(
+        provider="hf-inference",
+        api_key=os.getenv("HF_TOKEN"),
+    )
+    output = client.automatic_speech_recognition(audio_bytes, model="openai/whisper-small")
+    # The client may return an output object carrying the transcript in
+    # ``.text``; unwrap it so the declared ``str`` return type holds, and fall
+    # back to the raw value if it is already a plain string.
+    return getattr(output, "text", output)
+def get_transcription_tool():
+    """Build the agent tool that wraps ``transcribe_audio``."""
+    tool_description = "Transcribes an audio file identified by its id."
+    transcription_tool = FunctionTool.from_defaults(
+        fn=transcribe_audio,
+        description=tool_description,
+    )
+    return transcription_tool
+def answer_image_question(question: str, file_id: str) -> str:
+    """
+    Answers questions about an image identified by its id.
+
+    The question and the image URL (built from ``DEFAULT_API_URL`` and the
+    file id) are sent as a single user message to a vision-language chat model
+    through the HF Inference API; the content of the first choice is returned.
+    """
+    vlm_client = InferenceClient(
+        provider="hf-inference",
+        api_key=os.getenv("HF_TOKEN"),
+    )
+    # One user turn made of two parts: the text question and the image link.
+    text_part = {"type": "text", "text": question}
+    image_part = {
+        "type": "image_url",
+        "image_url": {"url": DEFAULT_API_URL + f"/files/{file_id}"},
+    }
+    user_message = {"role": "user", "content": [text_part, image_part]}
+    completion = vlm_client.chat.completions.create(
+        model="Qwen/Qwen2.5-VL-32B-Instruct",
+        messages=[user_message],
+        max_tokens=512,
+    )
+    first_choice = completion.choices[0]
+    return first_choice.message.content
+def get_image_qa_tool():
+    """Build the agent tool that wraps ``answer_image_question``."""
+    tool_description = "Answer a question about a given image. The image is identified by a file id."
+    image_qa_tool = FunctionTool.from_defaults(
+        fn=answer_image_question,
+        description=tool_description,
+    )
+    return image_qa_tool
+def read_excel(file_id: str) -> str:
+    """Download the Excel file with the given id and render it as a markdown table."""
+    # Read straight from the in-memory download; nothing is written to disk.
+    sheet = pd.read_excel(_get_file(file_id))
+    markdown_table = sheet.to_markdown()
+    return markdown_table
+def get_excel_tool():
+    """Build the agent tool that wraps ``read_excel``."""
+    tool_description = "Convert an excel file that is identified by its file id into a markdown string."
+    excel_tool = FunctionTool.from_defaults(
+        fn=read_excel,
+        description=tool_description,
+    )
+    return excel_tool
+def analyse_excel(file_id: str) -> str:
+    """
+    Compute summary statistics for the Excel file with the given id.
+
+    Args:
+        file_id: Id of the file to download with ``_get_file``.
+
+    Returns:
+        The output of ``DataFrame.describe()`` rendered as a markdown table.
+    """
+    file_io = _get_file(file_id)
+    df = pd.read_excel(file_io)
+    # Render to markdown so the declared ``str`` return type holds and the
+    # output format matches ``read_excel``.
+    return df.describe().to_markdown()
+def get_excel_analysis_tool():
+    """Build the agent tool that returns summary statistics for an Excel file."""
+    return FunctionTool.from_defaults(
+        # Wrap the statistics function; wrapping ``read_excel`` here would dump
+        # the whole sheet and contradict the description below.
+        fn=analyse_excel,
+        description="Analyse an excel file that is identified by its file id and get common statistics such as mean or max per column."
+    )
+def read_csv(file_id: str) -> str:
+    """Download the CSV file with the given id and render it as a markdown table."""
+    # Parse straight from the in-memory download; nothing is written to disk.
+    table = pd.read_csv(_get_file(file_id))
+    markdown_table = table.to_markdown()
+    return markdown_table
+def get_csv_tool():
+    """Build the agent tool that converts a CSV file into a markdown string."""
+    return FunctionTool.from_defaults(
+        # Wrap the CSV reader; ``read_excel`` would try to parse the CSV
+        # download as an Excel workbook.
+        fn=read_csv,
+        description="Convert a csv file that is identified by its file id into a markdown string."
+    )
+def analyse_csv(file_id: str) -> str:
+    """
+    Compute summary statistics for the CSV file with the given id.
+
+    Args:
+        file_id: Id of the file to download with ``_get_file``.
+
+    Returns:
+        The output of ``DataFrame.describe()`` rendered as a markdown table.
+    """
+    file_io = _get_file(file_id)
+    df = pd.read_csv(file_io)
+    # Render to markdown so the declared ``str`` return type holds and the
+    # output format matches ``read_csv``.
+    return df.describe().to_markdown()
+def get_csv_analysis_tool():
+    """Build the agent tool that returns summary statistics for a CSV file."""
+    return FunctionTool.from_defaults(
+        # Wrap the CSV statistics function; ``read_excel`` would neither parse
+        # a CSV download nor produce the statistics the description promises.
+        fn=analyse_csv,
+        description="Analyse a csv file that is identified by its file id and get common statistics such as mean or max per column."
+    )
+def watch_video(video_url: str) -> str:
+    """Placeholder video tool: ignores the URL and returns a fixed instruction string."""
+    not_supported_reply = "You are not able to watch a Video yet. Reply with 'I don't know' to the question."
+    return not_supported_reply
+def get_video_tool():
+    """Build the agent tool that wraps ``watch_video``."""
+    tool_description = "Watch a video and get a content description as a string."
+    video_tool = FunctionTool.from_defaults(
+        fn=watch_video,
+        description=tool_description,
+    )
+    return video_tool
+def _get_file(task_id: str, timeout: float = 30.0) -> io.BytesIO:
+    """
+    Download the file attached to a task and return it as an in-memory buffer.
+
+    Args:
+        task_id: Id appended to ``DEFAULT_API_URL + "/files/"``.
+        timeout: Seconds to wait for the server before giving up. requests
+            applies no timeout by default, so without it a stalled server
+            would block the calling tool indefinitely.
+
+    Returns:
+        A ``BytesIO`` holding the response body.
+
+    Raises:
+        FileNotFoundError: If the server does not answer with status 200.
+        requests.RequestException: On connection errors or timeouts.
+    """
+    res = requests.get(DEFAULT_API_URL + f"/files/{task_id}", timeout=timeout)
+    if res.status_code != 200:
+        raise FileNotFoundError("Invalid file or task id.")
+    return io.BytesIO(res.content)

requirements.txt CHANGED Viewed

@@ -7,4 +7,11 @@ llama_index-tools-wikipedia
 llama-index-embeddings-huggingface
 llama-index-readers-web
 llama-index-llms-ollama
-langfuse

 llama-index-embeddings-huggingface
 llama-index-readers-web
 llama-index-llms-ollama
+langfuse
+tabulate
+soundfile
+librosa
+pillow
+pandas
+huggingface_hub
+transformers