Final_Assignment_Project

Sleeping

App Files Files Community

wt002 committed on Jun 3

Commit

28a1f20

verified ·

1 Parent(s): 8ec51fb

Update app.py

Browse files

Files changed (1) hide show

app.py +295 -251

app.py CHANGED Viewed

@@ -1,278 +1,322 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
-from smolagents import tool, Tool, CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, SpeechToTextTool, FinalAnswerTool
 from dotenv import load_dotenv
-import heapq
-from collections import Counter
-import re
-from io import BytesIO
-from youtube_transcript_api import YouTubeTranscriptApi
-from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain_community.document_loaders import WikipediaLoader
-from langchain_community.utilities import WikipediaAPIWrapper
-from langchain_community.document_loaders import ArxivLoader
-from transformers import AutoModelForCausalLM, AutoTokenizer
-# (Keep Constants as is)
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-#Load environment variables
 load_dotenv()
-#from smolagents import Tool
-#from langchain_community.document_loaders import WikipediaLoader
-class WikiSearchTool(Tool):
-    name = "wiki_search"
-    description = "Search Wikipedia for a query and return up to 2 results."
-    inputs = {
-        "query": {"type": "string", "description": "The search term for Wikipedia."}
-    }
-    output_type = "string"
-    def forward(self, query: str) -> str:
-        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
-        formatted_search_docs = "\n\n---\n\n".join(
-            [
-                f'<Document source="{doc.metadata.get("source", "Wikipedia")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-                for doc in search_docs
-            ]
-        )
-        return formatted_search_docs
-class StringReverseTool(Tool):
-    name = "reverse_message"
-    description = "When you received a strange text, try to reverse it and perform action described in reversed message."
-    inputs = {
-        "message": {
-            "type": "string",
-            "description": "A message, which looks like strange and can be reversed to get actions to execute."
-        }
-    }
-    output_type = "string"
-    def __init__(self):
-        return
-    def forward(self, message: str):
-        return message[::-1]
-class KeywordsExtractorTool(Tool):
-    """Extracts top 5 keywords from a given text based on frequency."""
-    name = "keywords_extractor"
-    description = "This tool returns the 5 most frequent keywords occur in provided block of text."
-    inputs = {
-        "text": {
-            "type": "string",
-            "description": "Text to analyze for keywords.",
-        }
-    }
-    output_type = "string"
-    def forward(self, text: str) -> str:
-        try:
-            all_words = re.findall(r'\b\w+\b', text.lower())
-            conjunctions = {'a', 'and', 'of', 'is', 'in', 'to', 'the'}
-            filtered_words = []
-            for w in all_words:
-                if w not in conjunctions:
-                    filtered_words.push(w)
-            word_counts = Counter(filtered_words)
-            k = 5
-            return heapq.nlargest(k, word_counts.items(), key=lambda x: x[1])
-        except Exception as e:
-            return f"Error during extracting most common words: {e}"
-@tool
-def parse_excel_to_json(task_id: str) -> dict:
     """
-    For a given task_id fetch and parse an Excel file and save parsed data in structured JSON file.
-    Args:
-        task_id: An task ID to fetch.
-    Returns:
-        {
-            "task_id": str,
-            "sheets": {
-                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
-                ...
-            },
-            "status": "Success" | "Error"
-        }
     """
-    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
     try:
-        response = requests.get(url, timeout=100)
         if response.status_code != 200:
-            return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}
-        xls_content = pd.ExcelFile(BytesIO(response.content))
-        json_sheets = {}
-        for sheet in xls_content.sheet_names:
-            df = xls_content.parse(sheet)
-            df = df.dropna(how="all")
-            rows = df.head(20).to_dict(orient="records")
-            json_sheets[sheet] = rows
-        return {
-            "task_id": task_id,
-            "sheets": json_sheets,
-            "status": "Success"
-        }
     except Exception as e:
-        return {
-            "task_id": task_id,
-            "sheets": {},
-            "status": f"Error in parsing Excel file: {str(e)}"
-        }
-class VideoTranscriptionTool(Tool):
-    """Fetch transcripts from YouTube videos"""
-    name = "transcript_video"
-    description = "Fetch text transcript from YouTube movies with optional timestamps"
-    inputs = {
-        "url": {"type": "string", "description": "YouTube video URL or ID"},
-        "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True}
-    }
-    output_type = "string"
-    def forward(self, url: str, include_timestamps: bool = False) -> str:
-        if "youtube.com/watch" in url:
-            video_id = url.split("v=")[1].split("&")[0]
-        elif "youtu.be/" in url:
-            video_id = url.split("youtu.be/")[1].split("?")[0]
-        elif len(url.strip()) == 11:  # Direct ID
-            video_id = url.strip()
-        else:
-            return f"YouTube URL or ID: {url} is invalid!"
-        try:
-            transcription = YouTubeTranscriptApi.get_transcript(video_id)
-            if include_timestamps:
-                formatted_transcription = []
-                for part in transcription:
-                    timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
-                    formatted_transcription.append(f"[{timestamp}] {part['text']}")
-                return "\n".join(formatted_transcription)
-            else:
-                return " ".join([part['text'] for part in transcription])
-        except Exception as e:
-            return f"Error in extracting YouTube transcript: {str(e)}"
 class BasicAgent:
     def __init__(self):
-        # Configuration for Qwen2.5-Coder-32B-Instruct
-        model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"
-        # Load the model and tokenizer directly using Hugging Face Transformers
-        # This will download the model weights and load them onto your device (GPU if available)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype="auto", # Uses bfloat16 or float16 if supported, otherwise float32
-            device_map="auto" # Automatically maps model layers to available devices (e.g., GPU(s), CPU)
         )
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        # Note: You won't use 'token' for direct Hugging Face model loading unless
-        # the model is private and requires authentication. For public models like Qwen,
-        # it's usually not needed for loading.
-        token = os.environ.get("HF_API_TOKEN") # This line might not be needed now
-        search_tool = DuckDuckGoSearchTool()
-        wiki_search_tool = WikiSearchTool()
-        str_reverse_tool = StringReverseTool()
-        keywords_extract_tool = KeywordsExtractorTool()
-        speech_to_text_tool = SpeechToTextTool()
-        visit_webpage_tool = VisitWebpageTool()
-        final_answer_tool = FinalAnswerTool()
-        video_transcription_tool = VideoTranscriptionTool()
-        system_prompt = f"""
-You are my general AI assistant. Your task is to answer the question I asked.
-First, provide an explanation of your reasoning, step by step, to arrive at the answer.
-Then, return your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
-[YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
-If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
-If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
-If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
-"""
-        # Here, you might need to adapt how CodeAgent expects the model.
-        # If CodeAgent is built to work with LangChain's LLM instances,
-        # you'll need to wrap your Qwen model with a custom LangChain LLM
-        # or adjust CodeAgent to accept direct Hugging Face model/tokenizer.
-        # For simplicity, if CodeAgent can take a callable for 'model',
-        # you could define a simple wrapper.
-        # Otherwise, you might need to write a custom LangChain LLM class.
-        # For demonstration, let's assume CodeAgent can handle a custom callable
-        # that performs inference using your loaded model and tokenizer.
-        # This is a simplification and might require adjustment to CodeAgent.
-        class CustomQwenLLM:
-            def __init__(self, model, tokenizer):
-                self.model = model
-                self.tokenizer = tokenizer
-            def __call__(self, prompt: str) -> str:
-                messages = [
-                    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
-                    {"role": "user", "content": prompt}
-                ]
-                text = self.tokenizer.apply_chat_template(
-                    messages,
-                    tokenize=False,
-                    add_generation_prompt=True
-                )
-                model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
-                generated_ids = self.model.generate(
-                    **model_inputs,
-                    max_new_tokens=512,
-                    do_sample=True, # Added for better response quality
-                    temperature=0.7 # Added for better response quality
-                )
-                input_length = model_inputs.input_ids.shape[1]
-                generated_text = self.tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0]
-                return generated_text
-        self.llm_for_agent = CustomQwenLLM(self.model, self.tokenizer)
-        self.agent = CodeAgent(
-            model=self.llm_for_agent, # Pass the custom wrapper
-            tools=[search_tool, wiki_search_tool, str_reverse_tool, keywords_extract_tool, speech_to_text_tool, visit_webpage_tool, final_answer_tool, parse_excel_to_json, video_transcription_tool],
-            add_base_tools=True
-        )
-        self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + system_prompt
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        answer = self.agent.run(question)
-        print(f"Agent returning answer: {answer}")
         return answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,

 import os
+from typing import Annotated, Optional, TypedDict
 import gradio as gr
+from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
+from langchain_openai import ChatOpenAI
+from langgraph.graph.message import add_messages
+from langgraph.graph import StateGraph, START
+from langgraph.prebuilt import tools_condition, ToolNode
 import requests
 import pandas as pd
+from langchain.tools import Tool
 from dotenv import load_dotenv
+from arxiv_searcher import ArxivSearcher
+from chess_algebraic_notation_retriever import ChessAlgebraicNotationMoveRetriever
+from excel_file_reader import ExcelFileReader
+from image_question_answer_tool import ImageQuestionAnswerTool
+from python_code_question_answer_tool import PythonCodeQuestionAnswerTool
+from tavily_searcher import TavilySearcher
+from transcriber import Transcriber
+from wikipedia_searcher import WikipediaSearcher
+from youtube_video_question_answer_tool import YoutubeVideoQuestionAnswerTool
 load_dotenv()
+# (Keep Constants as is)
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+ASSOCIATED_FILE_ENDPOINT = f"{DEFAULT_API_URL}/files/"
+# --- Basic Agent Definition ---
+# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+#search_tool = DuckDuckGoSearchRun()
+#search_tool = DuckDuckGoSearcherTool()
+def retrieve_task_file(task_id: str) -> Optional[bytes]:
     """
+    Download the file attached to a task from the scoring API.
+    Args:
+        task_id: Identifier appended to ASSOCIATED_FILE_ENDPOINT to form the URL.
+    Returns:
+        The raw response body when the server answers 200, otherwise None.
+        Every failure is reported with print() instead of being raised.
     """
     try:
+        reply = requests.get(ASSOCIATED_FILE_ENDPOINT + task_id, timeout=15)
+        # HTTP error statuses are raised here and handled by the first except below
+        reply.raise_for_status()
+        if reply.status_code == 200:
+            return reply.content
+        # Not an error status, but still not the plain 200 this code expects
+        print(f"Error fetching file: {reply.status_code}")
+        return None
+    except requests.exceptions.RequestException as err:
+        print(f"Error fetching file: {err}")
+        return None
+    except Exception as err:
+        print(f"An unexpected error occurred fetching file: {err}")
+        return None
+def retrieve_next_chess_move_in_algebraic_notation(task_file_path: str, is_black_turn: bool) -> str:
+    """
+    Return the next chess move, in algebraic notation, for a board image.
+    Args:
+        task_file_path: Path of the board image; None yields an error message.
+        is_black_turn: Forwarded unchanged to the retriever.
+    Returns:
+        The result of ChessAlgebraicNotationMoveRetriever().retrieve(...), or
+        the string "Error: Task file not found." when task_file_path is None.
+    """
+    if task_file_path is not None:
+        # A fresh retriever is created for every call, as before
+        return ChessAlgebraicNotationMoveRetriever().retrieve(task_file_path, is_black_turn)
+    return "Error: Task file not found."
+# Initialize the tool.
+# The wrapped function needs two arguments (image path and side to move), but
+# `Tool` is a single-input wrapper that hands exactly one string to `func`, so
+# every call through it would fail on the missing `is_black_turn` argument.
+# `StructuredTool.from_function` derives a multi-argument schema from the
+# function signature instead.
+from langchain.tools import StructuredTool
+retrieve_next_chess_move_in_algebraic_notation_tool = StructuredTool.from_function(
+    func=retrieve_next_chess_move_in_algebraic_notation,
+    name="retrieve_next_chess_move_in_algebraic_notation",
+    description="Retrieve the next chess move in algebraic notation from an image path."
+)
+def transcribe_audio(file_path: str) -> str:
+    """
+    Transcribe an audio file with the project Transcriber.
+    Args:
+        file_path: Path of the audio file; None yields an error message.
+    Returns:
+        The result of Transcriber().transcribe(file_path), or the string
+        "Error: Audio path not found." when file_path is None.
+    """
+    if file_path is not None:
+        # Transcribe the audio
+        return Transcriber().transcribe(file_path)
+    return "Error: Audio path not found."
+# Wrap transcribe_audio in a langchain Tool so it can be listed in BasicAgent's tools
+transcribe_audio_tool = Tool(
+    name="transcribe_audio",
+    func=transcribe_audio,
+    description="Transcribe the audio from an audio path."
+)
+# Tool object from the project-local python_code_question_answer_tool module
+answer_python_code_tool = PythonCodeQuestionAnswerTool()
+# Tool object from the project-local image_question_answer_tool module
+answer_image_question_tool = ImageQuestionAnswerTool()
+# Tool object from the project-local youtube_video_question_answer_tool module
+answer_youtube_video_question_tool = YoutubeVideoQuestionAnswerTool()
+'''def answer_youtube_video_question(youtube_video_url: str, question: str) -> str:
+    """
+    Answer the question based on the youtube video.
+    """
+    if youtube_video_url is None:
+        return "Error: Video not found."
+    # Download the video
+    video_path = YoutubeVideoDownloader().download_video(youtube_video_url)
+    # Answer the question
+    return VideoQuestionAnswer().answer(video_path, question)
+# Initialize the tool
+answer_youtube_video_question_tool = Tool(
+    name="answer_youtube_video_question",
+    func=answer_youtube_video_question,
+    description="Answer the question based on the youtube video."
+)'''
+def read_excel_file(file_path: str) -> str:
+    """
+    Read an Excel file with the project ExcelFileReader.
+    Args:
+        file_path: Path of the Excel file; None yields an error message.
+    Returns:
+        The result of ExcelFileReader().read_file(file_path), or the string
+        "Error: File not found." when file_path is None.
+    """
+    if file_path is not None:
+        return ExcelFileReader().read_file(file_path)
+    return "Error: File not found."
+# Wrap read_excel_file in a langchain Tool so it can be listed in BasicAgent's tools
+read_excel_file_tool = Tool(
+    name="read_excel_file",
+    func=read_excel_file,
+    description="Read the excel file."
+)
+# Wikipedia search: delegates to the search method of one shared WikipediaSearcher instance
+wikipedia_search_tool = Tool(
+    name="wikipedia_search",
+    func=WikipediaSearcher().search,
+    description="Search Wikipedia for a given query."
+)
+# Arxiv search: delegates to the search method of one shared ArxivSearcher instance
+arxiv_search_tool = Tool(
+    name="arxiv_search",
+    func=ArxivSearcher().search,
+    description="Search Arxiv for a given query."
+)
+# Web search: delegates to the search method of one shared TavilySearcher instance
+tavily_search_tool = Tool(
+    name="tavily_search",
+    func=TavilySearcher().search,
+    description="Search the web for a given query."
+)
+def format_gaia_answer(answer: str) -> str:
+    """
+    Normalise a raw agent answer into the terse format GAIA expects.
+    The raw answer is embedded in a formatting prompt and sent to an OpenAI
+    chat model (o3-mini); the reply is stripped and double quotes are removed.
+    Args:
+        answer: Raw answer text produced by the agent.
+    Returns:
+        The formatted answer, or an empty string when `answer` is empty or
+        whitespace-only.
+    """
+    # Nothing to format: skip the LLM round-trip. Asking the model to format
+    # an empty answer would return arbitrary text that is then submitted.
+    if not answer or not str(answer).strip():
+        return ""
+    llm = ChatOpenAI(model="o3-mini", openai_api_key=os.getenv("OPENAI_API_KEY"))
+    prompt = f"""
+    You are formatting answers for the GAIA benchmark, which requires responses to be concise and unambiguous.
+    Given the answer: {answer}
+    Return the answer in the correct GAIA format:
+    - If the answer is a single word or number, return it without any additional text or formatting.
+    - If the answer is a list, return a comma-separated list without any additional text or formatting.
+    - If the answer is a string, return it without any additional text or formatting.
+    Do not include any prefixes, dots, enumerations, explanations, or quotation marks.
+    Do not include any additional text or formatting.
+    """
+    response = llm.invoke(prompt)
+    # Delete double quotes
+    return response.content.strip().replace('"', '')
+class AgentState(TypedDict):
+    # Conversation messages exchanged so far; add_messages is the LangGraph reducer attached to this field
+    messages: Annotated[list[AnyMessage], add_messages]
+    file_path: Optional[str]  # path of the task's attached file, None when the task has no file
 class BasicAgent:
     def __init__(self):
+        """
+        Wire up the tool-calling agent graph.
+        Binds the module-level tools to an o3-mini chat model and compiles a
+        two-node graph in which "assistant" and "tools" alternate for as long
+        as tools_condition routes the assistant's output to "tools".
+        """
+        toolbox = [
+            tavily_search_tool,
+            arxiv_search_tool,
+            wikipedia_search_tool,
+            transcribe_audio_tool,
+            answer_python_code_tool,
+            answer_image_question_tool,
+            answer_youtube_video_question_tool,
+            read_excel_file_tool,
+        ]
+        '''llm = ChatGoogleGenerativeAI(
+            model="gemini-2.0-flash",
+            temperature=0.2,
+            api_key=os.getenv("GEMINI_API_KEY")
+        )'''
+        chat_model = ChatOpenAI(model="o3-mini", openai_api_key=os.getenv("OPENAI_API_KEY"))
+        self.llm_with_tools = chat_model.bind_tools(toolbox)
+        graph = StateGraph(AgentState)
+        # Nodes: the LLM step and the tool executor
+        graph.add_node("assistant", self.assistant)
+        graph.add_node("tools", ToolNode(toolbox))
+        # Edges: always start at the assistant; after it, go to "tools" when the
+        # latest message requires a tool, otherwise provide a direct response
+        graph.add_edge(START, "assistant")
+        graph.add_conditional_edges("assistant", tools_condition)
+        # Tool results are fed back to the assistant
+        graph.add_edge("tools", "assistant")
+        self.agent = graph.compile()
+        print("BasicAgent initialized.")
+    def assistant(self, state: AgentState):
+        """
+        Graph node: run the tool-enabled LLM on the conversation so far.
+        Builds a system message (tool descriptions, the current file path and
+        answer-format rules), prepends it to state["messages"], and returns the
+        model's reply as a one-element "messages" list together with the
+        unchanged "file_path".
+        """
+        # Plain-text tool descriptions embedded in the system prompt.
+        # NOTE(review): "download_youtube_video" is described below, but no tool
+        # by that name appears in the tools list built in __init__ - confirm.
+        textual_description_of_tools="""
+            tavily_search(query: str) -> str:
+                Search the web for a given query.
+                Args:
+                    query: Query to search the web for (string).
+                Returns:
+                    A single string containing the information found on the web.
+            arxiv_search(query: str) -> str:
+                Search Arxiv, that contains scientific papers, for a given query.
+                Args:
+                    query: Query to search Arxiv for (string).
+                Returns:
+                    A single string containing the answer to the question.
+            wikipedia_search(query: str) -> str:
+                Search Wikipedia for a given query.
+                Args:
+                    query: Query to search Wikipedia for (string).
+                Returns:
+                    A single string containing the answer to the question.
+            transcribe_audio(file_path: str) -> str:
+                Transcribe the audio from an audio path.
+                Args:
+                    file_path: File path of the audio file (string).
+                Returns:
+                    A single string containing the transcribed text from the audio.
+            answer_python_code(file_path: str, question: str) -> str:
+                Answer the question based on the python code.
+                Args:
+                    file_path: File path of the python file (string).
+                    question: Question to answer (string).
+                Returns:
+                    A single string containing the answer to the question.
+            answer_image_question(file_path: str, question: str) -> str:
+                Answer the question based on the image.
+                Args:
+                    file_path: File path of the image (string).
+                    question: Question to answer (string).
+                Returns:
+                    A single string containing the answer to the question.
+            download_youtube_video(youtube_video_url: str) -> str:
+                Download the Youtube video into a local file based on the URL
+                Args:
+                    youtube_video_url: A youtube video url (string).
+                Returns:
+                    A single string containing the file path of the downloaded youtube video.
+            answer_youtube_video_question(file_path: str, question: str) -> str:
+                Answer the question based on file path of the downloaded youtube video
+                Args:
+                    file_path: File path of the downloaded youtube video (string).
+                    question: Question to answer (string).
+                Returns:
+                    A single string containing the answer to the question.
+            read_excel_file(file_path: str) -> str:
+                Read the excel file.
+                Args:
+                    file_path: File path of the excel file (string).
+                Returns:
+                    A markdown formatted string containing the contents of the excel file.
+        """
+        # Path of the attached file saved by __call__, or None when there is none
+        file_path=state["file_path"]
+        prompt = f"""
+            You are a helpful assistant that can analyse images, videos, excel files and Python scripts and run computations with provided tools:
+            {textual_description_of_tools}
+            You have access to the file path of the attached file in case it's informed. Currently the file path is: {file_path}
+            Be direct and specific. GAIA benchmark requires exact matching answers.
+            For example, if asked "What is the capital of France?", respond simply with "Paris".
+            Do not include any prefixes, dots, enumerations, explanations, or quotation marks.
+            Do not include any additional text or formatting.
+            If you are required a number, return a number, not the items.
+        """
+        sys_msg = SystemMessage(content=prompt)
+        # The system message is rebuilt and prepended on every call; only the
+        # model's reply is returned under "messages"
+        return {
+            "messages": [self.llm_with_tools.invoke([sys_msg] + state["messages"], config={"configurable": {"file_path": state["file_path"]}})],
+            "file_path": state["file_path"]
+        }
+        # NOTE(review): the string below sits after the return and is never
+        # evaluated; it is an earlier variant without the system message.
+        '''return {
+            "messages": [self.llm_with_tools.invoke(
+                state["messages"],
+                config={"configurable": {"file_path": state["file_path"]}}  # Aquí pasas el task_id
+            )],
+            "file_path": state["file_path"]
+        }'''
+    def __call__(self, question: str, task_id: str, file_name: str) -> str:
+        """
+        Answer one benchmark question, optionally using its attached file.
+        Args:
+            question: The question text.
+            task_id: Task identifier, used to download the attached file.
+            file_name: Name of the attached file; None or "" means no file.
+        Returns:
+            The agent's last message content after format_gaia_answer().
+        The attached file is saved under tmp/ for the duration of the run and
+        removed afterwards, even when the agent or the formatter raises.
+        """
+        print(f"######################### Agent received question (first 50 chars): {question[:50]}... with file_name: {file_name}")
+        # Keep only the final path component so a crafted file_name cannot
+        # point outside the tmp folder
+        safe_name = os.path.basename(file_name) if file_name else ""
+        tmp_file_path = None
+        try:
+            # Get the file path
+            if safe_name:
+                file_content = retrieve_task_file(task_id)
+                if file_content is not None:
+                    print(f"Saving file {file_name} to tmp folder")
+                    # The tmp folder may not exist yet; open() would otherwise
+                    # fail with FileNotFoundError
+                    os.makedirs("tmp", exist_ok=True)
+                    tmp_file_path = os.path.join("tmp", safe_name)
+                    with open(tmp_file_path, "wb") as f:
+                        f.write(file_content)
+            # Show the file path
+            print(f"File path: {tmp_file_path}")
+            messages = self.agent.invoke({"messages": [HumanMessage(question)], "file_path": tmp_file_path})
+            # Show the messages
+            for m in messages['messages']:
+                m.pretty_print()
+            answer = messages["messages"][-1].content
+            answer = format_gaia_answer(answer)
+        finally:
+            # Delete the file on every exit path so failed runs do not leave
+            # stale files behind
+            if tmp_file_path is not None and os.path.exists(tmp_file_path):
+                os.remove(tmp_file_path)
+        print(f"######################### Agent returning answer: {answer}\n")
         return answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,