Final_Assignment_Template

Sleeping

App Files Files Community

EtienneB committed on Jul 1

Commit

042d1d5

1 Parent(s): 36dd2e6

updated all

Browse files

Files changed (3) hide show

agent.py +22 -16
requirements.txt +4 -2
tools.py +83 -1

agent.py CHANGED Viewed

@@ -8,19 +8,19 @@ from langchain_huggingface import (ChatHuggingFace, HuggingFaceEmbeddings,
 from langgraph.graph import START, MessagesState, StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
-from tools import (absolute, add, analyze_excel_file, arvix_search,
-                   audio_transcription, compound_interest, convert_temperature,
-                   divide, exponential, factorial, floor_divide,
-                   get_current_time_in_timezone, greatest_common_divisor,
-                   is_prime, least_common_multiple, logarithm, modulus,
-                   multiply, percentage_calculator, power, python_code_parser,
-                   roman_calculator_converter, square_root, subtract,
-                   web_search, wiki_search)
 # Load Constants
 load_dotenv()
 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 tools = [
     multiply, add, subtract, power, divide, modulus,
@@ -29,18 +29,24 @@ tools = [
     get_current_time_in_timezone, compound_interest,
     convert_temperature, factorial, greatest_common_divisor,
     is_prime, least_common_multiple, percentage_calculator,
-    wiki_search, analyze_excel_file, arvix_search, audio_transcription, python_code_parser
 ]
 # Load system prompt
 system_prompt = """
-You are a helpful assistant tasked with answering questions using a set of tools.
-Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
-FINAL ANSWER: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
-Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
 """
 # System message
 sys_msg = SystemMessage(content=system_prompt)
@@ -94,7 +100,7 @@ def build_graph():
         repo_id="gemini/gemini-2.0-flash",
         #"Qwen/Qwen2.5-Coder-32B-Instruct",
         #huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
-        api_key=os.getenv("GEMINI_API_KEY"),
         temperature=0.3,
         max_new_tokens=512,
         timeout=60,

 from langgraph.graph import START, MessagesState, StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
+from tools import (absolute, add, analyze_csv_file, analyze_excel_file,
+                   arvix_search, audio_transcription, compound_interest,
+                   convert_temperature, divide, exponential, extract_text,
+                   factorial, floor_divide, get_current_time_in_timezone,
+                   greatest_common_divisor, is_prime, least_common_multiple,
+                   logarithm, modulus, multiply, percentage_calculator, power,
+                   python_code_parser, roman_calculator_converter, square_root,
+                   subtract, web_search, wiki_search)
 # Load Constants
 load_dotenv()
 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 tools = [
     multiply, add, subtract, power, divide, modulus,
     get_current_time_in_timezone, compound_interest,
     convert_temperature, factorial, greatest_common_divisor,
     is_prime, least_common_multiple, percentage_calculator,
+    wiki_search, analyze_excel_file, arvix_search,
+    audio_transcription, python_code_parser, analyze_csv_file,
+    extract_text
 ]
 # Load system prompt
 system_prompt = """
+You are a general AI assistant. I will ask you a question.
+Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+If you are asked for a number, don't use comma to write your number neither use units
+such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles,
+neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+If you are asked for a comma separated list, apply the above rules depending of whether the element to be
+put in the list is a number or a string.
 """
 # System message
 sys_msg = SystemMessage(content=system_prompt)
         repo_id="gemini/gemini-2.0-flash",
         #"Qwen/Qwen2.5-Coder-32B-Instruct",
         #huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
+        api_key=GEMINI_API_KEY,
         temperature=0.3,
         max_new_tokens=512,
         timeout=60,

requirements.txt CHANGED Viewed

@@ -13,6 +13,8 @@ langchain-huggingface
 langchain-chroma
 chromadb # Explicitly add the Chroma database
 sentence-transformers
 # Hugging Face integration
 huggingface_hub
@@ -28,8 +30,8 @@ pytz
 wikipedia # For WikipediaLoader
 arxiv # For ArxivLoader
 assemblyai # For AssemblyAIAudioTranscriptLoader
-tree-sitter # For LanguageParser
-tree-sitter-languages # For LanguageParser
 # Additional utilities
 typing-extensions

 langchain-chroma
 chromadb # Explicitly add the Chroma database
 sentence-transformers
+langfuse
+langchain-google-genai
 # Hugging Face integration
 huggingface_hub
 wikipedia # For WikipediaLoader
 arxiv # For ArxivLoader
 assemblyai # For AssemblyAIAudioTranscriptLoader
+# tree-sitter # For LanguageParser
+# tree-sitter-languages # For LanguageParser
 # Additional utilities
 typing-extensions

tools.py CHANGED Viewed

@@ -1,16 +1,19 @@
 import datetime
 import math
 import os
-from typing import Union
 import pandas
 import pytz
 from langchain_community.document_loaders import (
     ArxivLoader, AssemblyAIAudioTranscriptLoader, WikipediaLoader)
 from langchain_community.document_loaders.generic import GenericLoader
 from langchain_community.document_loaders.parsers import LanguageParser
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.tools import tool
 @tool
@@ -666,3 +669,82 @@ def audio_transcription(file_path: str) -> str:
     return {"audio_results": formatted_search_docs}

+import base64
 import datetime
 import math
 import os
+from typing import List, Union
 import pandas
 import pytz
+from langchain.schema import HumanMessage
 from langchain_community.document_loaders import (
     ArxivLoader, AssemblyAIAudioTranscriptLoader, WikipediaLoader)
 from langchain_community.document_loaders.generic import GenericLoader
 from langchain_community.document_loaders.parsers import LanguageParser
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.tools import tool
+from langchain_google_genai import ChatGoogleGenerativeAI
 @tool
     return {"audio_results": formatted_search_docs}
+@tool
+def analyze_csv_file(file_path: str, query: str) -> str:
+    """
+    Analyze a CSV file using pandas and answer a question about it.
+    Args:
+        file_path (str): the path to the CSV file.
+        query (str): Question about the data
+    """
+    try:
+        file = pandas.read_csv(file_path)
+        result = f"CSV file loaded with {len(file)} rows and {len(file.columns)} columns.\n"
+        result += f"Columns: {', '.join(file.columns)}\n\n"
+        result += "Summary statistics:\n"
+        result += str(file.describe())
+        return result
+    except Exception as e:
+        return f"Error analyzing CSV file: {str(e)}"
+# Extract Text Tool
+vision_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)
+def extract_text(img_path: str) -> str:
+    """
+    Extract text from an image file using a multimodal model.
+    Args:
+        img_path: A local image file path (strings).
+    Returns:
+        A single string containing the concatenated text extracted from each image.
+    """
+    all_text = ""
+    try:
+        # Read image and encode as base64
+        with open(img_path, "rb") as image_file:
+            image_bytes = image_file.read()
+        image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+        # Prepare the prompt including the base64 image data
+        message = [
+            HumanMessage(
+                content=[
+                    {
+                        "type": "text",
+                        "text": (
+                            "Extract all the text from this image. "
+                            "Return only the extracted text, no explanations."
+                        ),
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/png;base64,{image_base64}"
+                        },
+                    },
+                ]
+            )
+        ]
+        # Call the vision-capable model
+        response = vision_llm.invoke(message)
+        # Append extracted text
+        all_text += response.content + "\n\n"
+        return all_text.strip()
+    except Exception as e:
+        # You can choose whether to raise or just return an empty string / error message
+        error_msg = f"Error extracting text: {str(e)}"
+        print(error_msg)
+        return ""