Spaces:
Sleeping
Sleeping
EtienneB
committed on
Commit
·
042d1d5
1
Parent(s):
36dd2e6
updated all
Browse files- agent.py +22 -16
- requirements.txt +4 -2
- tools.py +83 -1
agent.py
CHANGED
@@ -8,19 +8,19 @@ from langchain_huggingface import (ChatHuggingFace, HuggingFaceEmbeddings,
|
|
8 |
from langgraph.graph import START, MessagesState, StateGraph
|
9 |
from langgraph.prebuilt import ToolNode, tools_condition
|
10 |
|
11 |
-
from tools import (absolute, add,
|
12 |
-
audio_transcription, compound_interest,
|
13 |
-
divide, exponential,
|
14 |
-
|
15 |
-
is_prime, least_common_multiple,
|
16 |
-
multiply, percentage_calculator, power,
|
17 |
-
roman_calculator_converter, square_root,
|
18 |
-
web_search, wiki_search)
|
19 |
|
20 |
# Load Constants
|
21 |
load_dotenv()
|
22 |
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
23 |
-
|
24 |
|
25 |
tools = [
|
26 |
multiply, add, subtract, power, divide, modulus,
|
@@ -29,18 +29,24 @@ tools = [
|
|
29 |
get_current_time_in_timezone, compound_interest,
|
30 |
convert_temperature, factorial, greatest_common_divisor,
|
31 |
is_prime, least_common_multiple, percentage_calculator,
|
32 |
-
wiki_search, analyze_excel_file, arvix_search,
|
|
|
|
|
33 |
]
|
34 |
|
35 |
# Load system prompt
|
36 |
system_prompt = """
|
37 |
-
You are a
|
38 |
-
|
39 |
-
FINAL ANSWER
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
42 |
"""
|
43 |
|
|
|
44 |
# System message
|
45 |
sys_msg = SystemMessage(content=system_prompt)
|
46 |
|
@@ -94,7 +100,7 @@ def build_graph():
|
|
94 |
repo_id="gemini/gemini-2.0-flash",
|
95 |
#"Qwen/Qwen2.5-Coder-32B-Instruct",
|
96 |
#huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
97 |
-
api_key=
|
98 |
temperature=0.3,
|
99 |
max_new_tokens=512,
|
100 |
timeout=60,
|
|
|
8 |
from langgraph.graph import START, MessagesState, StateGraph
|
9 |
from langgraph.prebuilt import ToolNode, tools_condition
|
10 |
|
11 |
+
from tools import (absolute, add, analyze_csv_file, analyze_excel_file,
|
12 |
+
arvix_search, audio_transcription, compound_interest,
|
13 |
+
convert_temperature, divide, exponential, extract_text,
|
14 |
+
factorial, floor_divide, get_current_time_in_timezone,
|
15 |
+
greatest_common_divisor, is_prime, least_common_multiple,
|
16 |
+
logarithm, modulus, multiply, percentage_calculator, power,
|
17 |
+
python_code_parser, roman_calculator_converter, square_root,
|
18 |
+
subtract, web_search, wiki_search)
|
19 |
|
20 |
# Load Constants
|
21 |
load_dotenv()
|
22 |
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
23 |
+
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
24 |
|
25 |
tools = [
|
26 |
multiply, add, subtract, power, divide, modulus,
|
|
|
29 |
get_current_time_in_timezone, compound_interest,
|
30 |
convert_temperature, factorial, greatest_common_divisor,
|
31 |
is_prime, least_common_multiple, percentage_calculator,
|
32 |
+
wiki_search, analyze_excel_file, arvix_search,
|
33 |
+
audio_transcription, python_code_parser, analyze_csv_file,
|
34 |
+
extract_text
|
35 |
]
|
36 |
|
37 |
# Load system prompt
|
38 |
system_prompt = """
|
39 |
+
You are a general AI assistant. I will ask you a question.
|
40 |
+
Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
|
41 |
+
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
|
42 |
+
If you are asked for a number, don't use comma to write your number neither use units
|
43 |
+
such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles,
|
44 |
+
neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
|
45 |
+
If you are asked for a comma separated list, apply the above rules depending of whether the element to be
|
46 |
+
put in the list is a number or a string.
|
47 |
"""
|
48 |
|
49 |
+
|
50 |
# System message
|
51 |
sys_msg = SystemMessage(content=system_prompt)
|
52 |
|
|
|
100 |
repo_id="gemini/gemini-2.0-flash",
|
101 |
#"Qwen/Qwen2.5-Coder-32B-Instruct",
|
102 |
#huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
103 |
+
api_key=GEMINI_API_KEY,
|
104 |
temperature=0.3,
|
105 |
max_new_tokens=512,
|
106 |
timeout=60,
|
requirements.txt
CHANGED
@@ -13,6 +13,8 @@ langchain-huggingface
|
|
13 |
langchain-chroma
|
14 |
chromadb # Explicitly add the Chroma database
|
15 |
sentence-transformers
|
|
|
|
|
16 |
|
17 |
# Hugging Face integration
|
18 |
huggingface_hub
|
@@ -28,8 +30,8 @@ pytz
|
|
28 |
wikipedia # For WikipediaLoader
|
29 |
arxiv # For ArxivLoader
|
30 |
assemblyai # For AssemblyAIAudioTranscriptLoader
|
31 |
-
tree-sitter # For LanguageParser
|
32 |
-
tree-sitter-languages # For LanguageParser
|
33 |
|
34 |
# Additional utilities
|
35 |
typing-extensions
|
|
|
13 |
langchain-chroma
|
14 |
chromadb # Explicitly add the Chroma database
|
15 |
sentence-transformers
|
16 |
+
langfuse
|
17 |
+
langchain-google-genai
|
18 |
|
19 |
# Hugging Face integration
|
20 |
huggingface_hub
|
|
|
30 |
wikipedia # For WikipediaLoader
|
31 |
arxiv # For ArxivLoader
|
32 |
assemblyai # For AssemblyAIAudioTranscriptLoader
|
33 |
+
# tree-sitter # For LanguageParser
|
34 |
+
# tree-sitter-languages # For LanguageParser
|
35 |
|
36 |
# Additional utilities
|
37 |
typing-extensions
|
tools.py
CHANGED
@@ -1,16 +1,19 @@
|
|
|
|
1 |
import datetime
|
2 |
import math
|
3 |
import os
|
4 |
-
from typing import Union
|
5 |
|
6 |
import pandas
|
7 |
import pytz
|
|
|
8 |
from langchain_community.document_loaders import (
|
9 |
ArxivLoader, AssemblyAIAudioTranscriptLoader, WikipediaLoader)
|
10 |
from langchain_community.document_loaders.generic import GenericLoader
|
11 |
from langchain_community.document_loaders.parsers import LanguageParser
|
12 |
from langchain_community.tools import DuckDuckGoSearchRun
|
13 |
from langchain_core.tools import tool
|
|
|
14 |
|
15 |
|
16 |
@tool
|
@@ -666,3 +669,82 @@ def audio_transcription(file_path: str) -> str:
|
|
666 |
|
667 |
return {"audio_results": formatted_search_docs}
|
668 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
import datetime
|
3 |
import math
|
4 |
import os
|
5 |
+
from typing import List, Union
|
6 |
|
7 |
import pandas
|
8 |
import pytz
|
9 |
+
from langchain.schema import HumanMessage
|
10 |
from langchain_community.document_loaders import (
|
11 |
ArxivLoader, AssemblyAIAudioTranscriptLoader, WikipediaLoader)
|
12 |
from langchain_community.document_loaders.generic import GenericLoader
|
13 |
from langchain_community.document_loaders.parsers import LanguageParser
|
14 |
from langchain_community.tools import DuckDuckGoSearchRun
|
15 |
from langchain_core.tools import tool
|
16 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
17 |
|
18 |
|
19 |
@tool
|
|
|
669 |
|
670 |
return {"audio_results": formatted_search_docs}
|
671 |
|
672 |
+
|
673 |
+
@tool
def analyze_csv_file(file_path: str, query: str) -> str:
    """
    Analyze a CSV file using pandas and summarize it for a given question.

    Args:
        file_path (str): the path to the CSV file.
        query (str): Question about the data.

    Returns:
        str: row/column counts, column names and summary statistics for the
        file (echoing the question for context), or an error message if the
        file could not be read or parsed.
    """
    try:
        file = pandas.read_csv(file_path)

        # Fix: the original accepted `query` but never used it, even though
        # the docstring promises an answer about the data — echo it so the
        # calling LLM can relate the statistics back to the question.
        result = f"Question: {query}\n" if query else ""
        result += f"CSV file loaded with {len(file)} rows and {len(file.columns)} columns.\n"
        result += f"Columns: {', '.join(file.columns)}\n\n"

        result += "Summary statistics:\n"
        result += str(file.describe())

        return result

    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising,
        # so the agent loop keeps running.
        return f"Error analyzing CSV file: {str(e)}"
|
694 |
+
|
695 |
+
|
696 |
+
# Extract Text Tool
# Vision-capable model, instantiated once at import time.
# NOTE(review): this makes importing tools.py require Gemini credentials to
# be configured — consider lazy initialization; confirm with deployment setup.
vision_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0)


# NOTE(review): unlike the other tools this function is NOT decorated with
# @tool, yet agent.py places it in the `tools` list — confirm that the graph's
# ToolNode accepts a plain callable here, otherwise add the decorator.
def extract_text(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.

    Args:
        img_path: A local image file path (string).

    Returns:
        The text extracted from the image, stripped of surrounding
        whitespace, or an error message if reading or extraction failed.
    """
    try:
        # Read the image and encode it as base64 so it can be inlined
        # in the prompt as a data URL.
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()

        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Prepare a multimodal message: instruction text + inline image data.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model.
        response = vision_llm.invoke(message)

        # Single image, so no accumulator is needed (the original kept a
        # dead `all_text` buffer that only ever held one response).
        return response.content.strip()

    except Exception as e:
        # Fix: the original printed the error and returned "" — an empty
        # string gives the calling agent no signal about what went wrong.
        # Surface the error text instead (still no exception, so the agent
        # loop survives).
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return error_msg
|