# image2drawio/nodes/core.py
# Author: Giustino98 — Initial commit (9bc9c3c)
from states.state import AgentState
import os
# Import the load_dotenv function from the dotenv library
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from tools.object_detection_tools import detect_objects_in_image as object_detection_tool
from tools.object_detection_tools import select_latest_image_versions as select_latest_image_versions_tool
from tools.object_detection_tools import verify_bounding_boxes as verify_bounding_boxes_tool
from tools.object_detection_tools import adjust_bounding_boxes_in_image as adjust_bounding_boxes_in_image_tool
from tools.drawio_tools import generate_drawio_from_image_and_objects as drawio_tool
from langfuse.callback import CallbackHandler
from langgraph_supervisor import create_supervisor
from langgraph.prebuilt import create_react_agent
from typing import TypedDict, Annotated
# Load environment variables from a .env file (if present) into the process env.
load_dotenv()

# Gemini API key and model configuration, read from the environment.
api_key = os.getenv("GEMINI_API_KEY")
GEMINI_MODEL_NAME = os.getenv("GEMINI_MODEL_NAME", "gemini-2.5-pro-preview-06-05")
# Token budget for the model's internal "thinking" phase; must be an integer.
GEMINI_THINKING_BUDGET = int(os.getenv("GEMINI_THINKING_BUDGET", "128"))

# Langfuse credentials for LangGraph/LangChain tracing.
langfuse_secret_key = os.getenv("LANGFUSE_SECRET_KEY")
langfuse_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")

# Initialize the Langfuse CallbackHandler for tracing.
# The host was previously hard-coded; it is now configurable via LANGFUSE_HOST,
# defaulting to the same local instance for backward compatibility.
langfuse_handler = CallbackHandler(
    public_key=langfuse_public_key,
    secret_key=langfuse_secret_key,
    host=os.getenv("LANGFUSE_HOST", "http://localhost:3000")
)
# Shared Gemini chat model used by every agent in this module.
# temperature=0 keeps output deterministic; max_retries guards against
# transient API failures.
_chat_config = {
    "model": GEMINI_MODEL_NAME,
    "temperature": 0,
    "max_retries": 2,
    "google_api_key": api_key,
    "thinking_budget": GEMINI_THINKING_BUDGET,
}
chat = ChatGoogleGenerativeAI(**_chat_config)
# ReAct agent that detects 3rd-party (non-default-drawio) objects in the
# user's image and returns verified bounding boxes plus labels for the
# drawio-generation agent.
object_detection_agent = create_react_agent(
    model=chat,
    tools=[
        object_detection_tool,
        select_latest_image_versions_tool,
        verify_bounding_boxes_tool,
        adjust_bounding_boxes_in_image_tool,
    ],
    name="object_detection_agent",
    # NOTE: the original prompt concatenated adjacent string literals without
    # separators, fusing sentences together (e.g. "images.You need"); each
    # sentence now ends with a single trailing space so the joined prompt
    # reads correctly.
    prompt=(
        "You are an expert into extracting objects from images. "
        "You need to extract 3rd party custom pics (not present in default drawio library) from the images of the user. "
        "Which pics to extract, is something you need to decide with your tools and will not be provided by the user. "
        "These pics will be then used to build a drawio diagram. "
        "Once you have extracted the bounding boxes, verify if the result is correct with your tools. "
        "You have access to tools for object detection: one for generating the first detection, another for corrections. "
        "The resulting bounding boxes will be used to generate a drawio diagram. "
        "You need to return the bounding boxes labels, in case there are spaces insert underscore. "
        "Your labels will be used by another agent to read the images and use them to build a drawio diagram. "
        "After 3 iterations with your tools, stop using the tool and provide the answer, even if partial. "
        "Before returning the result, you need to call the select latest image versions tool and return the list provided by the tool."
    ),
)
# ReAct agent that builds the final drawio diagram from the input image and
# the objects extracted by the detection agent.
drawio_generator_agent = create_react_agent(
    model=chat,
    name="drawio_generator_agent",
    tools=[drawio_tool],
    prompt="You are an expert into generating drawio diagrams. Your goal is to generate a drawio based on the image in input.",
)
# Supervisor that orchestrates the two worker agents: object detection first,
# then drawio generation from the extracted images.
workflow = create_supervisor(
    [object_detection_agent, drawio_generator_agent],
    model=chat,
    # NOTE: the original prompt joined the two bullet lines with no separator
    # ("...object_detection_agent)- Generate..."); newlines now keep the
    # steps distinct.
    prompt=(
        "You are a team supervisor managing a process that involves object detection and diagram generation. "
        "Your goal is to generate a drawio file following the steps:\n"
        "- Extract objects from the original image that user provides in local path (use object_detection_agent)\n"
        "- Generate a drawio using the images extracted at the previous step (use drawio_generator_agent)"
    ),
)

# Compile the supervisor state graph into a runnable LangGraph application.
graph = workflow.compile()