"""Generate self-contained Draw.io diagrams from an image with Gemini.

The module asks a Gemini model to produce Draw.io XML for an input image,
asks the model a second time to verify/correct that XML, and finally embeds
every locally referenced image as base64 data so the resulting ``.drawio``
file has no external file dependencies.
"""

import base64
import json  # Added for JSON parsing
import mimetypes
import os
import re
from io import BytesIO
from typing import Any, Dict, List as TypingList, Optional, Union
from xml.etree import ElementTree as ET

from google import genai
from google.genai import types
from langchain_core.tools import tool
from langfuse import Langfuse
from langfuse.decorators import observe, langfuse_context
from PIL import Image

GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
GEMINI_MODEL_NAME = os.getenv("GEMINI_MODEL_NAME", "gemini-2.5-pro-preview-06-05")
GEMINI_THINKING_BUDGET = int(os.getenv("GEMINI_THINKING_BUDGET", "128"))

if not GOOGLE_API_KEY:
    # Only warn at import time: the tool itself reports the missing key when
    # it is invoked, so importing this module never fails because of it.
    print("Attenzione: GEMINI_API_KEY non trovato nelle variabili d'ambiente.")

# Same model as the one configured for object_detection_tools.py, for
# consistency between detection and diagram generation.
MODEL_NAME = GEMINI_MODEL_NAME

try:
    client = genai.Client(api_key=GOOGLE_API_KEY)
except Exception as e:
    print(f"Errore durante l'inizializzazione del client GenAI: {e}")
    client = None  # The tool checks for this and returns an error message.

# Global safety settings
SAFETY_SETTINGS = [
    types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH"),
    types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="BLOCK_ONLY_HIGH"),
    types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="BLOCK_ONLY_HIGH"),
    types.SafetySetting(category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="BLOCK_ONLY_HIGH"),
]

# Langfuse initialization
LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY")
LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY")
LANGFUSE_HOST = os.getenv("LANGFUSE_HOST", "http://localhost:3000")  # Default to local if not set

if LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY:
    try:
        # NOTE(review): the client instance is not kept; presumably the
        # decorators pick their configuration up from the same environment
        # variables - confirm against the installed Langfuse version.
        Langfuse(
            public_key=LANGFUSE_PUBLIC_KEY,
            secret_key=LANGFUSE_SECRET_KEY,
            host=LANGFUSE_HOST,
        )
        print(f"Langfuse tracing enabled for {__name__}")
    except Exception as e:
        print(f"Failed to initialize Langfuse for {__name__}: {e}. Tracing will be disabled.")

# Matches ``image=<ref>`` inside a Draw.io style string, where <ref> ends in
# a known image extension and may be wrapped in single or double quotes.
# Group 1 is the (possibly empty) quote character, group 2 the reference.
_IMAGE_REF_RE = re.compile(
    r"image=(['\"]?)([^'\";,\s]+\.(?:png|jpg|jpeg|gif|bmp|svg))\1",
    re.IGNORECASE,
)

# Opening markdown fence with an optional language tag (``` or ```xml).
_OPENING_FENCE_RE = re.compile(r"^```[\w-]*")

# Prefix of the message save_drawio_xml returns on failure; the generation
# tool uses it to tell a failed save from a successful one.
_SAVE_ERROR_PREFIX = "Errore durante il salvataggio del file Draw.io"

# System instructions for the first call: plain filename references only.
_SIMPLE_REF_INSTRUCTIONS = """
You are an expert Draw.io diagram generator.
Create Draw.io XML using simple filename references for images.
Structure:
Use simple filename references like 'image=cat.png' - do NOT embed base64 data.
Position elements to match the original image layout.
"""

# System instructions for the second call: verify and correct the XML.
_VERIFY_CORRECT_XML_INSTRUCTIONS = """
You are an expert Draw.io diagram verifier and corrector.
You will be given an original image and a Draw.io XML generated for that image.
Your task is to:
1. Verify if the XML accurately represents the objects, their positions, sizes, and connections as shown in the original image.
2. Correct any inaccuracies in the XML. This includes adjusting positions, sizes, shapes, or connections.
3. Ensure all image references within the XML use simple filename references (e.g., image=filename.png). Do NOT use base64 encoding.
4. Ensure the XML structure is valid Draw.io format.
Return ONLY the corrected Draw.io XML. Do not include any other text, explanations, or markdown formatting around the XML.
If the XML is already perfect, return it as is.
Focus on accuracy of representation and valid Draw.io XML output.
"""


def convert_image_to_base64(image_path: str) -> Optional[str]:
    """
    Convert an image file to a base64 data URI in the Draw.io style format.

    Args:
        image_path: Path of the image file.

    Returns:
        A string of the form ``data:<mime>,<base64>`` or None when the file
        is missing or cannot be read.
    """
    if not os.path.exists(image_path):
        print(f"Warning: Image {image_path} not found")
        return None

    try:
        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type or not mime_type.startswith('image/'):
            mime_type = 'image/png'  # default fallback

        with open(image_path, 'rb') as img_file:
            base64_str = base64.b64encode(img_file.read()).decode('utf-8')

        # Draw.io format: the usual ";base64" marker is omitted on purpose,
        # because ";" is the separator between entries of a style string.
        return f"data:{mime_type},{base64_str}"
    except Exception as e:
        print(f"Error converting {image_path} to base64: {e}")
        return None


def _local_image_filename(image_ref: str) -> Optional[str]:
    """Return the bare file name of a local image reference, else None.

    ``data:`` URIs (already embedded) and remote URLs such as ``http://``
    are not local files, so None is returned for them. ``file://`` prefixes
    and directory components are dropped by taking the basename.
    """
    lowered = image_ref.lower()
    if lowered.startswith("data:"):
        return None
    if "://" in image_ref and not lowered.startswith("file://"):
        return None
    return os.path.basename(image_ref) or None


def replace_image_references_in_drawio_xml(xml_content: str, base_folder: str = "output_llm") -> str:
    """
    Replace image file references in Draw.io XML with embedded base64 data.

    This variant works on the raw text with a regular expression, so it also
    copes with XML that is not well formed. Every ``image=<file>`` occurrence
    (optionally quoted, optionally prefixed with ``file://`` or a directory)
    whose file exists in ``base_folder`` is rewritten in place; references
    that cannot be converted are left untouched.

    Args:
        xml_content: Draw.io XML as a string.
        base_folder: Folder in which the referenced images are looked up.

    Returns:
        The XML with the convertible references embedded, or the original
        string unchanged if processing fails.
    """
    # One conversion (and one log line) per distinct file name.
    converted: Dict[str, Optional[str]] = {}

    def _embed(match: "re.Match[str]") -> str:
        quote_char, image_ref = match.group(1), match.group(2)
        filename = _local_image_filename(image_ref)
        if filename is None:
            return match.group(0)

        if filename not in converted:
            base64_data = convert_image_to_base64(os.path.join(base_folder, filename))
            converted[filename] = base64_data
            if base64_data:
                print(f"Replaced image reference: {filename} -> base64 ({len(base64_data)} chars)")
            else:
                print(f"Failed to convert image: {filename}")

        base64_data = converted[filename]
        if not base64_data:
            return match.group(0)
        # Replacing the matched span directly keeps the original quoting and
        # works whatever prefix (file://, ./, sub-directory) the ref used.
        return f'image={quote_char}{base64_data}{quote_char}'

    try:
        return _IMAGE_REF_RE.sub(_embed, xml_content)
    except Exception as e:
        print(f"Error processing XML: {e}")
        return xml_content  # Best effort: return the original on error


def replace_image_references_xml_parser(xml_content: str, base_folder: str = "output_llm") -> str:
    """
    Replace image file references using an XML parser for better precision.

    Only the ``style`` attribute of ``mxCell`` elements is inspected. If the
    XML cannot be parsed, the regex based
    ``replace_image_references_in_drawio_xml`` is used as a fallback.

    Args:
        xml_content: Draw.io XML as a string.
        base_folder: Folder in which the referenced images are looked up.

    Returns:
        The re-serialized XML with convertible references embedded. On an
        unexpected error the original string is returned unchanged.
    """
    try:
        root = ET.fromstring(xml_content)
        # Read each image at most once even when several cells reference it.
        converted: Dict[str, Optional[str]] = {}

        for cell in root.iter('mxCell'):
            style = cell.get('style', '')
            if 'image=' not in style:
                continue

            new_style_parts = []
            for part in style.split(';'):
                if not part.startswith('image='):
                    new_style_parts.append(part)
                    continue

                image_ref = part[len('image='):]
                # Drop a matching pair of surrounding quotes, if any.
                if len(image_ref) >= 2 and image_ref[0] == image_ref[-1] and image_ref[0] in ('"', "'"):
                    image_ref = image_ref[1:-1]

                filename = _local_image_filename(image_ref)
                if filename is None:
                    # Already embedded, remote, or empty: nothing to look up.
                    new_style_parts.append(part)
                    continue

                if filename not in converted:
                    converted[filename] = convert_image_to_base64(
                        os.path.join(base_folder, filename)
                    )
                base64_data = converted[filename]

                if base64_data:
                    new_style_parts.append(f'image={base64_data}')
                    print(f"XML Parser: Replaced {filename} with base64 data")
                else:
                    new_style_parts.append(part)  # Keep original if conversion fails

            cell.set('style', ';'.join(new_style_parts))

        return ET.tostring(root, encoding='unicode')

    except ET.ParseError as e:
        print(f"XML parsing error: {e}")
        # Fall back to the regex method, which does not need valid XML.
        return replace_image_references_in_drawio_xml(xml_content, base_folder)
    except Exception as e:
        print(f"Error in XML parser method: {e}")
        return xml_content


def _strip_markdown_fences(text: Optional[str]) -> str:
    """Return ``text`` without a surrounding markdown code fence.

    Handles both a bare opening fence and one with a language tag
    (for example "xml"). None is treated as an empty string.
    """
    cleaned = (text or "").strip()
    cleaned = _OPENING_FENCE_RE.sub("", cleaned, count=1)
    if cleaned.endswith("```"):
        cleaned = cleaned[:-len("```")]
    return cleaned.strip()


def _usage_details(response: Any) -> Dict[str, Any]:
    """Extract token counts from a response, tolerating missing metadata."""
    usage = getattr(response, "usage_metadata", None)
    return {
        "input": getattr(usage, "prompt_token_count", None),
        "output": getattr(usage, "candidates_token_count", None),
        "total": getattr(usage, "total_token_count", None),
    }


def _build_generation_prompt(object_names: TypingList[str], object_image_folder: str) -> str:
    """Build the user prompt for the initial XML generation call."""
    prompt_parts = [
        "Generate a Draw.io XML diagram for the provided original image.",
        "The diagram should represent the overall scene, focusing on spatial relationships and composition.",
    ]

    if object_names:
        object_filenames_str = ", ".join(f"'{name}'" for name in object_names)
        prompt_parts.extend([
            f"Incorporate the following object images as assets: {object_filenames_str}.",
            f"These images are in the '{object_image_folder}' directory.",
            "Use simple filename references in the image attribute, like: image=cat.png",
            "Do NOT use base64 encoding - just use the filename directly.",
            "The image paths will be processed later to embed the actual image data.",
        ])

    prompt_parts.extend([
        "Position and size elements based on their approximate location in the original image.",
        "Create complete Draw.io XML structure with proper mxGraphModel, root, and mxCell elements.",
        "Ensure all mxCell elements have unique id attributes.",
    ])
    return " ".join(prompt_parts)


def _request_drawio_xml(
    contents: TypingList[Any],
    system_instruction: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> str:
    """Call the model once, record the call in Langfuse and return clean XML.

    Raises:
        ValueError: If the model returns no text (for example when the
            response was blocked).
    """
    response = client.models.generate_content(
        contents=contents,
        model=MODEL_NAME,
        config=types.GenerateContentConfig(
            system_instruction=system_instruction,
            temperature=0,  # Low temperature for more deterministic output
            safety_settings=SAFETY_SETTINGS,
            thinking_config=types.ThinkingConfig(thinking_budget=GEMINI_THINKING_BUDGET),
        ),
    )

    observation: Dict[str, Any] = {
        "input": contents,
        "model": MODEL_NAME,
        "usage_details": _usage_details(response),
    }
    if metadata:
        observation["metadata"] = metadata
    langfuse_context.update_current_observation(**observation)

    xml_output = _strip_markdown_fences(response.text)
    if not xml_output:
        raise ValueError("Il modello non ha restituito alcun XML.")
    return xml_output


# Registered as a LangChain tool. Because parse_docstring=True is set, the
# docstring must stay in Google style: an "Args:" block separated by blank
# lines, and no ":" on the continuation lines of an argument description.
# Internally the model is called twice (generation, then verification and
# correction) before the image references are embedded.
@tool("generate_drawio_from_image_and_objects_tool", parse_docstring=True)
@observe(as_type="generation")
def generate_drawio_from_image_and_objects(original_image_path: str, object_names: list[str]) -> Union[bool, str]:
    """
    Generates a Draw.io XML diagram from an original image and a list of detected object names.

    The function first instructs a generative model to create a Draw.io XML representation
    of the scene in the original image. It then incorporates references to cropped images
    of specified objects (expected to be in the 'output_llm' folder). Finally, it post-processes
    this XML to replace all local image file references with their base64 encoded data,
    making the Draw.io diagram self-contained.

    Args:
        original_image_path (str): The file path to the original image to be diagrammed.
        object_names (list[str]): A list of object names (e.g., ['cat.png', 'dog.png'])
            that have been previously detected and saved as image files in the
            'output_llm' folder. These will be embedded into the diagram.

    Returns:
        bool | str: True if the Draw.io XML was successfully generated and saved,
        otherwise an error message describing what went wrong.
    """
    if not GOOGLE_API_KEY or not client:
        return "Errore: GEMINI_API_KEY non configurato o client non inizializzato."

    try:
        with open(original_image_path, "rb") as f:
            img_bytes = f.read()
        original_image = Image.open(BytesIO(img_bytes))
        # Downscale in place to bound the size of the request payload.
        original_image.thumbnail((1024, 1024), Image.Resampling.LANCZOS)

        object_image_folder = "output_llm"
        user_prompt = _build_generation_prompt(object_names, object_image_folder)

        # First call: generate the XML with plain filename references.
        xml_output = _request_drawio_xml(
            [user_prompt, original_image],
            _SIMPLE_REF_INSTRUCTIONS,
        )

        # Second call: verify the generated XML against the image and fix it.
        print("Second LLM call: Verifying and correcting generated XML...")
        verification_prompt_parts = [
            original_image,
            f"Generated Draw.io XML to verify and correct:\n{xml_output}",
            "Please verify this XML against the original image. Correct any errors in object placement, connections, or representation. Ensure all image references are simple filenames like 'image=filename.png'. Return only the corrected Draw.io XML.",
        ]
        xml_output = _request_drawio_xml(
            verification_prompt_parts,
            _VERIFY_CORRECT_XML_INSTRUCTIONS,
            metadata={"step": "xml_correction"},  # Distinguishes this call in the trace
        )
        print("XML verification/correction complete.")

        # Post-processing: embed the referenced images as base64.
        print("Post-processing: Converting image references to base64...")
        final_xml = replace_image_references_xml_parser(xml_output, object_image_folder)

        save_message = save_drawio_xml(final_xml, "drawio_output", output_directory=object_image_folder)
        print(save_message)
        # save_drawio_xml reports failures through its return value, so a
        # failed save must not be reported as success.
        if save_message.startswith(_SAVE_ERROR_PREFIX):
            return save_message
        return True

    except FileNotFoundError:
        return f"Errore: File immagine originale non trovato a {original_image_path}."
    except Exception as e:
        print(f"Errore dettagliato in generate_drawio_from_image_and_objects: {e}")
        return f"Errore durante la generazione dell'XML Draw.io: {str(e)}"


# Standalone function to post-process existing XML files
def post_process_drawio_xml_file(xml_file_path: str, base_folder: str = "output_llm", output_path: Optional[str] = None) -> str:
    """
    Post-process an existing Draw.io XML file to embed its image references.

    Args:
        xml_file_path: Path of the Draw.io XML file.
        base_folder: Folder in which the referenced images are looked up.
        output_path: Output path (if None, the original file is overwritten).

    Returns:
        The path of the processed file. If an error occurs, a message is
        printed and ``xml_file_path`` is returned.
    """
    try:
        with open(xml_file_path, 'r', encoding='utf-8') as f:
            xml_content = f.read()

        processed_xml = replace_image_references_xml_parser(xml_content, base_folder)

        if output_path is None:
            output_path = xml_file_path

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(processed_xml)

        print(f"Processed XML saved to: {output_path}")
        return output_path

    except Exception as e:
        print(f"Error processing XML file: {e}")
        return xml_file_path


def save_drawio_xml(xml_content: str, filename_prefix: str, output_directory: str = "output_llm") -> str:
    """
    Save a Draw.io XML string to a .drawio file.

    Args:
        xml_content (str): The XML string of the Draw.io diagram.
        filename_prefix (str): The prefix for the file name. The file is
            saved as '{filename_prefix}.drawio' unless the prefix already
            ends with '.drawio'.
        output_directory (str): The directory where the file is saved.
            Defaults to 'output_llm'.

    Returns:
        str: A success message containing the absolute path of the saved
        file, or an error message.
    """
    try:
        # exist_ok avoids the race between checking for and creating the
        # directory.
        os.makedirs(output_directory, exist_ok=True)

        # Make sure the file name ends with .drawio
        if filename_prefix.endswith(".drawio"):
            filename = filename_prefix
        else:
            filename = f"{filename_prefix}.drawio"

        file_path = os.path.join(output_directory, filename)

        with open(file_path, "w", encoding="utf-8") as f:
            f.write(xml_content)
        return f"File Draw.io salvato con successo in: {os.path.abspath(file_path)}"
    except Exception as e:
        return f"{_SAVE_ERROR_PREFIX}: {str(e)}"