from typing import Dict, List

import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
from smolagents.tools import Tool


class OdooDocumentationSearchTool(Tool):
    """Rank text snippets of an Odoo documentation page against a query.

    The tool downloads the single page at
    ``https://www.odoo.com/documentation/<version>/``, extracts the text of
    its heading, paragraph and list-item elements, scores every snippet
    against the query with sentence embeddings, and then nudges the best
    candidates with a simple keyword count.
    """

    name = "odoo_documentation_search"
    description = (
        "Searches the Odoo documentation for functional or technical queries "
        "and returns related results for a specific Odoo version."
    )
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query (e.g., 'how to create a new module')",
        },
        "version": {
            "type": "string",
            "description": "The Odoo version to search (e.g., '16.0', '17.0', '18.0')",
        },
    }
    output_type = "array"

    def __init__(self, query=None):
        """Initialise the tool and load the sentence-embedding model.

        Args:
            query: Not used by the constructor; kept in the signature so
                existing callers that pass it keep working.
        """
        # Let the base class set up its own state before we add ours.
        super().__init__()
        # Load the SentenceTransformer model once; it is reused by every call.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.is_initialized = True

    def forward(self, query: str, version: str) -> List[Dict]:
        """Search the documentation page of *version* for *query*.

        Args:
            query: Free-text search query.
            version: Odoo version string inserted verbatim into the
                documentation URL (e.g. ``'17.0'``).

        Returns:
            Up to five dicts with the keys ``"Result"`` (snippet text) and
            ``"Score"`` (adjusted score, as a string), best match first.
            An empty list when the page contains no non-blank snippet.
            A single dict with the key ``"Error"`` when the HTTP request
            fails (connection error, timeout, or non-success status).
        """
        request_timeout = 30  # seconds; without it a stalled server blocks forever
        top_k = 10  # candidates handed to the keyword reranker
        top_n = 5  # results returned to the caller

        base_url = f"https://www.odoo.com/documentation/{version}/"

        # Only the HTTP round trip can raise RequestException, so keep the
        # try body limited to it.
        try:
            response = requests.get(base_url, timeout=request_timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return [{"Error": f"Error fetching Odoo documentation: {str(e)}"}]

        soup = BeautifulSoup(response.content, "html.parser")

        # Extract relevant sections from the documentation, dropping blank
        # snippets: they carry no information and would otherwise compete
        # for a place in the results.
        texts = (
            element.get_text().strip()
            for element in soup.find_all(['h1', 'h2', 'h3', 'p', 'li'])
        )
        sections = [text for text in texts if text]

        # Nothing to rank; also avoids encoding an empty batch.
        if not sections:
            return []

        # Embed the sections and the query
        section_embeddings = self.model.encode(sections, convert_to_tensor=True)
        query_embedding = self.model.encode(query, convert_to_tensor=True)

        # Cosine similarity of the query against every section; row 0 is the
        # only row because a single query was encoded.
        cosine_scores = util.pytorch_cos_sim(query_embedding, section_embeddings)[0]

        # Rank the sections based on similarity scores
        ranked_sections = sorted(
            zip(sections, cosine_scores),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Rerank the top-k sections (placeholder - replace with an actual
        # reranking implementation)
        reranked_sections = self.rerank_sections(ranked_sections[:top_k], query)

        # float() accepts both 0-dim tensors and plain numbers.
        return [
            {"Result": section, "Score": str(float(score))}
            for section, score in reranked_sections[:top_n]
        ]

    def rerank_sections(self, ranked_sections: List[tuple], query: str) -> List[tuple]:
        """Rerank sections by adding a keyword-occurrence bonus to each score.

        Args:
            ranked_sections: ``(section_text, score)`` pairs.
            query: The search query; it is lower-cased, split on whitespace,
                and stripped of a small set of English stop words.

        Returns:
            A new list of ``(section_text, adjusted_score)`` pairs sorted by
            adjusted score, highest first. The adjusted score is the
            original score plus the number of times each query keyword
            occurs as a substring of the lower-cased section.
        """
        stop_words = {'a', 'an', 'the', 'is', 'are', 'in', 'on', 'at', 'to', 'for', 'of'}
        query_keywords = [
            word for word in query.lower().split() if word not in stop_words
        ]

        reranked_sections = []
        for section, score in ranked_sections:
            # Lower-case once per section rather than once per keyword.
            lowered = section.lower()
            keyword_score = sum(lowered.count(keyword) for keyword in query_keywords)
            reranked_sections.append((section, score + keyword_score))

        # Stable sort: sections with equal adjusted scores keep their
        # incoming relative order.
        reranked_sections.sort(key=lambda pair: pair[1], reverse=True)
        return reranked_sections