Odoo16to18_code_document_job_search / tools /odoo_documentation_search.py
VPCSinfo's picture
[IMP] use all-MiniLM-L6-v2 for faster download and better speed.
c9c06b9
from smolagents.tools import Tool
import requests
from typing import List, Dict
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
class OdooDocumentationSearchTool(Tool):
    """Tool that searches the Odoo documentation landing page for a version.

    The page at ``https://www.odoo.com/documentation/<version>/`` is fetched,
    the text of its headings, paragraphs and list items is embedded with a
    SentenceTransformer model, and the fragments most similar to the query
    are returned after a keyword-based rerank.
    """

    name = "odoo_documentation_search"
    description = "Searches the Odoo documentation for functional or technical queries and returns related results for a specific Odoo version."
    inputs = {
        "query": {"type": "string", "description": "The search query (e.g., 'how to create a new module')"},
        "version": {"type": "string", "description": "The Odoo version to search (e.g., '16.0', '17.0', '18.0')"},
    }
    output_type = "array"

    def __init__(self, query=None):
        """Load the embedding model and mark the tool as initialized.

        Args:
            query: Unused; kept so existing callers that pass it keep working.
        """
        # Let the base class run its own setup before this tool's state is set.
        super().__init__()
        # Load the SentenceTransformer model
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.is_initialized = True

    def forward(self, query: str, version: str) -> List[Dict]:
        """Search the Odoo documentation using semantic search and reranking.

        Args:
            query: Free-text search query.
            version: Odoo version segment used in the documentation URL
                (e.g. ``"17.0"``).

        Returns:
            Up to five dicts with the keys ``"Result"`` (the matched text) and
            ``"Score"`` (the adjusted score rendered as a string), best match
            first. An empty list is returned when the page has no extractable
            text. If the HTTP request fails, a single-element list holding an
            ``"Error"`` dict is returned instead.
        """
        top_k = 10  # candidates handed to the keyword reranker
        top_n = 5   # results returned to the caller

        base_url = f"https://www.odoo.com/documentation/{str(version).strip()}/"

        # Only the HTTP round trip can raise RequestException, so keep the
        # try body limited to it.
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(base_url, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return [{"Error": f"Error fetching Odoo documentation: {str(e)}"}]

        soup = BeautifulSoup(response.content, "html.parser")

        # Extract relevant sections from the documentation. Blank fragments
        # carry no information and repeated ones (e.g. navigation entries)
        # would only waste result slots, so drop both while keeping the
        # original page order.
        texts = (
            element.get_text().strip()
            for element in soup.find_all(['h1', 'h2', 'h3', 'p', 'li'])
        )
        sections = list(dict.fromkeys(text for text in texts if text))
        if not sections:
            # Nothing to embed or rank.
            return []

        # Embed the sections and the query
        section_embeddings = self.model.encode(sections, convert_to_tensor=True)
        query_embedding = self.model.encode(query, convert_to_tensor=True)

        # Cosine similarity between the query and every section; row 0 is the
        # single query's scores.
        cosine_scores = util.pytorch_cos_sim(query_embedding, section_embeddings)[0]

        # Rank the sections based on similarity scores, best first.
        ranked_sections = sorted(
            zip(sections, cosine_scores),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Rerank only the strongest semantic candidates with keyword counts.
        reranked_sections = self.rerank_sections(ranked_sections[:top_k], query)

        # float() accepts both 0-dim tensors and plain numbers.
        return [
            {"Result": section, "Score": str(float(score))}
            for section, score in reranked_sections[:top_n]
        ]

    def rerank_sections(self, ranked_sections: List[tuple], query: str) -> List[tuple]:
        """Rerank sections by adding keyword occurrence counts to their scores.

        Args:
            ranked_sections: ``(section_text, score)`` pairs to rerank.
            query: The search query; its lower-cased, whitespace-separated
                words (minus a small stop-word set) are used as keywords.

        Returns:
            A new list of ``(section_text, adjusted_score)`` pairs sorted by
            adjusted score in descending order. The adjusted score is the
            incoming score plus the number of (substring) occurrences of
            every keyword in the lower-cased section text.
        """
        stop_words = {'a', 'an', 'the', 'is', 'are', 'in', 'on', 'at', 'to', 'for', 'of'}

        # Extract keywords from the query
        query_keywords = [
            word for word in query.lower().split() if word not in stop_words
        ]

        reranked_sections = []
        for section, score in ranked_sections:
            # Lower-case once per section rather than once per keyword.
            lowered = section.lower()
            keyword_score = sum(lowered.count(keyword) for keyword in query_keywords)
            reranked_sections.append((section, score + keyword_score))

        # Sort the sections based on the adjusted scores
        reranked_sections.sort(key=lambda pair: pair[1], reverse=True)
        return reranked_sections