Odoo16to18_code_document_job_search

Running

App Files Files Community

Odoo16to18_code_document_job_search / tools /odoo_documentation_search.py

VPCSinfo

[IMP] use all-MiniLM-L6-v2 for faster download and better speed.

c9c06b9 6 months ago

raw

history blame contribute delete

3.63 kB

	from smolagents.tools import Tool
	import requests
	from typing import List, Dict
	from bs4 import BeautifulSoup
	from sentence_transformers import SentenceTransformer, util

	class OdooDocumentationSearchTool(Tool):
		"""Search the landing page of the Odoo documentation for a query.

		The tool downloads ``https://www.odoo.com/documentation/<version>/``,
		extracts the text of its headings, paragraphs and list items, ranks
		those snippets against the query by cosine similarity of sentence
		embeddings, and finally re-ranks the best candidates with a simple
		keyword count.
		"""

		name = "odoo_documentation_search"
		description = "Searches the Odoo documentation for functional or technical queries and returns related results for a specific Odoo version."

		inputs = {
			"query": {"type": "string", "description": "The search query (e.g., 'how to create a new module')"},
			"version": {"type": "string", "description": "The Odoo version to search (e.g., '16.0', '17.0', '18.0')"}
		}

		output_type = "array"

		def __init__(self, query=None):
			"""Load the sentence-embedding model used for ranking.

			Args:
				query: Unused; kept so existing callers that pass it keep working.
			"""
			super().__init__()
			# Load the SentenceTransformer model once; it is reused by every call.
			self.model = SentenceTransformer('all-MiniLM-L6-v2')
			self.is_initialized = True

		def forward(self, query: str, version: str) -> List[Dict]:
			"""Return the documentation snippets most relevant to ``query``.

			Args:
				query: Free-text search query.
				version: Odoo version segment placed in the documentation URL
					(e.g. ``"16.0"``).

			Returns:
				Up to five dicts with the keys ``"Result"`` (snippet text) and
				``"Score"`` (adjusted similarity score as a string), best match
				first. An empty list when the page yields no text snippets. A
				single-element list holding an ``"Error"`` dict when the page
				cannot be fetched.
			"""
			base_url = f"https://www.odoo.com/documentation/{version}/"
			request_timeout = 30  # seconds; without it a stalled connection blocks forever
			rerank_pool_size = 10  # candidates handed to the keyword re-ranker
			top_n = 5  # results returned to the caller

			# Only the HTTP calls can raise RequestException, so only they are guarded.
			try:
				response = requests.get(base_url, timeout=request_timeout)
				response.raise_for_status()
			except requests.exceptions.RequestException as e:
				return [{"Error": f"Error fetching Odoo documentation: {str(e)}"}]

			soup = BeautifulSoup(response.content, "html.parser")

			# Extract relevant sections from the documentation. Empty snippets carry
			# no information, and nested tags (a <p> inside an <li>) produce the same
			# text twice; dict.fromkeys drops duplicates while keeping page order.
			texts = (
				element.get_text().strip()
				for element in soup.find_all(['h1', 'h2', 'h3', 'p', 'li'])
			)
			sections = list(dict.fromkeys(text for text in texts if text))
			if not sections:
				# Nothing to embed or rank.
				return []

			# Embed the sections and the query
			section_embeddings = self.model.encode(sections, convert_to_tensor=True)
			query_embedding = self.model.encode(query, convert_to_tensor=True)

			# Cosine similarity between the query and every section
			cosine_scores = util.pytorch_cos_sim(query_embedding, section_embeddings)[0]

			# Rank the sections by similarity, best first
			ranked_sections = sorted(
				zip(sections, cosine_scores),
				key=lambda pair: pair[1],
				reverse=True,
			)

			# Re-rank only the strongest semantic matches with the keyword heuristic
			reranked_sections = self.rerank_sections(ranked_sections[:rerank_pool_size], query)

			# float() accepts both a zero-dimensional tensor and a plain number.
			return [
				{"Result": section, "Score": str(float(score))}
				for section, score in reranked_sections[:top_n]
			]

		def rerank_sections(self, ranked_sections: List[tuple], query: str) -> List[tuple]:
			"""Re-rank sections by adding keyword hit counts to their scores.

			Args:
				ranked_sections: ``(section_text, score)`` pairs.
				query: The search query; it is lower-cased, split on whitespace
					and stripped of a small set of English stop words to obtain
					the keywords.

			Returns:
				A new list of ``(section_text, adjusted_score)`` pairs sorted by
				adjusted score, highest first. The adjusted score is the original
				score plus the number of (substring) occurrences of every keyword
				in the lower-cased section text.
			"""
			stop_words = {'a', 'an', 'the', 'is', 'are', 'in', 'on', 'at', 'to', 'for', 'of'}
			query_keywords = [word for word in query.lower().split() if word not in stop_words]

			reranked_sections = []
			for section, score in ranked_sections:
				# Lower-case once per section instead of once per keyword.
				lowered_section = section.lower()
				keyword_score = sum(lowered_section.count(keyword) for keyword in query_keywords)
				reranked_sections.append((section, score + keyword_score))

			# Sort the sections based on the adjusted scores
			reranked_sections.sort(key=lambda pair: pair[1], reverse=True)

			return reranked_sections