Spaces:

samspeaks5
/

web-research-agent

Sleeping

App Files Files Community

web-research-agent / tools /content_analyzer.py

samspeaks5

initial commit

d445f2a verified 4 months ago

raw

history blame

3.83 kB

	from typing import Optional, Dict, Any, Type
	from crewai.tools import BaseTool
	from pydantic import Field, BaseModel

	# Define the input schema as a separate class
	class ContentAnalyzerArgs(BaseModel):
	query: str = Field(
	...,
	description="The search query to compare content against"
	)
	content: str = Field(
	...,
	description="The content to analyze for relevance and factuality"
	)

	class ContentAnalyzerTool(BaseTool):
	"""
	A tool for analyzing content relevance and factuality.
	This tool uses LLM to judge the relevance and factual accuracy of content
	in relation to a specific query.
	"""

	name: str = Field(
	default="Content Analyzer",
	description="Name of the content analysis tool"
	)
	description: str = Field(
	default=(
	"Use this tool to analyze the relevance and factuality of content "
	"in relation to a specific query. "
	"It helps filter out irrelevant or potentially non-factual information."
	),
	description="Description of what the content analyzer does"
	)

	# Define args_schema as a class attribute
	args_schema: Type[BaseModel] = ContentAnalyzerArgs

	def _run(self, query: str, content: str) -> Dict[str, Any]:
	"""
	Analyze the content for relevance and factuality.

	Args:
	query: The original search query
	content: The content to analyze

	Returns:
	Dict with analysis results including:
	- relevance_score: A score from 0-10 indicating relevance
	- factuality_score: A score from 0-10 indicating factual reliability
	- filtered_content: The processed content with irrelevant parts removed
	- analysis: Brief explanation of the judgment
	"""
	# The actual implementation will use the agent's LLM
	# via CrewAI's mechanism, returning the placeholders
	# for now which will be replaced during execution
	prompt = f"""
	You are a strict content judge evaluating web search results.

	QUERY: {query}
	CONTENT: {content}

	Analyze the content above with these criteria:
	1. Relevance to the query (score 0-10)
	2. Factual accuracy and reliability (score 0-10)
	3. Information quality

	For content scoring below 5 on relevance, discard it entirely.
	For content with factuality concerns, flag these specifically.

	PROVIDE YOUR ANALYSIS IN THIS FORMAT:
	{{
	"relevance_score": [0-10],
	"factuality_score": [0-10],
	"filtered_content": "The filtered and cleaned content, removing irrelevant parts",
	"analysis": "Brief explanation of your judgment"
	}}

	ONLY RETURN THE JSON, nothing else.
	"""

	# This method will be handled by CrewAI's internal mechanism
	# For placeholder purposes during direct testing, we return example data.
	# In a real CrewAI run, the agent's LLM would process the prompt.
	return {
	"relevance_score": 7, # Placeholder
	"factuality_score": 8, # Placeholder
	"filtered_content": content, # Placeholder
	"analysis": "This is a placeholder analysis. The real analysis will be performed during execution."
	}

	class Config:
	"""Pydantic config for the tool"""
	arbitrary_types_allowed = True

	def run(self, query: str, content: str) -> Dict[str, Any]:
	"""Public method to run content analysis"""
	return self._run(query, content)