import os
import logging
import re
from typing import List
from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.google_genai import GoogleGenAI
# Setup logging
logger = logging.getLogger(__name__)
class VerificationError(Exception):
    """Custom exception for verification failures."""
    pass
class Verifier:
    """
    Cross-check extracted facts, identify contradictions using an LLM,
    and assign a confidence score to each fact.
    """
    def __init__(self):
        """Initializes the Verifier, loading configuration from environment variables."""
        logger.info("Initializing Verifier...")
        self.threshold = float(os.getenv("VERIFIER_CONFIDENCE_THRESHOLD", 0.7))
        self.verifier_llm_model = os.getenv("VERIFIER_LLM_MODEL", "models/gemini-2.0-flash")  # For scoring
        self.agent_llm_model = os.getenv("VERIFIER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")  # For agent logic & contradiction detection
        self.gemini_api_key = os.getenv("GEMINI_API_KEY")
        if not self.gemini_api_key:
            logger.error("GEMINI_API_KEY not found in environment variables.")
            raise ValueError("GEMINI_API_KEY must be set")
        try:
            self.verifier_llm = GoogleGenAI(
                api_key=self.gemini_api_key,
                model=self.verifier_llm_model,
            )
            self.agent_llm = GoogleGenAI(
                api_key=self.gemini_api_key,
                model=self.agent_llm_model,
            )
            logger.info(f"Verifier initialized with threshold {self.threshold}, verifier LLM {self.verifier_llm_model}, agent LLM {self.agent_llm_model}")
        except Exception as e:
            logger.error(f"Error initializing Verifier LLMs: {e}", exc_info=True)
            raise
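
    # Illustrative environment configuration (the model names and threshold match the
    # defaults above; GEMINI_API_KEY has no default and must be provided):
    #
    #   GEMINI_API_KEY=<your-api-key>
    #   VERIFIER_CONFIDENCE_THRESHOLD=0.7
    #   VERIFIER_LLM_MODEL=models/gemini-2.0-flash
    #   VERIFIER_AGENT_LLM_MODEL=models/gemini-1.5-pro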
    def verify_facts(self, facts: List[str]) -> List[str]:
        """
        Use an LLM to assign a confidence score to each fact and return formatted strings.

        Args:
            facts (List[str]): Facts to verify.

        Returns:
            List[str]: Each item is "fact: score" with score ∈ [threshold, 1.0].

        Raises:
            VerificationError: If the LLM call fails.
        """
        logger.info(f"Verifying {len(facts)} facts...")
        results: List[str] = []
        for fact in facts:
            prompt = (
                "You are a fact verifier. "
                "On a scale from 0.00 to 1.00, where any value below "
                f"{self.threshold:.2f} indicates low confidence, rate the following statement’s trustworthiness. "
                "Respond with **only** a decimal number rounded to two digits (e.g., 0.82) and no extra text.\n\n"
                f"Statement: \"{fact}\""
            )
            try:
                response = self.verifier_llm.complete(prompt)
                score_text = response.text.strip()
                # Try direct conversion first
                try:
                    score = float(score_text)
                except ValueError:
                    # Fallback: extract the first float if the model returns extra text
                    match = re.search(r"0?\.\d+|1(?:\.0+)?", score_text)
                    if match:
                        score = float(match.group(0))
                        logger.warning(f"Extracted score {score} from noisy LLM response: {score_text}")
                    else:
                        logger.error(f"Could not parse score from LLM response: {score_text}. Using threshold {self.threshold}.")
                        score = self.threshold  # Fall back to the threshold if parsing fails completely
                # Enforce the threshold floor
                if score < self.threshold:
                    logger.info(f"Score {score:.2f} for fact '{fact}' below threshold {self.threshold}, raising to threshold.")
                    score = self.threshold
                results.append(f"{fact}: {score:.2f}")
            except Exception as e:
                logger.error(f"LLM call failed during fact verification for '{fact}': {e}", exc_info=True)
                # Option 1: Raise an error
                # raise VerificationError(f"LLM call failed for fact: {fact}") from e
                # Option 2: Append an error message (current approach)
                results.append(f"{fact}: ERROR - Verification failed")
                # Option 3: Assign the lowest score
                # results.append(f"{fact}: {self.threshold:.2f} (Verification Error)")
        logger.info(f"Fact verification complete. {len(results)} results generated.")
        return results
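
    # A minimal usage sketch for `verify_facts` (the fact text and score below are
    # illustrative; actual scores depend on the LLM response):
    #
    #   verifier = Verifier()
    #   verifier.verify_facts(["Water boils at 100 degrees Celsius at sea level."])
    #   -> ["Water boils at 100 degrees Celsius at sea level.: 0.95"]
    #
    # Scores the model returns below `self.threshold` are floored to the threshold, so
    # successful results always carry a score in [threshold, 1.0].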
    def find_contradictions_llm(self, facts: List[str]) -> List[str]:
        """
        Identify contradictions among a list of facts using an LLM.

        Args:
            facts (List[str]): List of fact strings.

        Returns:
            List[str]: Pairs of facts detected as contradictory, joined by " <> ".

        Raises:
            VerificationError: If the LLM call fails.
        """
        logger.info(f"Finding contradictions in {len(facts)} facts using LLM...")
        if len(facts) < 2:
            logger.info("Not enough facts to find contradictions.")
            return []

        facts_numbered = "\n".join([f"{i+1}. {fact}" for i, fact in enumerate(facts)])
        prompt = (
            "You are a logical reasoning assistant. Analyze the following numbered list of statements. "
            "Identify any pairs of statements that directly contradict each other. "
            "List *only* the numbers of the contradicting pairs, one pair per line, formatted as 'X, Y'. "
            "If no contradictions are found, respond with 'None'. Do not include any other text or explanation.\n\n"
            f"Statements:\n{facts_numbered}"
        )
        try:
            response = self.agent_llm.complete(prompt)  # Use the more powerful agent LLM
            response_text = response.text.strip()
            logger.info(f"LLM response for contradictions: {response_text}")

            if response_text.lower() == 'none':
                logger.info("LLM reported no contradictions.")
                return []

            contradiction_pairs = []
            lines = response_text.split("\n")
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                try:
                    # Expect format like "1, 5"
                    parts = line.split(',')
                    if len(parts) == 2:
                        idx1 = int(parts[0].strip()) - 1
                        idx2 = int(parts[1].strip()) - 1
                        # Validate indices
                        if 0 <= idx1 < len(facts) and 0 <= idx2 < len(facts) and idx1 != idx2:
                            # Ensure pair order doesn't matter and avoid duplicates
                            pair = tuple(sorted((idx1, idx2)))
                            fact1 = facts[pair[0]]
                            fact2 = facts[pair[1]]
                            contradiction_str = f"{fact1} <> {fact2}"
                            if contradiction_str not in contradiction_pairs:
                                contradiction_pairs.append(contradiction_str)
                                logger.info(f"Identified contradiction: {contradiction_str}")
                        else:
                            logger.warning(f"Invalid index pair found in LLM contradiction response: {line}")
                    else:
                        logger.warning(f"Could not parse contradiction pair from LLM response line: {line}")
                except ValueError:
                    logger.warning(f"Non-integer index found in LLM contradiction response line: {line}")
                except Exception as parse_err:
                    logger.warning(f"Error parsing LLM contradiction response line {line}: {parse_err}")

            logger.info(f"Contradiction check complete. Found {len(contradiction_pairs)} pairs.")
            return contradiction_pairs
        except Exception as e:
            logger.error(f"LLM call failed during contradiction detection: {e}", exc_info=True)
            # Option 1: Raise an error
            raise VerificationError("LLM call failed during contradiction detection") from e
            # Option 2: Return empty list (fail silently)
            # return []
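
    # A minimal usage sketch for `find_contradictions_llm` (output is illustrative and
    # depends on the LLM response):
    #
    #   verifier.find_contradictions_llm(["The sky is blue.", "The sky is not blue."])
    #   -> ["The sky is blue. <> The sky is not blue."]
    #
    # Index pairs returned by the LLM are sorted before lookup, so duplicate pairs in
    # either order are reported only once.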
# --- Tool Definitions ---
# Tools need to be created within the initialization function to bind to the instance
# --- Agent Initialization ---
# Store the initializer instance globally to ensure singleton behavior
_verifier_initializer_instance = None
class VerifierInitializer:
    def __init__(self):
        self.verifier = Verifier()  # Initialize the Verifier class
        self._create_tools()

    def _create_tools(self):
        self.verify_facts_tool = FunctionTool.from_defaults(
            fn=self.verifier.verify_facts,  # Bind to instance method
            name="verify_facts",
            description=(
                "Assigns a numerical confidence score (based on plausibility and internal consistency) to each factual assertion in a list. "
                "Input: List[str] of statements. Output: List[str] of 'statement: score' pairs."
            ),
        )
        self.find_contradictions_tool = FunctionTool.from_defaults(
            fn=self.verifier.find_contradictions_llm,  # Bind to instance method (using LLM version)
            name="find_contradictions",
            description=(
                "Uses an LLM to detect logical contradictions among a list of statements. "
                "Input: List[str] of factual assertions. "
                "Output: List[str] where each entry is a conflicting pair in the format 'statement1 <> statement2'. Returns empty list if none found."
            ),
        )
    def get_agent(self) -> FunctionAgent:
        """Initializes and returns the Verifier Agent."""
        logger.info("Creating VerifierAgent FunctionAgent instance...")
        # System prompt (consider loading from file)
        system_prompt = """\
You are VerifierAgent, a fact verification assistant. Given a list of factual statements, you must:
1. **Verify Facts**: Call `verify_facts` to assign a confidence score to each statement.
2. **Detect Contradictions**: Call `find_contradictions` to identify logical conflicts between the statements using an LLM.
3. **Present Results**: Output clear bullet points listing each fact with its confidence score, followed by a list of any detected contradictions.
4. **Hand-Off**: If significant contradictions or low-confidence facts are found that require deeper analysis, hand off to **reasoning_agent**. Otherwise, pass the verified facts and contradiction summary to **planner_agent** for integration.
"""
        agent = FunctionAgent(
            name="verifier_agent",
            description=(
                "Evaluates factual statements by assigning confidence scores (`verify_facts`) "
                "and detecting logical contradictions using an LLM (`find_contradictions`). "
                "Hands off to reasoning_agent for complex issues or planner_agent for synthesis."
            ),
            tools=[
                self.verify_facts_tool,
                self.find_contradictions_tool,
            ],
            llm=self.verifier.agent_llm,  # Use the agent LLM from the Verifier instance
            system_prompt=system_prompt,
            can_handoff_to=["reasoning_agent", "planner_agent", "advanced_validation_agent"],
        )
        logger.info("VerifierAgent FunctionAgent instance created.")
        return agent
def get_verifier_initializer():
    """Gets the singleton instance of VerifierInitializer."""
    global _verifier_initializer_instance
    if _verifier_initializer_instance is None:
        logger.info("Instantiating VerifierInitializer for the first time.")
        _verifier_initializer_instance = VerifierInitializer()
    return _verifier_initializer_instance
def initialize_verifier_agent() -> FunctionAgent:
    """Initializes and returns the Verifier Agent using a singleton initializer."""
    logger.info("initialize_verifier_agent called.")
    initializer = get_verifier_initializer()
    return initializer.get_agent()
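
# A sketch of how an orchestrator module might wire this agent into a multi-agent
# workflow. AgentWorkflow comes from llama_index.core.agent.workflow; the peer agents
# named in can_handoff_to (reasoning_agent, planner_agent, advanced_validation_agent)
# are assumed to be defined elsewhere in the project:
#
#   from llama_index.core.agent.workflow import AgentWorkflow
#   verifier_agent = initialize_verifier_agent()
#   workflow = AgentWorkflow(agents=[verifier_agent, ...], root_agent=verifier_agent.name)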
# Example usage (for testing if run directly)
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.info("Running verifier_agent.py directly for testing...")

    # Ensure API key is set for testing
    if not os.getenv("GEMINI_API_KEY"):
        print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
    else:
        try:
            test_agent = initialize_verifier_agent()
            print("Verifier Agent initialized successfully for testing.")

            # Test contradiction detection
            initializer = get_verifier_initializer()
            test_facts = [
                "The sky is blue.",
                "Water boils at 100 degrees Celsius.",
                "The sky is not blue.",
                "Paris is the capital of France."
            ]
            print(f"\nTesting contradiction detection on: {test_facts}")
            contradictions = initializer.verifier.find_contradictions_llm(test_facts)
            print(f"Detected contradictions: {contradictions}")

            # Test fact verification
            print(f"\nTesting fact verification on: {test_facts}")
            verified = initializer.verifier.verify_facts(test_facts)
            print(f"Verified facts: {verified}")
        except Exception as e:
            print(f"Error during testing: {e}")