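"""Verifier agent: scores extracted facts and flags contradictions.

Wraps two Gemini models behind a `Verifier` class (one for per-fact confidence
scoring, one for contradiction detection and agent logic) and exposes both as
llama-index `FunctionTool`s on a `FunctionAgent` that can hand off to
downstream agents.
"""
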
import os
import logging
import re
import textwrap
from typing import List

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.google_genai import GoogleGenAI
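
# Configuration is read from environment variables (defaults shown here match
# the code below):
#   GEMINI_API_KEY                 - required; authenticates both Gemini clients
#   VERIFIER_CONFIDENCE_THRESHOLD  - score floor, default 0.7
#   VERIFIER_LLM_MODEL             - scoring model, default models/gemini-2.0-flash
#   VERIFIER_AGENT_LLM_MODEL       - agent/contradiction model, default models/gemini-1.5-pro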

# Setup logging
logger = logging.getLogger(__name__)

class VerificationError(Exception):
    """Custom exception for verification failures."""
    pass

class Verifier:
    """
    Cross-check extracted facts, identify contradictions using LLM,
    and assign a confidence score to each fact.
    """
    def __init__(self):
        """Initializes the Verifier, loading configuration from environment variables."""
        logger.info("Initializing Verifier...")
        self.threshold = float(os.getenv("VERIFIER_CONFIDENCE_THRESHOLD", "0.7"))
        self.verifier_llm_model = os.getenv("VERIFIER_LLM_MODEL", "models/gemini-2.0-flash") # For scoring
        self.agent_llm_model = os.getenv("VERIFIER_AGENT_LLM_MODEL", "models/gemini-1.5-pro") # For agent logic & contradiction
        self.gemini_api_key = os.getenv("GEMINI_API_KEY")

        if not self.gemini_api_key:
            logger.error("GEMINI_API_KEY not found in environment variables.")
            raise ValueError("GEMINI_API_KEY must be set")

        try:
            self.verifier_llm = GoogleGenAI(
                api_key=self.gemini_api_key,
                model=self.verifier_llm_model,
            )
            self.agent_llm = GoogleGenAI(
                api_key=self.gemini_api_key,
                model=self.agent_llm_model,
            )
            logger.info(f"Verifier initialized with threshold {self.threshold}, verifier LLM {self.verifier_llm_model}, agent LLM {self.agent_llm_model}")
        except Exception as e:
            logger.error(f"Error initializing Verifier LLMs: {e}", exc_info=True)
            raise

    def verify_facts(self, facts: List[str]) -> List[str]:
        """
        Assign a confidence score via LLM to each fact and return formatted strings.

        Args:
            facts (List[str]): Facts to verify.

        Returns:
            List[str]: Each item is "fact: score" with score ∈ [threshold, 1.0].
        
        Note:
            LLM failures are not raised; they are reported inline as
            "fact: ERROR - Verification failed".
        """
        logger.info(f"Verifying {len(facts)} facts...")
        results: List[str] = []
        for fact in facts:
            prompt = (
                "You are a fact verifier. "
                "On a scale from 0.00 to 1.00, where any value below "
                f"{self.threshold:.2f} indicates low confidence, rate the following statement’s trustworthiness. "
                "Respond with **only** a decimal number rounded to two digits (e.g., 0.82) and no extra text.\n\n"
                f"Statement: \"{fact}\""
            )
            try:
                response = self.verifier_llm.complete(prompt)
                score_text = response.text.strip()
                # Try direct conversion first
                try:
                    score = float(score_text)
                except ValueError:
                    # Fallback: extract first float if model returns extra text
                    match = re.search(r"0?\.\d+|1(?:\.0+)?", score_text)
                    if match:
                        score = float(match.group(0))
                        logger.warning(f"Extracted score {score} from noisy LLM response: {score_text}")
                    else:
                        logger.error(f"Could not parse score from LLM response: {score_text}. Using threshold {self.threshold}.")
                        score = self.threshold # Fallback to threshold if parsing fails completely

                # Enforce threshold floor
                if score < self.threshold:
                    logger.info(f"Score {score:.2f} for fact {fact} below threshold {self.threshold}, raising to threshold.")
                    score = self.threshold
                
                results.append(f"{fact}: {score:.2f}")

            except Exception as e:
                logger.error(f"LLM call failed during fact verification for {fact}: {e}", exc_info=True)
                # Option 1: Raise an error
                # raise VerificationError(f"LLM call failed for fact: {fact}") from e
                # Option 2: Append an error message (current approach)
                results.append(f"{fact}: ERROR - Verification failed")
                # Option 3: Assign lowest score
                # results.append(f"{fact}: {self.threshold:.2f} (Verification Error)")
                
        logger.info(f"Fact verification complete. {len(results)} results generated.")
        return results

    def find_contradictions_llm(self, facts: List[str]) -> List[str]:
        """
        Identify contradictions among a list of facts using an LLM.

        Args:
            facts (List[str]): List of fact strings.

        Returns:
            List[str]: Pairs of facts detected as contradictory, joined by " <> ".
        
        Raises:
            VerificationError: If LLM call fails.
        """
        logger.info(f"Finding contradictions in {len(facts)} facts using LLM...")
        if len(facts) < 2:
            logger.info("Not enough facts to find contradictions.")
            return []

        facts_numbered = "\n".join([f"{i+1}. {fact}" for i, fact in enumerate(facts)])
        
        prompt = (
            "You are a logical reasoning assistant. Analyze the following numbered list of statements. "
            "Identify any pairs of statements that directly contradict each other. "
            "List *only* the numbers of the contradicting pairs, one pair per line, formatted as 'X, Y'. "
            "If no contradictions are found, respond with 'None'. Do not include any other text or explanation.\n\n"
            f"Statements:\n{facts_numbered}"
        )
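
        # A well-formed response to the prompt above looks like, e.g.:
        #   1, 3
        #   2, 4
        # or the single token "None" when no contradictions exist, per the
        # instructions in the prompt.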

        try:
            response = self.agent_llm.complete(prompt) # Use the more powerful agent LLM
            response_text = response.text.strip()
            logger.info(f"LLM response for contradictions: {response_text}")

            if response_text.lower() == 'none':
                logger.info("LLM reported no contradictions.")
                return []

            contradiction_pairs = []
            lines = response_text.split("\n")
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                try:
                    # Expect format like "1, 5"
                    parts = line.split(',')
                    if len(parts) == 2:
                        idx1 = int(parts[0].strip()) - 1
                        idx2 = int(parts[1].strip()) - 1
                        
                        # Validate indices
                        if 0 <= idx1 < len(facts) and 0 <= idx2 < len(facts) and idx1 != idx2:
                            # Ensure pair order doesn't matter and avoid duplicates
                            pair = tuple(sorted((idx1, idx2)))
                            fact1 = facts[pair[0]]
                            fact2 = facts[pair[1]]
                            contradiction_str = f"{fact1} <> {fact2}"
                            if contradiction_str not in contradiction_pairs:
                                contradiction_pairs.append(contradiction_str)
                                logger.info(f"Identified contradiction: {contradiction_str}")
                        else:
                             logger.warning(f"Invalid index pair found in LLM contradiction response: {line}")
                    else:
                        logger.warning(f"Could not parse contradiction pair from LLM response line: {line}")
                except ValueError:
                    logger.warning(f"Non-integer index found in LLM contradiction response line: {line}")
                except Exception as parse_err:
                     logger.warning(f"Error parsing LLM contradiction response line {line}: {parse_err}")
            
            logger.info(f"Contradiction check complete. Found {len(contradiction_pairs)} pairs.")
            return contradiction_pairs

        except Exception as e:
            logger.error(f"LLM call failed during contradiction detection: {e}", exc_info=True)
            # Option 1: Raise an error
            raise VerificationError("LLM call failed during contradiction detection") from e
            # Option 2: Return empty list (fail silently)
            # return []
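
# Illustrative helper (an assumption of this sketch, not used by the agent
# itself): turn the "fact: score" strings produced by Verifier.verify_facts
# back into structured (fact, score) pairs, skipping entries marked as errors.
def parse_verified_facts(verified: List[str]) -> List[tuple]:
    """Parse 'fact: score' strings into (fact, float score) tuples."""
    parsed = []
    for item in verified:
        # rpartition tolerates facts that themselves contain ": ".
        fact, _, score_text = item.rpartition(": ")
        if not fact or score_text.startswith("ERROR"):
            continue  # skip malformed entries and verification failures
        try:
            parsed.append((fact, float(score_text)))
        except ValueError:
            continue  # defensively skip non-numeric scores
    return parsed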

# --- Tool Definitions --- 
# Tools need to be created within the initialization function to bind to the instance

# --- Agent Initialization --- 

# Store the initializer instance globally to ensure singleton behavior
_verifier_initializer_instance = None

class VerifierInitializer:
    def __init__(self):
        self.verifier = Verifier() # Initialize the Verifier class
        self._create_tools()

    def _create_tools(self):
        self.verify_facts_tool = FunctionTool.from_defaults(
            fn=self.verifier.verify_facts, # Bind to instance method
            name="verify_facts",
            description=(
                "Assigns a numerical confidence score (based on plausibility and internal consistency) to each factual assertion in a list. "
                "Input: List[str] of statements. Output: List[str] of 'statement: score' pairs."
            ),
        )

        self.find_contradictions_tool = FunctionTool.from_defaults(
            fn=self.verifier.find_contradictions_llm, # Bind to instance method (using LLM version)
            name="find_contradictions",
            description=(
                "Uses an LLM to detect logical contradictions among a list of statements. "
                "Input: List[str] of factual assertions. "
                "Output: List[str] where each entry is a conflicting pair in the format 'statement1 <> statement2'. Returns empty list if none found."
            )
        )

    def get_agent(self) -> FunctionAgent:
        """Initializes and returns the Verifier Agent."""
        logger.info("Creating VerifierAgent FunctionAgent instance...")
        
        # System prompt (consider loading from file). Dedent so the model does
        # not receive stray leading whitespace from the source indentation.
        system_prompt = textwrap.dedent("""\
            You are VerifierAgent, a fact verification assistant. Given a list of factual statements, you must:

            1. **Verify Facts**: Call `verify_facts` to assign a confidence score to each statement.
            2. **Detect Contradictions**: Call `find_contradictions` to identify logical conflicts between the statements using an LLM.
            3. **Present Results**: Output clear bullet points listing each fact with its confidence score, followed by a list of any detected contradictions.
            4. **Hand-Off**: If significant contradictions or low-confidence facts are found that require deeper analysis, hand off to **reasoning_agent**. Otherwise, pass the verified facts and contradiction summary to **planner_agent** for integration.
            """)

        agent = FunctionAgent(
            name="verifier_agent",
            description=(
                "Evaluates factual statements by assigning confidence scores (`verify_facts`) "
                "and detecting logical contradictions using an LLM (`find_contradictions`). "
                "Hands off to reasoning_agent for complex issues or planner_agent for synthesis."
            ),
            tools=[
                self.verify_facts_tool,
                self.find_contradictions_tool,
            ],
            llm=self.verifier.agent_llm, # Use the agent LLM from the Verifier instance
            system_prompt=system_prompt,
            can_handoff_to=["reasoning_agent", "planner_agent", "advanced_validation_agent"],
        )
        logger.info("VerifierAgent FunctionAgent instance created.")
        return agent

def get_verifier_initializer():
    """Gets the singleton instance of VerifierInitializer."""
    global _verifier_initializer_instance
    if _verifier_initializer_instance is None:
        logger.info("Instantiating VerifierInitializer for the first time.")
        _verifier_initializer_instance = VerifierInitializer()
    return _verifier_initializer_instance

def initialize_verifier_agent() -> FunctionAgent:
    """Initializes and returns the Verifier Agent using a singleton initializer."""
    logger.info("initialize_verifier_agent called.")
    initializer = get_verifier_initializer()
    return initializer.get_agent()
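
# Illustrative wiring (an assumption: this project composes agents with
# llama-index's AgentWorkflow; reasoning_agent and planner_agent are built in
# sibling modules):
#
#   from llama_index.core.agent.workflow import AgentWorkflow
#   workflow = AgentWorkflow(
#       agents=[initialize_verifier_agent(), reasoning_agent, planner_agent],
#       root_agent="planner_agent",
#   )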

# Example usage (for testing if run directly)
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.info("Running verifier_agent.py directly for testing...")

    # Ensure API key is set for testing
    if not os.getenv("GEMINI_API_KEY"):
        print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
    else:
        try:
            test_agent = initialize_verifier_agent()
            print("Verifier Agent initialized successfully for testing.")
            
            # Test contradiction detection
            initializer = get_verifier_initializer()
            test_facts = [
                "The sky is blue.",
                "Water boils at 100 degrees Celsius.",
                "The sky is not blue.",
                "Paris is the capital of France."
            ]
            print(f"\nTesting contradiction detection on: {test_facts}")
            contradictions = initializer.verifier.find_contradictions_llm(test_facts)
            print(f"Detected contradictions: {contradictions}")

            # Test fact verification
            print(f"\nTesting fact verification on: {test_facts}")
            verified = initializer.verifier.verify_facts(test_facts)
            print(f"Verified facts: {verified}")

        except Exception as e:
            print(f"Error during testing: {e}")