Update workflow.py

workflow.py (CHANGED: +40 -26)
@@ -23,18 +23,13 @@ class AgentState(TypedDict):
 
 class ResearchWorkflow:
     """
-    A multi-step research workflow
-
-
-    - Legal Research
-    - Environmental and Energy Studies
-    - Competitive Programming and Theoretical Computer Science
-    - Social Sciences
-    This implementation normalizes the domain and uses domain-specific prompts and fallbacks.
+    A multi-step research workflow employing Retrieval-Augmented Generation (RAG) with an additional verification step.
+    This workflow supports multiple domains (e.g., Biomedical, Legal, Environmental, Competitive Programming, Social Sciences)
+    and integrates domain-specific prompts, iterative refinement, and a final verification to reduce hallucinations.
     """
     def __init__(self) -> None:
         self.processor = EnhancedCognitiveProcessor()
-        self.workflow = StateGraph(AgentState)
+        self.workflow = StateGraph(AgentState)
         self._build_workflow()
         self.app = self.workflow.compile()
 
@@ -44,6 +39,8 @@ class ResearchWorkflow:
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
+        # New verify node to further cross-check the output
+        self.workflow.add_node("verify", self.verify_output)
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
@@ -52,17 +49,17 @@ class ResearchWorkflow:
             self._quality_check,
             {"valid": "validate", "invalid": "refine"}
         )
-        self.workflow.add_edge("validate",
+        self.workflow.add_edge("validate", "verify")
         self.workflow.add_edge("refine", "retrieve")
         # Extended node for multi-modal enhancement
        self.workflow.add_node("enhance", self.enhance_analysis)
-        self.workflow.add_edge("
+        self.workflow.add_edge("verify", "enhance")
         self.workflow.add_edge("enhance", END)
 
     def ingest_query(self, state: Dict) -> Dict:
         try:
             query = state["messages"][-1].content
-            # Normalize domain string
+            # Normalize the domain string; default to 'biomedical research'
             domain = state.get("context", {}).get("domain", "Biomedical Research").strip().lower()
             new_context = {
                 "raw_query": query,
@@ -83,7 +80,7 @@ class ResearchWorkflow:
     def retrieve_documents(self, state: Dict) -> Dict:
         try:
             query = state["context"]["raw_query"]
-            #
+            # Placeholder retrieval: currently returns an empty list (simulate no documents)
             docs = []
             logger.info(f"Retrieved {len(docs)} documents for query.")
             return {
@@ -102,18 +99,16 @@ class ResearchWorkflow:
 
     def analyze_content(self, state: Dict) -> Dict:
         try:
-            # Normalize domain and use it for prompt generation
             domain = state["context"].get("domain", "biomedical research").strip().lower()
             docs = state["context"].get("documents", [])
-            # Use retrieved documents if available; else, use raw query as fallback.
             if docs:
                 docs_text = "\n\n".join([d.page_content for d in docs])
             else:
                 docs_text = state["context"].get("raw_query", "")
-                logger.info("No documents retrieved;
-            #
-            domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain,
-
+                logger.info("No documents retrieved; switching to dynamic synthesis (RAG mode).")
+            # Use domain-specific prompt; for legal research, inject legal-specific guidance.
+            domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain,
+                                                              "Provide an analysis based on the provided context.")
             full_prompt = f"Domain: {state['context'].get('domain', 'Biomedical Research')}\n" \
                           f"{domain_prompt}\n\n" + \
                           ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
@@ -134,10 +129,11 @@ class ResearchWorkflow:
         try:
             analysis = state["messages"][-1].content
             validation_prompt = (
-                f"Validate the following analysis for
+                f"Validate the following analysis for accuracy and domain-specific relevance:\n{analysis}\n\n"
                 "Criteria:\n"
-                "1.
-                "2.
+                "1. Factual and technical accuracy\n"
+                "2. For legal research: inclusion of relevant precedents and statutory interpretations; "
+                "for other domains: appropriate domain insights\n"
                 "3. Logical consistency\n"
                 "4. Methodological soundness\n\n"
                 "Respond with 'VALID: [justification]' or 'INVALID: [justification]'."
@@ -152,6 +148,26 @@ class ResearchWorkflow:
             logger.exception("Error during output validation.")
             return self._error_state(f"Validation Error: {str(e)}")
 
+    def verify_output(self, state: Dict) -> Dict:
+        try:
+            # New verify step: cross-check the analysis using an external fact-checking prompt.
+            analysis = state["messages"][-1].content
+            verification_prompt = (
+                f"Verify the following analysis by comparing it with established external legal databases and reference texts:\n{analysis}\n\n"
+                "Identify any discrepancies or hallucinations and provide a brief correction if necessary."
+            )
+            response = self.processor.process_query(verification_prompt)
+            logger.info("Output verification completed.")
+            # Here, you can merge the verification feedback with the analysis.
+            verified_analysis = analysis + "\n\nVerification Feedback: " + response.get('choices', [{}])[0].get('message', {}).get('content', '')
+            return {
+                "messages": [AIMessage(content=verified_analysis)],
+                "context": state["context"]
+            }
+        except Exception as e:
+            logger.exception("Error during output verification.")
+            return self._error_state(f"Verification Error: {str(e)}")
+
     def refine_results(self, state: Dict) -> Dict:
         try:
             current_count = state["context"].get("refine_count", 0)
@@ -167,8 +183,7 @@ class ResearchWorkflow:
                 f"Domain: {domain}\n"
                 "You are given the following series of refinement outputs:\n" +
                 "\n---\n".join(refinement_history) +
-                "\n\nSynthesize these into a final, concise
-                "Focus on improving accuracy and relevance for legal research."
+                "\n\nSynthesize these into a final, concise analysis report with improved accuracy and verifiable details."
             )
             meta_response = self.processor.process_query(meta_prompt)
             logger.info("Meta-refinement completed.")
@@ -180,8 +195,7 @@ class ResearchWorkflow:
             refinement_prompt = (
                 f"Domain: {domain}\n"
                 f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
-                "
-                "Then, improve the analysis with clear references to legal precedents and statutory language."
+                "Identify and correct any weaknesses or hallucinations in the analysis, providing verifiable details."
            )
            response = self.processor.process_query(refinement_prompt)
            logger.info("Refinement completed.")