import json
import asyncio
from typing import List, Dict, Any, Tuple, Optional
from datetime import datetime
from dataclasses import dataclass

from openai import AsyncOpenAI

from ankigen_core.logging import logger
from ankigen_core.models import Card
from .base import BaseAgentWrapper, AgentConfig
from .config import get_config_manager
from .schemas import JudgeDecisionSchema


@dataclass
class JudgeDecision:
    """Decision from a judge agent"""

    approved: bool
    score: float
    feedback: str
    judge_name: str
    improvements: Optional[List[str]] = None
    metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        if self.metadata is None:
            self.metadata = {}
        if self.improvements is None:
            self.improvements = []
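
# Illustrative only: a minimal sketch of building a JudgeDecision by hand (the field
# values below are hypothetical, not taken from any real judgment). Omitted
# `improvements`/`metadata` are normalised to [] / {} by __post_init__:
#
#     decision = JudgeDecision(
#         approved=True,
#         score=0.85,
#         feedback="Accurate and clearly worded.",
#         judge_name="content_accuracy_judge",
#     )
#     assert decision.improvements == [] and decision.metadata == {}
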
class ContentAccuracyJudge(BaseAgentWrapper):
    """Judge for factual accuracy and content correctness"""

    def __init__(self, openai_client: AsyncOpenAI):
        config_manager = get_config_manager()
        base_config = config_manager.get_agent_config("content_accuracy_judge")

        if not base_config:
            raise ValueError(
                "content_accuracy_judge configuration not found - agent system not properly initialized"
            )

        base_config.response_format = JudgeDecisionSchema

        super().__init__(base_config, openai_client)

    async def judge_card(
        self, card: Card, context: Optional[Dict[str, Any]] = None
    ) -> JudgeDecision:
        """Judge a card for content accuracy"""
        try:
            user_input = f"""Evaluate this flashcard for factual accuracy:

Front: {card.front.question}
Back: {card.back.answer}

Assess:
1. Factual correctness
2. Completeness of information
3. Clarity and precision
4. Potential misconceptions

Provide a score (0-1) and detailed feedback."""

            response, usage = await self.execute(user_input)

            if usage and usage.get("total_tokens", 0) > 0:
                logger.info(
                    f"💰 Token Usage: {usage['total_tokens']} tokens (Input: {usage['input_tokens']}, Output: {usage['output_tokens']})"
                )

            return self._parse_judge_response(response, "ContentAccuracyJudge")

        except Exception as e:
            logger.error(f"Content accuracy judgment failed: {e}")
            raise

    def _parse_judge_response(
        self, response: Dict[str, Any], judge_name: str
    ) -> JudgeDecision:
        """Parse the judge response into a JudgeDecision"""
        decision_data = json.loads(response) if isinstance(response, str) else response
        decision = self._parse_decision(decision_data)

        logger.info(f"🎯 {judge_name.upper()} DECISION:")
        logger.info("   Card: [Card content]")
        logger.info(f"   ✅ Approved: {decision.approved}")
        logger.info(f"   📊 Score: {decision.score:.2f}")
        logger.info(f"   📝 Feedback: {decision.feedback}")

        if decision.metadata.get("factual_errors"):
            logger.info(f"   ❌ Factual Errors: {decision.metadata['factual_errors']}")
        if decision.metadata.get("terminology_issues"):
            logger.info(
                f"   ⚠️ Terminology Issues: {decision.metadata['terminology_issues']}"
            )
        if decision.improvements:
            logger.info(f"   🔧 Suggested Improvements: {decision.improvements}")

        logger.info(
            f"   🎯 Judge Confidence: {decision.metadata.get('confidence', 'N/A')}"
        )

        return decision

    def _parse_decision(self, decision_data: Dict[str, Any]) -> JudgeDecision:
        """Parse the judge response into a JudgeDecision"""
        return JudgeDecision(
            approved=decision_data.get("approved", True),
            score=decision_data.get("accuracy_score", 0.5),
            feedback=decision_data.get("detailed_feedback", "No feedback provided"),
            improvements=decision_data.get("suggestions", []),
            judge_name=self.config.name,
            metadata={
                "factual_errors": decision_data.get("factual_errors", []),
                "terminology_issues": decision_data.get("terminology_issues", []),
                "misconceptions": decision_data.get("misconceptions", []),
                "confidence": decision_data.get("confidence", 0.5),
            },
        )
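
# Minimal usage sketch (assumptions: an AsyncOpenAI client can be built from the
# environment, the config manager knows "content_accuracy_judge", and `card` is an
# existing Card; `_demo_accuracy` is a hypothetical helper, not part of this module):
#
#     async def _demo_accuracy(card: Card) -> None:
#         judge = ContentAccuracyJudge(AsyncOpenAI())
#         decision = await judge.judge_card(card)
#         print(decision.approved, decision.score, decision.feedback)
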
class PedagogicalJudge(BaseAgentWrapper):
    """Judge for educational effectiveness and pedagogical principles"""

    def __init__(self, openai_client: AsyncOpenAI):
        config_manager = get_config_manager()
        base_config = config_manager.get_agent_config("pedagogical_judge")

        if not base_config:
            base_config = AgentConfig(
                name="pedagogical_judge",
                instructions="""You are an educational assessment specialist.
                Evaluate flashcards for pedagogical effectiveness, learning objectives,
                cognitive levels, and educational best practices.""",
                model="gpt-4.1",
                temperature=0.4,
            )

        super().__init__(base_config, openai_client)

    async def judge_card(self, card: Card) -> JudgeDecision:
        """Judge a single card for pedagogical effectiveness"""
        datetime.now()

        try:
            user_input = self._build_judgment_prompt(card)
            response, usage = await self.execute(user_input)

            decision_data = (
                json.loads(response) if isinstance(response, str) else response
            )
            decision = self._parse_decision(decision_data)

            logger.info(f"📚 {self.config.name.upper()} DECISION:")
            logger.info(f"   Card: {card.front.question[:80]}...")
            logger.info(f"   ✅ Approved: {decision.approved}")
            logger.info(f"   📊 Score: {decision.score:.2f}")
            logger.info(f"   📝 Feedback: {decision.feedback}")

            if decision.metadata and decision.metadata.get("cognitive_level"):
                logger.info(
                    f"   🧠 Cognitive Level: {decision.metadata['cognitive_level']}"
                )
            if decision.metadata and decision.metadata.get("pedagogical_issues"):
                logger.info(
                    f"   ⚠️ Pedagogical Issues: {decision.metadata['pedagogical_issues']}"
                )
            if decision.improvements:
                logger.info(f"   🔧 Suggested Improvements: {decision.improvements}")

            return decision

        except Exception as e:
            logger.error(f"PedagogicalJudge failed: {e}")
            return JudgeDecision(
                approved=True,
                score=0.5,
                feedback=f"Judgment failed: {str(e)}",
                judge_name=self.config.name,
            )

    def _build_judgment_prompt(self, card: Card) -> str:
        """Build the judgment prompt for pedagogical effectiveness"""
        return f"""Evaluate this flashcard for pedagogical effectiveness:

Card:
Question: {card.front.question}
Answer: {card.back.answer}
Explanation: {card.back.explanation}
Example: {card.back.example}
Difficulty: {card.metadata.get('difficulty', 'Unknown')}

Evaluate based on:
1. Learning Objectives: Clear, measurable learning goals?
2. Bloom's Taxonomy: Appropriate cognitive level?
3. Cognitive Load: Manageable information load?
4. Motivation: Engaging and relevant content?
5. Assessment: Valid testing of understanding vs memorization?

Return your assessment as JSON:
{{
    "approved": true/false,
    "pedagogical_score": 0.0-1.0,
    "cognitive_level": "remember|understand|apply|analyze|evaluate|create",
    "cognitive_load": "low|medium|high",
    "learning_objectives": ["objective1", "objective2"],
    "engagement_factors": ["factor1", "factor2"],
    "pedagogical_issues": ["issue1", "issue2"],
    "improvement_suggestions": ["suggestion1", "suggestion2"],
    "detailed_feedback": "Comprehensive pedagogical assessment"
}}"""

    def _parse_decision(self, decision_data: Dict[str, Any]) -> JudgeDecision:
        """Parse the judge response into a JudgeDecision"""
        return JudgeDecision(
            approved=decision_data.get("approved", True),
            score=decision_data.get("pedagogical_score", 0.5),
            feedback=decision_data.get("detailed_feedback", "No feedback provided"),
            improvements=decision_data.get("improvement_suggestions", []),
            judge_name=self.config.name,
            metadata={
                "cognitive_level": decision_data.get("cognitive_level", "unknown"),
                "cognitive_load": decision_data.get("cognitive_load", "medium"),
                "learning_objectives": decision_data.get("learning_objectives", []),
                "engagement_factors": decision_data.get("engagement_factors", []),
                "pedagogical_issues": decision_data.get("pedagogical_issues", []),
            },
        )
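
# Sketch of the JSON-to-dataclass mapping performed by _parse_decision above
# (the payload is a made-up example of the shape requested in _build_judgment_prompt):
#
#     payload = {
#         "approved": True,
#         "pedagogical_score": 0.8,
#         "cognitive_level": "apply",
#         "improvement_suggestions": ["Add a counter-example"],
#         "detailed_feedback": "Targets application-level recall well.",
#     }
#
# PedagogicalJudge(...)._parse_decision(payload) would yield a JudgeDecision with
# score=0.8, improvements=["Add a counter-example"], and
# metadata["cognitive_level"] == "apply"; missing keys fall back to the defaults above.
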
class ClarityJudge(BaseAgentWrapper):
    """Judge for clarity, readability, and communication effectiveness"""

    def __init__(self, openai_client: AsyncOpenAI):
        config_manager = get_config_manager()
        base_config = config_manager.get_agent_config("clarity_judge")

        if not base_config:
            base_config = AgentConfig(
                name="clarity_judge",
                instructions="""You are a communication and clarity specialist.
                Ensure flashcards are clear, unambiguous, well-written, and accessible
                to the target audience.""",
                model="gpt-4.1-mini",
                temperature=0.3,
            )

        super().__init__(base_config, openai_client)

    async def judge_card(self, card: Card) -> JudgeDecision:
        """Judge a single card for clarity and communication"""
        datetime.now()

        try:
            user_input = self._build_judgment_prompt(card)
            response, usage = await self.execute(user_input)

            decision_data = (
                json.loads(response) if isinstance(response, str) else response
            )
            decision = self._parse_decision(decision_data)

            logger.info(f"✨ {self.config.name.upper()} DECISION:")
            logger.info(f"   Card: {card.front.question[:80]}...")
            logger.info(f"   ✅ Approved: {decision.approved}")
            logger.info(f"   📊 Score: {decision.score:.2f}")
            logger.info(f"   📝 Feedback: {decision.feedback}")

            if decision.metadata and decision.metadata.get("readability_level"):
                logger.info(
                    f"   📖 Readability: {decision.metadata['readability_level']}"
                )
            if decision.metadata and decision.metadata.get("ambiguities"):
                logger.info(f"   ❓ Ambiguities: {decision.metadata['ambiguities']}")
            if decision.improvements:
                logger.info(f"   🔧 Suggested Improvements: {decision.improvements}")

            return decision

        except Exception as e:
            logger.error(f"ClarityJudge failed: {e}")
            return JudgeDecision(
                approved=True,
                score=0.5,
                feedback=f"Judgment failed: {str(e)}",
                judge_name=self.config.name,
            )

    def _build_judgment_prompt(self, card: Card) -> str:
        """Build the judgment prompt for clarity assessment"""
        return f"""Evaluate this flashcard for clarity and communication effectiveness:

Card:
Question: {card.front.question}
Answer: {card.back.answer}
Explanation: {card.back.explanation}
Example: {card.back.example}

Evaluate for:
1. Question Clarity: Is the question clear and unambiguous?
2. Answer Completeness: Is the answer complete and coherent?
3. Language Level: Appropriate for target audience?
4. Readability: Easy to read and understand?
5. Structure: Well-organized and logical flow?

Return your assessment as JSON:
{{
    "approved": true/false,
    "clarity_score": 0.0-1.0,
    "question_clarity": 0.0-1.0,
    "answer_completeness": 0.0-1.0,
    "readability_level": "elementary|middle|high|college",
    "ambiguities": ["ambiguity1", "ambiguity2"],
    "clarity_issues": ["issue1", "issue2"],
    "improvement_suggestions": ["suggestion1", "suggestion2"],
    "detailed_feedback": "Comprehensive clarity assessment"
}}"""

    def _parse_decision(self, decision_data: Dict[str, Any]) -> JudgeDecision:
        """Parse the judge response into a JudgeDecision"""
        return JudgeDecision(
            approved=decision_data.get("approved", True),
            score=decision_data.get("clarity_score", 0.5),
            feedback=decision_data.get("detailed_feedback", "No feedback provided"),
            improvements=decision_data.get("improvement_suggestions", []),
            judge_name=self.config.name,
            metadata={
                "question_clarity": decision_data.get("question_clarity", 0.5),
                "answer_completeness": decision_data.get("answer_completeness", 0.5),
                "readability_level": decision_data.get("readability_level", "unknown"),
                "ambiguities": decision_data.get("ambiguities", []),
                "clarity_issues": decision_data.get("clarity_issues", []),
            },
        )


class TechnicalJudge(BaseAgentWrapper):
    """Judge for technical accuracy in programming and technical content"""

    def __init__(self, openai_client: AsyncOpenAI):
        config_manager = get_config_manager()
        base_config = config_manager.get_agent_config("technical_judge")

        if not base_config:
            base_config = AgentConfig(
                name="technical_judge",
                instructions="""You are a technical accuracy specialist for programming and technical content.
                Verify code syntax, best practices, security considerations, and technical correctness.""",
                model="gpt-4.1",
                temperature=0.2,
            )

        super().__init__(base_config, openai_client)

    async def judge_card(self, card: Card) -> JudgeDecision:
        """Judge a single card for technical accuracy"""
        datetime.now()

        try:
            if not self._is_technical_content(card):
                return JudgeDecision(
                    approved=True,
                    score=1.0,
                    feedback="Non-technical content - no technical review needed",
                    judge_name=self.config.name,
                )

            user_input = self._build_judgment_prompt(card)
            response, usage = await self.execute(user_input)

            decision_data = (
                json.loads(response) if isinstance(response, str) else response
            )
            decision = self._parse_decision(decision_data)

            return decision

        except Exception as e:
            logger.error(f"TechnicalJudge failed: {e}")
            return JudgeDecision(
                approved=True,
                score=0.5,
                feedback=f"Technical judgment failed: {str(e)}",
                judge_name=self.config.name,
            )

    def _is_technical_content(self, card: Card) -> bool:
        """Determine if card contains technical content requiring technical review"""
        # Compared against lowercased card text below, so keep every keyword lowercase.
        technical_keywords = [
            "code",
            "programming",
            "algorithm",
            "function",
            "class",
            "method",
            "syntax",
            "api",
            "database",
            "sql",
            "python",
            "javascript",
            "java",
            "framework",
            "library",
            "development",
            "software",
            "technical",
        ]

        content = (
            f"{card.front.question} {card.back.answer} {card.back.explanation}".lower()
        )
        subject = card.metadata.get("subject", "").lower()

        return any(
            keyword in content or keyword in subject for keyword in technical_keywords
        )

    def _build_judgment_prompt(self, card: Card) -> str:
        """Build the judgment prompt for technical accuracy"""
        return f"""Evaluate this technical flashcard for accuracy and best practices:

Card:
Question: {card.front.question}
Answer: {card.back.answer}
Explanation: {card.back.explanation}
Example: {card.back.example}
Subject: {card.metadata.get('subject', 'Unknown')}

Evaluate for:
1. Code Syntax: Is any code syntactically correct?
2. Best Practices: Does it follow established best practices?
3. Security: Are there security considerations addressed?
4. Performance: Are performance implications mentioned where relevant?
5. Tool Accuracy: Are tool/framework references accurate?

Return your assessment as JSON:
{{
    "approved": true/false,
    "technical_score": 0.0-1.0,
    "syntax_errors": ["error1", "error2"],
    "best_practice_violations": ["violation1", "violation2"],
    "security_issues": ["issue1", "issue2"],
    "performance_concerns": ["concern1", "concern2"],
    "tool_inaccuracies": ["inaccuracy1", "inaccuracy2"],
    "improvement_suggestions": ["suggestion1", "suggestion2"],
    "detailed_feedback": "Comprehensive technical assessment"
}}"""

    def _parse_decision(self, decision_data: Dict[str, Any]) -> JudgeDecision:
        """Parse the judge response into a JudgeDecision"""
        return JudgeDecision(
            approved=decision_data.get("approved", True),
            score=decision_data.get("technical_score", 0.5),
            feedback=decision_data.get("detailed_feedback", "No feedback provided"),
            improvements=decision_data.get("improvement_suggestions", []),
            judge_name=self.config.name,
            metadata={
                "syntax_errors": decision_data.get("syntax_errors", []),
                "best_practice_violations": decision_data.get(
                    "best_practice_violations", []
                ),
                "security_issues": decision_data.get("security_issues", []),
                "performance_concerns": decision_data.get("performance_concerns", []),
                "tool_inaccuracies": decision_data.get("tool_inaccuracies", []),
            },
        )
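
# Rough illustration of the keyword heuristic above (hypothetical cards, not real data):
# a card asking "What does a Python function return by default?" contains "python" and
# "function", so _is_technical_content(...) returns True and TechnicalJudge runs a full
# review; a card asking "In which year did the French Revolution begin?" matches no
# keyword, so judge_card(...) short-circuits with an approved, score=1.0 decision.
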
class CompletenessJudge(BaseAgentWrapper):
    """Judge for completeness and quality standards"""

    def __init__(self, openai_client: AsyncOpenAI):
        config_manager = get_config_manager()
        base_config = config_manager.get_agent_config("completeness_judge")

        if not base_config:
            base_config = AgentConfig(
                name="completeness_judge",
                instructions="""You are a completeness and quality assurance specialist.
                Ensure flashcards meet all requirements, have complete information,
                and maintain consistent quality standards.""",
                model="gpt-4.1-mini",
                temperature=0.3,
            )

        super().__init__(base_config, openai_client)

    async def judge_card(self, card: Card) -> JudgeDecision:
        """Judge a single card for completeness"""
        datetime.now()

        try:
            user_input = self._build_judgment_prompt(card)
            response, usage = await self.execute(user_input)

            decision_data = (
                json.loads(response) if isinstance(response, str) else response
            )
            decision = self._parse_decision(decision_data)

            return decision

        except Exception as e:
            logger.error(f"CompletenessJudge failed: {e}")
            return JudgeDecision(
                approved=True,
                score=0.5,
                feedback=f"Completeness judgment failed: {str(e)}",
                judge_name=self.config.name,
            )

    def _build_judgment_prompt(self, card: Card) -> str:
        """Build the judgment prompt for completeness assessment"""
        return f"""Evaluate this flashcard for completeness and quality standards:

Card:
Question: {card.front.question}
Answer: {card.back.answer}
Explanation: {card.back.explanation}
Example: {card.back.example}
Type: {card.card_type}
Metadata: {json.dumps(card.metadata, indent=2)}

Check for:
1. Required Fields: All necessary fields present and filled?
2. Metadata Completeness: Appropriate tags, categorization, difficulty?
3. Content Completeness: Answer, explanation, example present and sufficient?
4. Quality Standards: Consistent formatting and professional quality?
5. Example Relevance: Examples relevant and helpful?

Return your assessment as JSON:
{{
    "approved": true/false,
    "completeness_score": 0.0-1.0,
    "missing_fields": ["field1", "field2"],
    "incomplete_sections": ["section1", "section2"],
    "metadata_issues": ["issue1", "issue2"],
    "quality_concerns": ["concern1", "concern2"],
    "improvement_suggestions": ["suggestion1", "suggestion2"],
    "detailed_feedback": "Comprehensive completeness assessment"
}}"""

    def _parse_decision(self, decision_data: Dict[str, Any]) -> JudgeDecision:
        """Parse the judge response into a JudgeDecision"""
        return JudgeDecision(
            approved=decision_data.get("approved", True),
            score=decision_data.get("completeness_score", 0.5),
            feedback=decision_data.get("detailed_feedback", "No feedback provided"),
            improvements=decision_data.get("improvement_suggestions", []),
            judge_name=self.config.name,
            metadata={
                "missing_fields": decision_data.get("missing_fields", []),
                "incomplete_sections": decision_data.get("incomplete_sections", []),
                "metadata_issues": decision_data.get("metadata_issues", []),
                "quality_concerns": decision_data.get("quality_concerns", []),
            },
        )
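
# Quick reference (derived from the _parse_decision methods above): each judge reads its
# score from a different response key and stores judge-specific metadata on the
# resulting JudgeDecision —
#   ContentAccuracyJudge: "accuracy_score"     -> factual_errors, terminology_issues, ...
#   PedagogicalJudge:     "pedagogical_score"  -> cognitive_level, cognitive_load, ...
#   ClarityJudge:         "clarity_score"      -> readability_level, ambiguities, ...
#   TechnicalJudge:       "technical_score"    -> syntax_errors, security_issues, ...
#   CompletenessJudge:    "completeness_score" -> missing_fields, quality_concerns, ...
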
class JudgeCoordinator(BaseAgentWrapper):
    """Coordinates multiple judges and synthesizes their decisions"""

    def __init__(self, openai_client: AsyncOpenAI):
        config_manager = get_config_manager()
        base_config = config_manager.get_agent_config("judge_coordinator")

        if not base_config:
            base_config = AgentConfig(
                name="judge_coordinator",
                instructions="""You are the quality assurance coordinator.
                Orchestrate the judging process and synthesize feedback from specialist judges.
                Balance speed with thoroughness in quality assessment.""",
                model="gpt-4.1-mini",
                temperature=0.3,
            )

        super().__init__(base_config, openai_client)

        self.content_accuracy = ContentAccuracyJudge(openai_client)
        self.pedagogical = PedagogicalJudge(openai_client)
        self.clarity = ClarityJudge(openai_client)
        self.technical = TechnicalJudge(openai_client)
        self.completeness = CompletenessJudge(openai_client)

    async def coordinate_judgment(
        self,
        cards: List[Card],
        enable_parallel: bool = True,
        min_consensus: float = 0.6,
    ) -> List[Tuple[Card, List[JudgeDecision], bool]]:
        """Coordinate judgment of multiple cards"""
        datetime.now()

        try:
            results = []

            if enable_parallel:
                tasks = [self._judge_single_card(card, min_consensus) for card in cards]
                card_results = await asyncio.gather(*tasks, return_exceptions=True)

                for card, result in zip(cards, card_results):
                    if isinstance(result, Exception):
                        logger.error(f"Parallel judgment failed for card: {result}")
                        results.append((card, [], False))
                    else:
                        results.append(result)
            else:
                for card in cards:
                    try:
                        result = await self._judge_single_card(card, min_consensus)
                        results.append(result)
                    except Exception as e:
                        logger.error(f"Sequential judgment failed for card: {e}")
                        results.append((card, [], False))

            total_cards = len(cards)
            approved_cards = sum(1 for _, _, approved in results if approved)

            logger.info(
                f"Judge coordination complete: {approved_cards}/{total_cards} cards approved"
            )
            return results

        except Exception as e:
            logger.error(f"Judge coordination failed: {e}")
            raise

    async def _judge_single_card(
        self, card: Card, min_consensus: float
    ) -> Tuple[Card, List[JudgeDecision], bool]:
        """Judge a single card with all relevant judges"""

        judges = [
            self.content_accuracy,
            self.pedagogical,
            self.clarity,
            self.completeness,
        ]

        if self.technical._is_technical_content(card):
            judges.append(self.technical)

        judge_tasks = [judge.judge_card(card) for judge in judges]
        decisions = await asyncio.gather(*judge_tasks, return_exceptions=True)

        valid_decisions = []
        for decision in decisions:
            if isinstance(decision, JudgeDecision):
                valid_decisions.append(decision)
            else:
                logger.warning(f"Judge decision failed: {decision}")

        if not valid_decisions:
            return (card, [], False)

        approval_votes = len([d for d in valid_decisions if d.approved])
        consensus_score = approval_votes / len(valid_decisions)

        final_approval = consensus_score >= min_consensus

        logger.info("🏛️ JUDGE COORDINATION RESULT:")
        logger.info(f"   Card: {card.front.question[:80]}...")
        logger.info(f"   👥 Judges Consulted: {len(valid_decisions)}")
        logger.info(f"   ✅ Approval Votes: {approval_votes}/{len(valid_decisions)}")
        logger.info(
            f"   📊 Consensus Score: {consensus_score:.2f} (min: {min_consensus:.2f})"
        )
        logger.info(
            f"   🏆 Final Decision: {'APPROVED' if final_approval else 'REJECTED'}"
        )

        if not final_approval:
            logger.info("   📋 Rejection Reasons:")
            for decision in valid_decisions:
                if not decision.approved:
                    logger.info(
                        f"      • {decision.judge_name}: {decision.feedback[:100]}..."
                    )

        return (card, valid_decisions, final_approval)
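
# End-to-end usage sketch (illustrative only): assumes an OpenAI API key is configured,
# the agent config manager is initialised, and `cards` is a list of Card objects;
# `_demo` and `cards` are hypothetical names, not part of this module. With the default
# min_consensus=0.6, a card approved by 3 of its 4 judges scores 0.75 and is kept,
# while 2 of 4 (0.50) is rejected.
#
#     async def _demo(cards: List[Card]) -> None:
#         coordinator = JudgeCoordinator(AsyncOpenAI())
#         results = await coordinator.coordinate_judgment(cards, enable_parallel=True)
#         for card, decisions, approved in results:
#             print(card.front.question, approved, [d.judge_name for d in decisions])
#
#     if __name__ == "__main__":
#         asyncio.run(_demo(cards))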