File size: 13,739 Bytes
d09f6aa 100024e d09f6aa 100024e 0333a17 d09f6aa 313f83b d09f6aa 313f83b 56fd459 d09f6aa 313f83b d09f6aa 313f83b d09f6aa 0333a17 07fe6c3 100024e d09f6aa 07fe6c3 0333a17 d09f6aa 56fd459 313f83b 0333a17 313f83b 0333a17 313f83b 0333a17 88b029f 313f83b 56fd459 313f83b 56fd459 313f83b 56fd459 313f83b 56fd459 313f83b 0333a17 313f83b 0333a17 313f83b 0333a17 d09f6aa 100024e 0333a17 d09f6aa 100024e d09f6aa 100024e d09f6aa 100024e 07fe6c3 100024e d09f6aa 100024e d09f6aa 100024e d09f6aa 100024e 313f83b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 |
# Module for core card generation logic
import gradio as gr
import pandas as pd
from typing import Any, Dict, List, Optional
# Imports from our core modules
from ankigen_core.utils import (
get_logger,
ResponseCache,
strip_html_tags,
)
from ankigen_core.llm_interface import OpenAIClientManager
from ankigen_core.models import (
Card,
) # Import necessary Pydantic models
# Import agent system - required
from ankigen_core.agents.integration import AgentOrchestrator
from agents import set_tracing_disabled
logger = get_logger()
# Disable tracing to prevent metrics persistence issues
set_tracing_disabled(True)
AGENTS_AVAILABLE = True
logger.info("Agent system loaded successfully")
# --- Constants --- (Moved from app.py)
AVAILABLE_MODELS = [
{
"value": "gpt-4.1",
"label": "GPT-4.1 (Best Quality)",
"description": "Highest quality, large context window",
},
{
"value": "gpt-4.1-nano",
"label": "GPT-4.1 Nano (Ultra Fast)",
"description": "Ultra-fast and cost-effective",
},
]
GENERATION_MODES = [
{
"value": "subject",
"label": "Single Subject",
"description": "Generate cards for a specific topic",
},
{
"value": "path",
"label": "Learning Path",
"description": "Break down a job description or learning goal into subjects",
},
{
"value": "text",
"label": "From Text",
"description": "Generate cards from provided text",
},
{
"value": "web",
"label": "From Web",
"description": "Generate cards from a web page URL",
},
]
# --- Core Functions --- (Moved and adapted from app.py)
# Legacy functions removed - all card generation now handled by agent system
async def orchestrate_card_generation( # MODIFIED: Added async
client_manager: OpenAIClientManager, # Expect the manager
cache: ResponseCache, # Expect the cache instance
# --- UI Inputs --- (These will be passed from app.py handler)
api_key_input: str,
subject: str,
generation_mode: str,
source_text: str,
url_input: str,
model_name: str,
topic_number: int,
cards_per_topic: int,
preference_prompt: str,
generate_cloze: bool,
use_llm_judge: bool = False,
library_name: str = None,
library_topic: str = None,
):
"""Orchestrates the card generation process based on UI inputs."""
logger.info(f"Starting card generation orchestration in {generation_mode} mode")
logger.debug(
f"Parameters: mode={generation_mode}, topics={topic_number}, cards_per_topic={cards_per_topic}, cloze={generate_cloze}"
)
# --- AGENT SYSTEM INTEGRATION ---
if AGENTS_AVAILABLE:
logger.info("π€ Using agent system for card generation")
try:
from ankigen_core.agents.token_tracker import get_token_tracker
token_tracker = get_token_tracker()
orchestrator = AgentOrchestrator(client_manager)
logger.info(f"Using {model_name} for SubjectExpertAgent")
await orchestrator.initialize(api_key_input, {"subject_expert": model_name})
# Map generation mode to subject
agent_subject = "general"
if generation_mode == "subject":
agent_subject = subject if subject else "general"
elif generation_mode == "path":
agent_subject = "curriculum_design"
elif generation_mode == "text":
agent_subject = "content_analysis"
total_cards_needed = topic_number * cards_per_topic
context = {}
if generation_mode == "text" and source_text:
context["source_text"] = source_text
agent_cards, agent_metadata = await orchestrator.generate_cards_with_agents(
topic=subject if subject else "Mixed Topics",
subject=agent_subject,
num_cards=total_cards_needed,
difficulty="intermediate",
context=context,
library_name=library_name,
library_topic=library_topic,
generate_cloze=generate_cloze,
)
# Get token usage from session
try:
# Try both method names for compatibility
if hasattr(token_tracker, "get_session_summary"):
token_usage = token_tracker.get_session_summary()
elif hasattr(token_tracker, "get_session_usage"):
token_usage = token_tracker.get_session_usage()
else:
raise AttributeError("TokenTracker has no session summary method")
token_usage_html = f"<div style='margin-top: 8px;'><b>Token Usage:</b> {token_usage['total_tokens']} tokens</div>"
except Exception as e:
logger.error(f"Token usage collection failed: {e}")
token_usage_html = "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"
# Convert agent cards to dataframe format
if agent_cards:
formatted_cards = format_cards_for_dataframe(
agent_cards,
topic_name=subject if subject else "General",
start_index=1,
)
output_df = pd.DataFrame(
formatted_cards, columns=get_dataframe_columns()
)
total_cards_message = f"<div><b>Cards Generated:</b> <span id='total-cards-count'>{len(output_df)}</span></div>"
logger.info(
f"Agent system generated {len(output_df)} cards successfully"
)
return output_df, total_cards_message, token_usage_html
else:
logger.error("Agent system returned no cards")
gr.Error("π€ Agent system returned no cards")
return (
pd.DataFrame(columns=get_dataframe_columns()),
"Agent system returned no cards.",
"",
)
except Exception as e:
logger.error(f"Agent system failed: {e}")
gr.Error(f"π€ Agent system error: {str(e)}")
return (
pd.DataFrame(columns=get_dataframe_columns()),
f"Agent system error: {str(e)}",
"",
)
# Agent system is required and should never fail to be available
logger.error("Agent system failed but is required - this should not happen")
gr.Error("Agent system is required but not available")
return (
pd.DataFrame(columns=get_dataframe_columns()),
"Agent system error",
"",
)
# Legacy helper functions removed - all processing now handled by agent system
# --- Formatting and Utility Functions --- (Moved and adapted)
def format_cards_for_dataframe(
cards: list[Card], topic_name: str, topic_index: int = 0, start_index: int = 1
) -> list:
"""Formats a list of Card objects into a list of dictionaries for DataFrame display.
Ensures all data is plain text.
"""
formatted_cards = []
for i, card_obj in enumerate(cards):
actual_index = start_index + i
card_type = card_obj.card_type or "basic"
question = card_obj.front.question or ""
answer = card_obj.back.answer or ""
explanation = card_obj.back.explanation or ""
example = card_obj.back.example or ""
# Metadata processing
metadata = card_obj.metadata or {}
prerequisites = metadata.get("prerequisites", [])
learning_outcomes = metadata.get("learning_outcomes", [])
difficulty = metadata.get("difficulty", "N/A")
# Ensure list-based metadata are joined as plain strings for DataFrame
prerequisites_str = strip_html_tags(
", ".join(prerequisites)
if isinstance(prerequisites, list)
else str(prerequisites)
)
learning_outcomes_str = strip_html_tags(
", ".join(learning_outcomes)
if isinstance(learning_outcomes, list)
else str(learning_outcomes)
)
difficulty_str = strip_html_tags(str(difficulty))
formatted_card = {
"Index": (
f"{topic_index}.{actual_index}"
if topic_index > 0
else str(actual_index)
),
"Topic": strip_html_tags(topic_name), # Ensure topic is also plain
"Card_Type": strip_html_tags(card_type),
"Question": question, # Already stripped during Card object creation
"Answer": answer, # Already stripped
"Explanation": explanation, # Already stripped
"Example": example, # Already stripped
"Prerequisites": prerequisites_str,
"Learning_Outcomes": learning_outcomes_str,
"Difficulty": difficulty_str, # Ensure difficulty is plain text
"Source_URL": strip_html_tags(
metadata.get("source_url", "")
), # Ensure Source_URL is plain
}
formatted_cards.append(formatted_card)
return formatted_cards
def get_dataframe_columns() -> list[str]:
"""Returns the standard list of columns for the Anki card DataFrame."""
return [
"Index",
"Topic",
"Card_Type",
"Question",
"Answer",
"Explanation",
"Example",
"Prerequisites",
"Learning_Outcomes",
"Difficulty",
"Source_URL",
]
# This function might be specific to the old crawler flow if AnkiCardData is only from there.
# If orchestrate_card_generation now also produces something convertible to AnkiCardData, it might be useful.
# For now, it's used by generate_cards_from_crawled_content.
def deduplicate_cards(cards: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Deduplicates a list of card dictionaries based on the 'Question' field."""
seen_questions = set()
unique_cards = []
for card_dict in cards:
question = card_dict.get("Question")
if question is None: # Should not happen if cards are well-formed
logger.warning(f"Card dictionary missing 'Question' key: {card_dict}")
unique_cards.append(card_dict) # Keep it if no question to dedupe on
continue
# Normalize whitespace and case for deduplication
normalized_question = " ".join(str(question).strip().lower().split())
if normalized_question not in seen_questions:
seen_questions.add(normalized_question)
unique_cards.append(card_dict)
else:
logger.info(f"Deduplicated card with question: {question}")
return unique_cards
# --- Modification for generate_cards_from_crawled_content ---
def generate_cards_from_crawled_content(
all_cards: List[Card],
) -> List[Dict[str, Any]]: # Changed AnkiCardData to Card
"""
Processes a list of Card objects (expected to have plain text fields after generate_cards_batch)
and formats them into a list of dictionaries suitable for the DataFrame.
"""
if not all_cards:
return []
data_for_dataframe = []
for i, card_obj in enumerate(all_cards):
# Extract data, assuming it's already plain text from Card object creation
topic = (
card_obj.metadata.get("topic", f"Crawled Content - Card {i+1}")
if card_obj.metadata
else f"Crawled Content - Card {i+1}"
)
# Ensure list-based metadata are joined as plain strings for DataFrame
prerequisites = (
card_obj.metadata.get("prerequisites", []) if card_obj.metadata else []
)
learning_outcomes = (
card_obj.metadata.get("learning_outcomes", []) if card_obj.metadata else []
)
prerequisites_str = strip_html_tags(
", ".join(prerequisites)
if isinstance(prerequisites, list)
else str(prerequisites)
)
learning_outcomes_str = strip_html_tags(
", ".join(learning_outcomes)
if isinstance(learning_outcomes, list)
else str(learning_outcomes)
)
difficulty_str = strip_html_tags(
str(
card_obj.metadata.get("difficulty", "N/A")
if card_obj.metadata
else "N/A"
)
)
card_dict = {
"Index": str(i + 1),
"Topic": strip_html_tags(topic),
"Card_Type": strip_html_tags(card_obj.card_type or "basic"),
"Question": card_obj.front.question or "", # Should be plain
"Answer": card_obj.back.answer or "", # Should be plain
"Explanation": card_obj.back.explanation or "", # Should be plain
"Example": card_obj.back.example or "", # Should be plain
"Prerequisites": prerequisites_str,
"Learning_Outcomes": learning_outcomes_str,
"Difficulty": difficulty_str,
"Source_URL": strip_html_tags(
card_obj.metadata.get("source_url", "") if card_obj.metadata else ""
),
}
data_for_dataframe.append(card_dict)
return data_for_dataframe
def generate_token_usage_html(token_usage=None):
"""Generate HTML for token usage display"""
if token_usage and isinstance(token_usage, dict):
total_tokens = token_usage.get("total_tokens", 0)
return f"<div style='margin-top: 8px;'><b>Token Usage:</b> {total_tokens} tokens</div>"
else:
return "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"
|