Upload folder using huggingface_hub
Browse files- ankigen_core/card_generator.py +0 -101
- ankigen_core/exceptions.py +0 -18
- ankigen_core/llm_interface.py +1 -356
- ankigen_core/models.py +1 -12
- ankigen_core/ui_logic.py +23 -476
- app.py +10 -208
ankigen_core/card_generator.py
CHANGED
@@ -52,16 +52,6 @@ GENERATION_MODES = [
         "label": "Single Subject",
         "description": "Generate cards for a specific topic",
     },
-    {
-        "value": "text",
-        "label": "From Text",
-        "description": "Generate cards from provided text",
-    },
-    {
-        "value": "web",
-        "label": "From Web",
-        "description": "Generate cards from a web page URL",
-    },
 ]
 
 # --- Core Functions --- (Moved and adapted from app.py)
@@ -279,97 +269,6 @@ def get_dataframe_columns() -> list[str]:
     ]
 
 
-# This function might be specific to the old crawler flow if AnkiCardData is only from there.
-# If orchestrate_card_generation now also produces something convertible to AnkiCardData, it might be useful.
-# For now, it's used by generate_cards_from_crawled_content.
-def deduplicate_cards(cards: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Deduplicates a list of card dictionaries based on the 'Question' field."""
-    seen_questions = set()
-    unique_cards = []
-    for card_dict in cards:
-        question = card_dict.get("Question")
-        if question is None:  # Should not happen if cards are well-formed
-            logger.warning(f"Card dictionary missing 'Question' key: {card_dict}")
-            unique_cards.append(card_dict)  # Keep it if no question to dedupe on
-            continue
-
-        # Normalize whitespace and case for deduplication
-        normalized_question = " ".join(str(question).strip().lower().split())
-        if normalized_question not in seen_questions:
-            seen_questions.add(normalized_question)
-            unique_cards.append(card_dict)
-        else:
-            logger.info(f"Deduplicated card with question: {question}")
-    return unique_cards
-
-
-# --- Modification for generate_cards_from_crawled_content ---
-
-
-def generate_cards_from_crawled_content(
-    all_cards: List[Card],
-) -> List[Dict[str, Any]]:  # Changed AnkiCardData to Card
-    """
-    Processes a list of Card objects (expected to have plain text fields after generate_cards_batch)
-    and formats them into a list of dictionaries suitable for the DataFrame.
-    """
-    if not all_cards:
-        return []
-
-    data_for_dataframe = []
-    for i, card_obj in enumerate(all_cards):
-        # Extract data, assuming it's already plain text from Card object creation
-        topic = (
-            card_obj.metadata.get("topic", f"Crawled Content - Card {i + 1}")
-            if card_obj.metadata
-            else f"Crawled Content - Card {i + 1}"
-        )
-
-        # Ensure list-based metadata are joined as plain strings for DataFrame
-        prerequisites = (
-            card_obj.metadata.get("prerequisites", []) if card_obj.metadata else []
-        )
-        learning_outcomes = (
-            card_obj.metadata.get("learning_outcomes", []) if card_obj.metadata else []
-        )
-
-        prerequisites_str = strip_html_tags(
-            ", ".join(prerequisites)
-            if isinstance(prerequisites, list)
-            else str(prerequisites)
-        )
-        learning_outcomes_str = strip_html_tags(
-            ", ".join(learning_outcomes)
-            if isinstance(learning_outcomes, list)
-            else str(learning_outcomes)
-        )
-        difficulty_str = strip_html_tags(
-            str(
-                card_obj.metadata.get("difficulty", "N/A")
-                if card_obj.metadata
-                else "N/A"
-            )
-        )
-
-        card_dict = {
-            "Index": str(i + 1),
-            "Topic": strip_html_tags(topic),
-            "Card_Type": strip_html_tags(card_obj.card_type or "basic"),
-            "Question": card_obj.front.question or "",  # Should be plain
-            "Answer": card_obj.back.answer or "",  # Should be plain
-            "Explanation": card_obj.back.explanation or "",  # Should be plain
-            "Example": card_obj.back.example or "",  # Should be plain
-            "Prerequisites": prerequisites_str,
-            "Learning_Outcomes": learning_outcomes_str,
-            "Difficulty": difficulty_str,
-            "Source_URL": strip_html_tags(
-                card_obj.metadata.get("source_url", "") if card_obj.metadata else ""
-            ),
-        }
-        data_for_dataframe.append(card_dict)
-    return data_for_dataframe
-
-
 def generate_token_usage_html(token_usage=None):
     """Generate HTML for token usage display"""
     if token_usage and isinstance(token_usage, dict):
ankigen_core/exceptions.py
CHANGED
@@ -41,24 +41,6 @@ class Context7APIError(APIError):
     pass
 
 
-class CrawlerError(AnkigenError):
-    """Base exception for web crawler errors."""
-
-    pass
-
-
-class URLValidationError(CrawlerError):
-    """Raised when URL validation fails."""
-
-    pass
-
-
-class ContentExtractionError(CrawlerError):
-    """Raised when content extraction from web page fails."""
-
-    pass
-
-
 class ExportError(AnkigenError):
     """Base exception for export-related errors."""
 
ankigen_core/llm_interface.py
CHANGED
@@ -2,9 +2,8 @@
 
 import asyncio
 import time
-from typing import Callable, List, Optional, TypeVar
+from typing import Optional, TypeVar
 
-import tiktoken
 from agents import Agent, ModelSettings, Runner, set_default_openai_client
 from openai import (
     APIConnectionError,
@@ -14,15 +13,8 @@ from openai import (
     RateLimitError,
 )
 from pydantic import BaseModel
-from tenacity import (
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_exponential,
-)
 
 from ankigen_core.logging import logger
-from ankigen_core.models import Card, CardBack, CardFront, CrawledPage
 from ankigen_core.utils import ResponseCache
 
 T = TypeVar("T", bound=BaseModel)
@@ -343,350 +335,3 @@ class OpenAIRateLimiter:
 # This assumes a single rate limit bucket for all calls from this application instance.
 # More sophisticated scenarios might need per-model or per-key limiters.
 openai_rate_limiter = OpenAIRateLimiter()  # Using default 60k TPM for now
-
-
-@retry(
-    stop=stop_after_attempt(3),
-    wait=wait_exponential(multiplier=1, min=2, max=10),
-    retry=retry_if_exception_type(RETRYABLE_OPENAI_ERRORS),
-    before_sleep=lambda retry_state: logger.warning(
-        f"Retrying OpenAI call (attempt {retry_state.attempt_number}) for process_crawled_page due to {retry_state.outcome.exception() if retry_state.outcome else 'unknown reason'}"
-    ),
-)
-async def process_crawled_page(
-    openai_client: AsyncOpenAI,
-    page: CrawledPage,
-    model: str = "gpt-4o",
-    custom_system_prompt: Optional[str] = None,
-    custom_user_prompt_template: Optional[str] = None,
-    max_prompt_content_tokens: int = 6000,
-    cache: Optional[ResponseCache] = None,
-) -> List[Card]:
-    """Process a crawled page and extract structured Card objects using OpenAI.
-
-    Args:
-        openai_client: The OpenAI client instance
-        page: The crawled page to process
-        model: The model to use for generation
-        custom_system_prompt: Optional custom system prompt
-        custom_user_prompt_template: Optional custom user prompt template
-        max_prompt_content_tokens: Maximum tokens for content
-        cache: Optional ResponseCache for page-level caching
-
-    Returns:
-        List of generated Card objects
-    """
-    # Check page-level cache first
-    if cache:
-        cache_key = f"{page.url}:{model}"
-        cached_cards = cache.get(cache_key, "page_cache")
-        if cached_cards is not None:
-            logger.info(f"Using cached cards for page: {page.url}")
-            return cached_cards
-
-    logger.info(
-        f"Processing page: {page.url} with model {model}, max_prompt_content_tokens: {max_prompt_content_tokens}"
-    )
-
-    if not page.text_content or not page.text_content.strip():
-        logger.info(f"Skipping page {page.url} as it has empty text content.")
-        return []
-
-    system_prompt = (
-        custom_system_prompt
-        if custom_system_prompt and custom_system_prompt.strip()
-        else """
-You are an expert Anki card creator. Your task is to generate Anki flashcards from the provided web page content.
-For each card, provide:
-- "front": A dictionary with a "question" field.
-- "back": A dictionary with "answer", "explanation", and "example" fields.
-- "tags": A list of relevant keywords (optional).
-- "source_url": The URL of the page the content was extracted from (this will be provided by the system).
-- "note_type": Specify "Basic" for question/answer cards or "Cloze" for cloze deletion cards. (This will be mapped to "card_type").
-- "metadata": An optional dictionary for additional structured information such as:
-    - "prerequisites": ["list", "of", "prerequisites"]
-    - "learning_outcomes": ["list", "of", "learning", "outcomes"]
-    - "common_misconceptions": ["list", "of", "common", "misconceptions"]
-    - "difficulty": "beginner" | "intermediate" | "advanced"
-    - "topic": "The main topic this card relates to, derived from the content"
-
-Focus on creating clear, concise, and accurate cards that are useful for learning.
-If generating cloze cards, ensure the "front.question" field uses Anki's cloze syntax, e.g., "The capital of {{c1::France}} is Paris."
-Ensure the entire response is a valid JSON object following this structure:
-{
-  "cards": [
-    {
-      "front": {"question": "..."},
-      "back": {"answer": "...", "explanation": "...", "example": "..."},
-      "tags": ["...", "..."],
-      "card_type": "Basic",
-      "metadata": {"difficulty": "beginner", "prerequisites": [], "topic": "..."}
-    },
-    // ... more cards
-  ]
-}
-"""
-    )
-
-    # User Prompt
-    default_user_prompt_template = """
-Please generate Anki cards based on the following content from the URL: {url}
-
-Content:
-{content}
-
-Generate a few high-quality Anki cards from this content.
-"""
-    user_prompt: str
-    if custom_user_prompt_template and custom_user_prompt_template.strip():
-        try:
-            user_prompt = custom_user_prompt_template.format(
-                url=page.url, content=page.text_content
-            )
-        except KeyError as e:
-            logger.warning(
-                f"Custom user prompt template for {page.url} is malformed (missing key {e}). Falling back to default."
-            )
-            user_prompt = default_user_prompt_template.format(
-                url=page.url, content=page.text_content
-            )
-    else:
-        user_prompt = default_user_prompt_template.format(
-            url=page.url, content=page.text_content
-        )
-    # --- End Prompt Definition ---
-
-    try:
-        encoding = tiktoken.encoding_for_model(model)
-    except KeyError:
-        logger.warning(
-            f"Tiktoken model {model} not found, using cl100k_base for token estimation and truncation."
-        )
-        encoding = tiktoken.get_encoding("cl100k_base")
-
-    prompt_structure_tokens = len(encoding.encode(system_prompt + user_prompt))
-    available_tokens_for_content = max_prompt_content_tokens - prompt_structure_tokens
-    if available_tokens_for_content <= 0:
-        logger.error(
-            f"Max prompt tokens ({max_prompt_content_tokens}) too small for prompt structure for page {page.url}. Cannot process."
-        )
-        return []
-
-    page_content_for_prompt = page.text_content or ""
-    content_tokens = encoding.encode(page_content_for_prompt)
-    if len(content_tokens) > available_tokens_for_content:
-        truncated_content_tokens = content_tokens[:available_tokens_for_content]
-        page_content_for_prompt = encoding.decode(truncated_content_tokens)
-        logger.warning(
-            f"Content for page {page.url} was truncated from {len(content_tokens)} tokens "
-            f"to {len(truncated_content_tokens)} tokens to fit model's context window (limit: {max_prompt_content_tokens} for content portion)."
-        )
-
-    estimated_request_tokens = prompt_structure_tokens + len(
-        encoding.encode(page_content_for_prompt)
-    )
-    await openai_rate_limiter.wait_if_needed(estimated_request_tokens)
-
-    try:
-        logger.debug(
-            f"Attempting to generate cards for {page.url} using model {model}."
-        )
-
-        # Use agents SDK for structured output
-        result = await structured_agent_call(
-            openai_client=openai_client,
-            model=model,
-            instructions=system_prompt,
-            user_input=user_prompt,
-            output_type=GenericJsonOutput,  # Flexible schema for card generation
-            temperature=0.5,
-            timeout=120.0,
-        )
-
-        if result is None:
-            logger.error(f"Invalid or empty response from agent for page {page.url}.")
-            return []
-
-        # Convert Pydantic model to dict for processing
-        parsed_cards = result.model_dump() if isinstance(result, BaseModel) else result
-
-        validated_cards: List[Card] = []
-
-        cards_list_from_json = []
-        if (
-            isinstance(parsed_cards, dict)
-            and "cards" in parsed_cards
-            and isinstance(parsed_cards["cards"], list)
-        ):
-            cards_list_from_json = parsed_cards["cards"]
-            logger.info(
-                f"Found 'cards' key in response from {page.url} with {len(cards_list_from_json)} cards"
-            )
-        elif isinstance(parsed_cards, list):
-            cards_list_from_json = parsed_cards
-        else:
-            logger.error(
-                f"LLM response for {page.url} was not a list or valid dict. Response: {str(parsed_cards)[:200]}..."
-            )
-            return []
-
-        for card_dict in cards_list_from_json:
-            if not isinstance(card_dict, dict):
-                logger.warning(
-                    f"Skipping non-dict card item for {page.url}: {card_dict}"
-                )
-                continue
-
-            try:
-                front_data = card_dict.get("front")
-                back_data = card_dict.get("back")
-
-                if not isinstance(front_data, dict) or "question" not in front_data:
-                    logger.warning(
-                        f"Malformed 'front' data in card_dict for {page.url}: {front_data}. Skipping card."
-                    )
-                    continue
-                if not isinstance(back_data, dict) or "answer" not in back_data:
-                    logger.warning(
-                        f"Malformed 'back' data in card_dict for {page.url}: {back_data}. Skipping card."
-                    )
-                    continue
-
-                metadata_payload = card_dict.get("metadata", {})
-                if not isinstance(metadata_payload, dict):
-                    metadata_payload = {}
-                metadata_payload["source_url"] = page.url
-                if page.title and "topic" not in metadata_payload:
-                    metadata_payload["topic"] = page.title
-
-                tags = card_dict.get("tags", [])
-                if not isinstance(tags, list) or not all(
-                    isinstance(t, str) for t in tags
-                ):
-                    tags = []
-
-                if tags:
-                    metadata_payload["tags"] = tags
-
-                card_obj = Card(
-                    front=CardFront(question=str(front_data["question"])),
-                    back=CardBack(
-                        answer=str(back_data["answer"]),
-                        explanation=str(back_data.get("explanation", "")),
-                        example=str(back_data.get("example", "")),
-                    ),
-                    card_type=str(card_dict.get("card_type", "Basic")),
-                    metadata=metadata_payload,
-                )
-                validated_cards.append(card_obj)
-            except Exception as e:
-                logger.error(
-                    f"Error creating Card object for {page.url} from dict: {card_dict}. Error: {e}",
-                    exc_info=True,
-                )
-
-        if not validated_cards:
-            logger.info(
-                f"No valid Cards generated or parsed from {page.url} after LLM processing."
-            )
-        else:
-            logger.info(
-                f"Successfully generated {len(validated_cards)} Cards from {page.url}."
-            )
-            # Cache successful results for page-level caching
-            if cache:
-                cache_key = f"{page.url}:{model}"
-                cache.set(cache_key, "page_cache", validated_cards)
-                logger.debug(f"Cached {len(validated_cards)} cards for {page.url}")
-
-        return validated_cards
-
-    except Exception as e:
-        logger.error(
-            f"Error processing page {page.url} with agents SDK: {e}", exc_info=True
-        )
-        return []
-
-
-async def process_crawled_pages(
-    openai_client: AsyncOpenAI,
-    pages: List[CrawledPage],
-    model: str = "gpt-4o",
-    max_prompt_content_tokens: int = 6000,
-    max_concurrent_requests: int = 5,
-    custom_system_prompt: Optional[str] = None,
-    custom_user_prompt_template: Optional[str] = None,
-    progress_callback: Optional[Callable[[int, int], None]] = None,
-    cache: Optional[ResponseCache] = None,
-) -> List[Card]:
-    if not pages:
-        logger.info("No pages provided to process_crawled_pages.")
-        return []
-
-    logger.info(
-        f"Starting batch processing of {len(pages)} pages with model {model}. Max concurrent requests: {max_concurrent_requests}."
-    )
-
-    semaphore = asyncio.Semaphore(max_concurrent_requests)
-    tasks = []
-    processed_count = 0
-
-    async def process_with_semaphore(page: CrawledPage):
-        nonlocal processed_count
-        async with semaphore:
-            logger.debug(
-                f"Submitting task for page: {page.url} (Semaphore count: {semaphore._value})"
-            )
-            try:
-                page_cards = await process_crawled_page(
-                    openai_client=openai_client,
-                    page=page,
-                    model=model,
-                    custom_system_prompt=custom_system_prompt,
-                    custom_user_prompt_template=custom_user_prompt_template,
-                    max_prompt_content_tokens=max_prompt_content_tokens,
-                    cache=cache,
-                )
-                if page_cards is None:
-                    logger.warning(
-                        f"process_crawled_page returned None for {page.url}, expected list. Defaulting to empty list."
-                    )
-                    page_cards = []
-
-                logger.info(
-                    f"Completed processing for page: {page.url}. Generated {len(page_cards)} cards."
-                )
-                return page_cards
-            except Exception as e:
-                logger.error(
-                    f"Error in process_with_semaphore for page {page.url}: {e}",
-                    exc_info=True,
-                )
-                return []
-            finally:
-                processed_count += 1
-                if progress_callback:
-                    progress_callback(processed_count, len(pages))
-
-    for page_to_process in pages:
-        tasks.append(asyncio.create_task(process_with_semaphore(page_to_process)))
-
-    results_from_tasks: List[List[Card]] = []
-    for i, future in enumerate(asyncio.as_completed(tasks)):
-        try:
-            result_list = await future
-            if result_list:
-                results_from_tasks.append(result_list)
-        except Exception as e:
-            logger.error(
-                f"Unhandled error gathering result for a page task: {e}", exc_info=True
-            )
-
-    all_cards: List[Card] = []
-    for card_list in results_from_tasks:
-        all_cards.extend(card_list)
-
-    logger.info(
-        f"Finished processing all {len(pages)} pages. Generated {len(all_cards)} Cards in total."
-    )
-    return all_cards
ankigen_core/models.py
CHANGED
@@ -1,4 +1,4 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 from typing import List, Optional
 
 # Module for Pydantic data models
@@ -60,14 +60,3 @@ class LearningSequence(BaseModel):
     cards: List[CardGeneration]
     suggested_study_order: List[str]
     review_recommendations: List[str]
-
-
-class CrawledPage(BaseModel):
-    url: str
-    html_content: str
-    text_content: str
-    title: Optional[str] = None
-    meta_description: Optional[str] = None
-    meta_keywords: Optional[List[str]] = Field(default_factory=list)
-    crawl_depth: int = 0
-    parent_url: Optional[str] = None
ankigen_core/ui_logic.py
CHANGED
@@ -2,68 +2,20 @@
 
 import gradio as gr
 import pandas as pd
-from typing import (
-    Callable,
-    List,
-    Optional,
-    Tuple,
-)
-from urllib.parse import urlparse
+from typing import List
 
-# --- Imports moved from later in the file (Task 7, etc.) ---
-import re  # For URL validation and filename sanitization
-import asyncio
-
-from ankigen_core.crawler import CrawledPage, WebCrawler
-from ankigen_core.llm_interface import (
-    OpenAIClientManager,
-)
-from ankigen_core.card_generator import (
-    generate_cards_from_crawled_content,
-    AVAILABLE_MODELS,
-)
 from ankigen_core.utils import get_logger
-
-
-from ankigen_core.models import (
-    Card,
-    # ModelSettings,  # Removed
-    # LearningPathInput,  # Removed
-    # LearningPath,  # Removed
-    # GeneratedPath,  # Removed
-    # SubjectAnalysis,  # Removed
-    # SubjectCardRequest,  # Removed
-    # TextCardRequest,  # Removed
-    # LearningPathRequest,  # Removed
-)
-
-# Import agent system for web crawling
-# Agent system is required for web crawling
-from ankigen_core.agents.integration import AgentOrchestrator
-
-AGENTS_AVAILABLE_UI = True
-# --- End moved imports ---
+from ankigen_core.models import Card
 
-# Get an instance of the logger for this module
-crawler_ui_logger = get_logger()  # Keep this definition
+logger = get_logger()
 
 
-def update_mode_visibility(
-    mode: str,
-    current_subject: str,
-    current_text: str,
-    current_url: str,
-):
-    """Updates visibility and values of UI elements based on generation mode."""
-    is_subject = mode == "subject"
-    is_text = mode == "text"
-    is_web = mode == "web"
-
-    # Determine value persistence or clearing
-    subject_val = current_subject if is_subject else ""
-    text_val = current_text if is_text else ""
-    url_val = current_url if is_web else ""
-
+def update_mode_visibility(mode: str, current_subject: str):
+    """Updates visibility and values of UI elements based on generation mode.
+
+    Currently only 'subject' mode is supported. This function is kept for
+    future extensibility.
+    """
     # Define standard columns for empty DataFrames
     main_output_df_columns = [
         "Index",
@@ -79,420 +31,20 @@ def update_mode_visibility(
     ]
 
     return (
-        gr.update(visible=is_subject),  # 1 subject_mode
-        gr.update(visible=is_text),  # 2 text_mode
-        gr.update(visible=is_web),  # 3 web_mode
-        gr.update(visible=True),  # 4 cards_output (always visible now)
-        gr.update(value=subject_val),  # 5 subject
-        gr.update(value=text_val),  # 6 source_text
-        gr.update(value=url_val),  # 7 web_crawl_url_input
+        gr.update(visible=True),  # subject_mode (Group) - always visible
+        gr.update(visible=True),  # cards_output - always visible
+        gr.update(value=current_subject),  # subject textbox value
         gr.update(
            value=pd.DataFrame(columns=main_output_df_columns)
-        ),  # 8 output
+        ),  # output DataFrame
        gr.update(
            value="<div><b>Total Cards Generated:</b> <span id='total-cards-count'>0</span></div>",
            visible=False,
-        ),  # 9 total_cards_html
+        ),  # total_cards_html
    )
 
 
-def create_crawler_main_mode_elements() -> Tuple[
-    List[gr.components.Component],  # ui_components (url_input, max_depth, etc.)
-    gr.Button,  # crawl_button
-    gr.Progress,  # progress_bar
-    gr.Textbox,  # progress_status_textbox
-    gr.Textbox,  # custom_system_prompt
-    gr.Textbox,  # custom_user_prompt_template
-    gr.Checkbox,  # use_sitemap_checkbox
-    gr.Textbox,  # sitemap_url_textbox
-]:
-    """Creates the UI components for the Web Crawler mode integrated into the main tab."""
-    ui_components: List[gr.components.Component] = []
-
-    # URL Input
-    url_input = gr.Textbox(
-        label="Start URL",
-        placeholder="Enter the full URL to start crawling (e.g., https://example.com/docs)",
-        elem_id="crawler_url_input",
-    )
-    ui_components.append(url_input)
-
-    with gr.Row():
-        max_depth_slider = gr.Slider(
-            minimum=0,
-            maximum=5,
-            value=1,
-            step=1,
-            label="Max Crawl Depth",
-            elem_id="crawler_max_depth_slider",
-        )
-        ui_components.append(max_depth_slider)
-
-        crawler_req_per_sec_slider = gr.Slider(
-            minimum=0.1,
-            maximum=10,
-            value=2,
-            step=0.1,
-            label="Requests per Second (Crawler)",
-            elem_id="crawler_req_per_sec_slider",
-        )
-        ui_components.append(crawler_req_per_sec_slider)
-
-        model_choices_ui_crawler = [(m["label"], m["value"]) for m in AVAILABLE_MODELS]
-        default_model_value_crawler = next(
-            (m["value"] for m in AVAILABLE_MODELS if "nano" in m["value"].lower()),
-            AVAILABLE_MODELS[0]["value"] if AVAILABLE_MODELS else "",
-        )
-        model_dropdown = gr.Dropdown(
-            choices=model_choices_ui_crawler,
-            label="AI Model for Content Processing",  # Clarified label
-            value=default_model_value_crawler,
-            elem_id="crawler_model_dropdown",
-            allow_custom_value=True,
-        )
-        ui_components.append(model_dropdown)
-
-    with gr.Row():
-        include_patterns_textbox = gr.Textbox(
-            label="Include URL Patterns (one per line, regex compatible)",
-            placeholder="""e.g., /blog/.*
-example.com/articles/.*""",
-            lines=3,
-            elem_id="crawler_include_patterns",
-            scale=1,
-        )
-        ui_components.append(include_patterns_textbox)
-
-        exclude_patterns_textbox = gr.Textbox(
-            label="Exclude URL Patterns (one per line, regex compatible)",
-            placeholder="""e.g., /category/.*
-.*/login""",
-            lines=3,
-            elem_id="crawler_exclude_patterns",
-            scale=1,
-        )
-        ui_components.append(exclude_patterns_textbox)
-
-    with gr.Accordion(
-        "Sitemap Options", open=False, elem_id="crawler_sitemap_options_accordion"
-    ):
-        use_sitemap_checkbox = gr.Checkbox(
-            label="Use Sitemap?",
-            value=False,
-            elem_id="crawler_use_sitemap_checkbox",
-        )
-        # ui_components.append(use_sitemap_checkbox)  # Appended later with its group
-
-        sitemap_url_textbox = gr.Textbox(
-            label="Sitemap URL (e.g., /sitemap.xml or full URL)",
-            placeholder="Enter sitemap URL relative to start URL or full path",
-            visible=False,
-            elem_id="crawler_sitemap_url_textbox",
-        )
-        # ui_components.append(sitemap_url_textbox)  # Appended later with its group
-
-        use_sitemap_checkbox.change(
-            fn=lambda x: gr.update(visible=x),
-            inputs=[use_sitemap_checkbox],
-            outputs=[sitemap_url_textbox],
-        )
-        # Add sitemap components to the main list for return
-        # sitemap_elements_for_return = [use_sitemap_checkbox, sitemap_url_textbox]  # Unused variable
-
-    with gr.Accordion(
-        "Advanced Prompt Options",
-        open=False,
-        elem_id="crawler_advanced_options_accordion",
-    ):  # Removed assignment to advanced_options_accordion_component
-        custom_system_prompt = gr.Textbox(
-            label="Custom System Prompt (Optional)",
-            placeholder="Leave empty to use the default system prompt for card generation.",
-            lines=5,
-            info="Define the overall role and instructions for the AI.",
-            elem_id="crawler_custom_system_prompt",
-        )
-        # ui_components.append(custom_system_prompt)  # Appended later
-
-        custom_user_prompt_template = gr.Textbox(
-            label="Custom User Prompt Template (Optional)",
-            placeholder="Leave empty to use default. Available placeholders: {url}, {content}",
-            lines=5,
-            info="Define how the page URL and content are presented to the AI.",
-            elem_id="crawler_custom_user_prompt_template",
-        )
-        # ui_components.append(custom_user_prompt_template)  # Appended later
-        # Add prompt components to the main list for return
-        # prompt_elements_for_return = [custom_system_prompt, custom_user_prompt_template]  # Unused variable
-
-    # Crawl button (will trigger crawl_and_generate, results populate main DataFrame)
-    crawl_button = gr.Button(
-        "Crawl Content & Prepare Cards",  # Changed button text
-        variant="secondary",  # Differentiate from main generate button
-        elem_id="crawler_crawl_content_button",
-    )
-    # ui_components.append(crawl_button)  # Returned separately
-
-    # Progress bar and status for the crawling process
-    progress_bar = (
-        gr.Progress()
-    )  # Removed elem_id as gr.Progress might not support it directly
-    progress_status_textbox = gr.Textbox(
-        label="Crawl Status",
-        interactive=False,
-        lines=3,  # Reduced lines
-        placeholder="Crawling process status will appear here...",
-        elem_id="crawler_status_textbox",
-    )
-    # ui_components.append(progress_status_textbox)  # Returned separately
-
-    # REMOVED UI elements:
-    # - export_format_radio (no longer needed here)
-    # - All preview related: preview_row_component, preview_dataframe_component, update_cards_button_component
-    # - All preview export related: export_format_preview_component, deck_name_preview_component, export_button_preview_component
-    # - All direct file download related: download_row_group, generated_file_output, download_button
-
-    # The main ui_components list should contain all elements whose values are needed as inputs to the crawl/generation
-    # or whose visibility might be managed together.
-    # For clarity, specific components like buttons or progress bars are returned separately if they have specific event handlers
-    # or are managed distinctly.
-
-    # Add all input fields to ui_components for easier management if needed, or return them individually.
-    # For now, returning them grouped for clarity.
-
-    return (
-        ui_components,
-        crawl_button,
-        progress_bar,
-        progress_status_textbox,
-        custom_system_prompt,
-        custom_user_prompt_template,
-        use_sitemap_checkbox,
-        sitemap_url_textbox,
-    )
-
-
-# --- Crawl and Generate Logic (Task 7) ---
-
-# MODIFIED: Get model values from AVAILABLE_MODELS for validation
-CRAWLER_AVAILABLE_MODELS_VALUES = [m["value"] for m in AVAILABLE_MODELS]
-
-
-def _basic_sanitize_filename(name: str) -> str:
-    """Basic filename sanitization by replacing non-alphanumeric characters with underscores."""
-    return re.sub(r"[^a-zA-Z0-9_.-]", "_", name)
-
-
-def _validate_crawl_url(url: str) -> bool:
-    """Validate URL for crawling."""
-    if not url or not url.startswith(("http://", "https://")):
-        gr.Warning("Invalid URL provided. Please enter a valid http/https URL.")
-        return False
-    try:
-        urlparse(url)
-        return True
-    except Exception:
-        return False
-
-
-def _create_web_crawler(
-    url: str,
-    max_depth: int,
-    include_patterns: str,
-    exclude_patterns: str,
-    use_sitemap: bool,
-    sitemap_url_str: str,
-) -> WebCrawler:
-    """Create configured WebCrawler instance."""
-    include_list = [p.strip() for p in include_patterns.split(",") if p.strip()]
-    exclude_list = [p.strip() for p in exclude_patterns.split(",") if p.strip()]
-
-    return WebCrawler(
-        start_url=url,
-        max_depth=max_depth,
-        include_patterns=include_list,
-        exclude_patterns=exclude_list,
-        use_sitemap=use_sitemap,
-        sitemap_url=sitemap_url_str
-        if use_sitemap and sitemap_url_str.strip()
-        else None,
-    )
-
-
-def _create_crawl_progress_callback(
-    progress: gr.Progress,
-) -> Tuple[Callable[[int, int, str], None], List[int]]:
-    """Create progress callback for crawler with mutable state container."""
-    total_urls_container = [0]  # Mutable container for nonlocal-like behavior
-
-    def callback(processed_count: int, total_urls: int, current_url: str):
-        total_urls_container[0] = total_urls
-        if total_urls_container[0] > 0:
-            progress(
-                0.1 + (processed_count / total_urls_container[0]) * 0.4,
-                desc=f"Crawling: {processed_count}/{total_urls_container[0]} URLs. Current: {current_url}",
-            )
-        else:
-            progress(
-                0.1 + processed_count * 0.01,
-                desc=f"Crawling: {processed_count} URLs discovered. Current: {current_url}",
-            )
-
-    return callback, total_urls_container
-
-
-async def _perform_web_crawl(
-    crawler: WebCrawler,
-    progress: gr.Progress,
-    url: str,
-) -> Optional[List[CrawledPage]]:
-    """Execute web crawl and return pages or None if empty."""
-    callback, _ = _create_crawl_progress_callback(progress)
-
-    crawler_ui_logger.info(f"Starting crawl for {url}...")
-    progress(0.15, desc=f"Starting crawl for {url}...")
-
-    crawled_pages = await asyncio.to_thread(crawler.crawl, progress_callback=callback)
-
-    crawler_ui_logger.info(f"Crawling finished. Found {len(crawled_pages)} pages.")
-    progress(0.5, desc=f"Crawling finished. Found {len(crawled_pages)} pages.")
-
-    return crawled_pages if crawled_pages else None
-
-
-async def _process_crawled_with_agents(
-    crawled_pages: List[CrawledPage],
-    client_manager: OpenAIClientManager,
-    url: str,
-    progress: gr.Progress,
-) -> Tuple[List[Card], str]:
-    """Process crawled content with agent system."""
-    crawler_ui_logger.info("Using agent system for web crawling card generation")
-
-    orchestrator = AgentOrchestrator(client_manager)
-    # API key is already configured in client_manager, pass empty string as placeholder
-    await orchestrator.initialize("")
-
-    combined_content = "\n\n--- PAGE BREAK ---\n\n".join(
-        [
-            f"URL: {page.url}\nTitle: {page.title}\nContent: {page.text_content[:2000]}..."
-            for page in crawled_pages[:10]
-        ]
-    )
-
-    context = {
-        "source_text": combined_content,
-        "crawl_source": url,
-        "pages_crawled": len(crawled_pages),
-    }
-
-    progress(0.6, desc="Processing with agent system...")
-
-    agent_cards, _ = await orchestrator.generate_cards_with_agents(
-        topic=f"Content from {url}",
-        subject="web_content",
-        num_cards=min(len(crawled_pages) * 3, 50),
-        difficulty="intermediate",
-        enable_quality_pipeline=True,
-        context=context,
-    )
-
-    if agent_cards:
-        progress(0.9, desc=f"Agent system generated {len(agent_cards)} cards")
-        final_message = (
-            f"Agent system processed content from {len(crawled_pages)} pages. "
-            f"Generated {len(agent_cards)} high-quality cards."
-        )
-    else:
-        final_message = "Agent system returned no cards"
-
-    return agent_cards or [], final_message
-
-
-async def crawl_and_generate(
-    url: str,
-    max_depth: int,
-    crawler_requests_per_second: float,
-    include_patterns: str,
-    exclude_patterns: str,
-    model: str,
-    export_format_ui: str,
-    custom_system_prompt: str,
-    custom_user_prompt_template: str,
-    use_sitemap: bool,
-    sitemap_url_str: str,
-    client_manager: OpenAIClientManager,
-    progress: gr.Progress,
-    status_textbox: gr.Textbox,
-) -> Tuple[str, List[dict], List[Card]]:
-    """Crawls a website, generates Anki cards, and prepares them for export/display."""
-    crawler_ui_logger.info(f"Crawl and generate called for URL: {url}")
-
-    if not _validate_crawl_url(url):
-        return "Invalid URL", [], []
-
-    try:
-        crawler = _create_web_crawler(
-            url,
-            max_depth,
-            include_patterns,
-            exclude_patterns,
-            use_sitemap,
-            sitemap_url_str,
-        )
-
-        crawled_pages = await _perform_web_crawl(crawler, progress, url)
-        if not crawled_pages:
-            progress(1.0, desc="No pages were crawled. Check URL and patterns.")
-            return (
-                "No pages were crawled. Check URL and patterns.",
-                pd.DataFrame().to_dict(orient="records"),
-                [],
-            )
-
-        agent_cards, final_message = await _process_crawled_with_agents(
-            crawled_pages,
-            client_manager,
-            url,
-            progress,
-        )
-
-        if agent_cards:
-            cards_for_dataframe_export = generate_cards_from_crawled_content(
-                agent_cards
-            )
-            progress(1.0, desc=final_message)
-            return final_message, cards_for_dataframe_export, agent_cards
-        else:
-            progress(1.0, desc=final_message)
-            return final_message, pd.DataFrame().to_dict(orient="records"), []
-
-    except ConnectionError as e:
-        crawler_ui_logger.error(f"Connection error during crawl: {e}", exc_info=True)
-        progress(1.0, desc=f"Connection error: {e}")
-        return f"Connection error: {e}", pd.DataFrame().to_dict(orient="records"), []
-    except ValueError as e:
-        crawler_ui_logger.error(f"Value error: {e}", exc_info=True)
-        progress(1.0, desc=f"Input error: {e}")
-        return f"Input error: {e}", pd.DataFrame().to_dict(orient="records"), []
-    except RuntimeError as e:  # Catch RuntimeError from client_manager.get_client()
-        crawler_ui_logger.error(
-            f"Runtime error (e.g., OpenAI client not init): {e}", exc_info=True
-        )
-        progress(1.0, desc=f"Runtime error: {e}")
-        return f"Runtime error: {e}", pd.DataFrame().to_dict(orient="records"), []
-    except Exception as e:
-        crawler_ui_logger.error(
-            f"Unexpected error in crawl_and_generate: {e}", exc_info=True
-        )
-        progress(1.0, desc=f"Unexpected error: {e}")
-        return (
-            f"An unexpected error occurred: {e}",
-            pd.DataFrame().to_dict(orient="records"),
-            [],
-        )
-
-
-# --- Card Preview and Editing Utilities (Task 13.3) ---
+# --- Card Preview and Editing Utilities ---
 
 
 def cards_to_dataframe(cards: List[Card]) -> pd.DataFrame:
@@ -509,16 +61,16 @@ def cards_to_dataframe(cards: List[Card]) -> pd.DataFrame:
         data_for_df.append(
             {
                 "ID": i + 1,  # 1-indexed ID for display
-                "Topic": topic_str,
+                "Topic": topic_str,
                 "Front": card.front.question,
                 "Back": card.back.answer,
                 "Tags": tags_str,
-                "Card Type": card.card_type or "Basic",
-                "Explanation": card.back.explanation or "",
-                "Example": card.back.example or "",
+                "Card Type": card.card_type or "Basic",
+                "Explanation": card.back.explanation or "",
+                "Example": card.back.example or "",
                 "Source_URL": card.metadata.get("source_url", "")
                 if card.metadata
-                else "",
+                else "",
             }
         )
     # Define all columns explicitly for consistent DataFrame structure
@@ -546,7 +98,7 @@ def dataframe_to_cards(df: pd.DataFrame, original_cards: List[Card]) -> List[Card]:
     if df.empty and not original_cards:
        return []
    if df.empty and original_cards:
-        return []
+        return []
 
    for index, row in df.iterrows():
        try:
@@ -556,8 +108,6 @@ def dataframe_to_cards(df: pd.DataFrame, original_cards: List[Card]) -> List[Card]:
            if 0 <= original_card_index < len(original_cards):
                card_to_update = original_cards[original_card_index]
 
-                # Create new CardFront and CardBack objects for immutability if preferred,
-                # or update existing ones since Pydantic models are mutable.
                new_front = card_to_update.front.copy(
                    update={
                        "question": str(row.get("Front", card_to_update.front.question))
@@ -592,7 +142,6 @@ def dataframe_to_cards(df: pd.DataFrame, original_cards: List[Card]) -> List[Card]:
                new_metadata["topic"] = str(
                    row.get("Topic", new_metadata.get("topic", "N/A"))
                )
-                # Source URL is generally not editable from this simple table
 
                updated_card = card_to_update.copy(
                    update={
@@ -606,16 +155,14 @@ def dataframe_to_cards(df: pd.DataFrame, original_cards: List[Card]) -> List[Card]:
                )
                updated_cards.append(updated_card)
            else:
-                crawler_ui_logger.warning(
+                logger.warning(
                    f"Card ID {card_id} from DataFrame is out of bounds for original_cards list."
                )
        except (ValueError, KeyError, AttributeError) as e:
-            crawler_ui_logger.error(
+            logger.error(
                f"Error processing row {index} from DataFrame: {row}. Error: {e}"
            )
            if 0 <= original_card_index < len(original_cards):
-                updated_cards.append(
-                    original_cards[original_card_index]
-                )  # Re-add original on error
+                updated_cards.append(original_cards[original_card_index])
            continue
    return updated_cards
app.py
CHANGED
@@ -18,11 +18,7 @@ from ankigen_core.exporters import (
 from ankigen_core.llm_interface import (
     OpenAIClientManager,
 )  # structured_output_completion is internal to core modules
-from ankigen_core.ui_logic import (
-    crawl_and_generate,
-    create_crawler_main_mode_elements,
-    update_mode_visibility,
-)
+from ankigen_core.ui_logic import update_mode_visibility
 from ankigen_core.utils import (
     ResponseCache,
     get_logger,
@@ -159,13 +155,11 @@
            generation_mode = gr.Radio(
                choices=[
                    ("Single Subject", "subject"),
-                    ("Learning Path", "path"),
-                    ("From Text", "text"),
-                    ("From Web", "web"),
                ],
                value="subject",
                label="Generation Mode",
                info="Choose how you want to generate content",
            )
            with gr.Group() as subject_mode:
                subject = gr.Textbox(
@@ -176,41 +170,6 @@
                    "Auto-fill",
                    variant="secondary",
                )
-            with gr.Group(visible=False) as text_mode:
-                source_text = gr.Textbox(
-                    label="Source Text",
-                    placeholder="Paste text here...",
-                    lines=15,
-                )
-            with gr.Group(visible=False) as web_mode:
-                # --- BEGIN INTEGRATED CRAWLER UI (Task 16) ---
-                logger.info(
-                    "Setting up integrated Web Crawler UI elements...",
-                )
-                (
-                    crawler_input_ui_elements,  # List of inputs like URL, depth, model, patterns
-                    web_crawl_button,  # Specific button to trigger crawl
-                    web_crawl_progress_bar,
-                    web_crawl_status_textbox,
-                    web_crawl_custom_system_prompt,
-                    web_crawl_custom_user_prompt_template,
-                    web_crawl_use_sitemap_checkbox,
-                    web_crawl_sitemap_url_textbox,
-                ) = create_crawler_main_mode_elements()
-
-                # Unpack crawler_input_ui_elements for clarity and use
-                web_crawl_url_input = crawler_input_ui_elements[0]
-                web_crawl_max_depth_slider = crawler_input_ui_elements[1]
-                web_crawl_req_per_sec_slider = crawler_input_ui_elements[2]
-                web_crawl_model_dropdown = crawler_input_ui_elements[3]
-                web_crawl_include_patterns_textbox = (
-                    crawler_input_ui_elements[4]
-                )
-                web_crawl_exclude_patterns_textbox = (
-                    crawler_input_ui_elements[5]
-                )
-                # --- END INTEGRATED CRAWLER UI ---
-
            api_key_input = gr.Textbox(
                label="OpenAI API Key",
                type="password",
@@ -364,29 +323,21 @@
            inputs=[
                generation_mode,
                subject,
-                source_text,
-                web_crawl_url_input,
            ],
            outputs=[
                subject_mode,
-                text_mode,
-                web_mode,
                cards_output,
                subject,
-                source_text,
-                web_crawl_url_input,
                output,
                total_cards_html,
            ],
        )
 
-        # Define an async wrapper for the orchestrate_card_generation
        async def handle_generate_click(
            api_key_input_val,
            subject_val,
            generation_mode_val,
-            source_text_val,
-            url_input_val,
            model_choice_val,
            topic_number_val,
            cards_per_topic_val,
@@ -394,20 +345,16 @@
            generate_cloze_checkbox_val,
            library_name_val,
            library_topic_val,
-            progress=gr.Progress(track_tqdm=True),
        ):
-            # Recreate the partial function call, but now it can be awaited
-            # The actual orchestrate_card_generation is already partially applied with client_manager and response_cache
-            # So, we need to get that specific partial object if it's stored, or redefine the partial logic here.
-            # For simplicity and clarity, let's assume direct call to orchestrate_card_generation directly here
            return await orchestrate_card_generation(
-                client_manager,
-                response_cache,
                api_key_input_val,
                subject_val,
                generation_mode_val,
-                source_text_val,
-                url_input_val,
                model_choice_val,
                topic_number_val,
                cards_per_topic_val,
@@ -416,16 +363,13 @@
                library_name=library_name_val if library_name_val else None,
                library_topic=library_topic_val if library_topic_val else None,
            )
-            # Expect 3-tuple return (dataframe, total_cards_html, token_usage_html)
|
| 420 |
|
| 421 |
generate_button.click(
|
| 422 |
-
fn=handle_generate_click,
|
| 423 |
inputs=[
|
| 424 |
api_key_input,
|
| 425 |
subject,
|
| 426 |
generation_mode,
|
| 427 |
-
source_text,
|
| 428 |
-
web_crawl_url_input,
|
| 429 |
model_choice,
|
| 430 |
topic_number,
|
| 431 |
cards_per_topic,
|
|
@@ -629,150 +573,8 @@ def create_ankigen_interface():
|
|
| 629 |
preference_prompt,
|
| 630 |
generate_cloze_checkbox,
|
| 631 |
model_choice,
|
| 632 |
-
library_accordion,
|
| 633 |
-
],
|
| 634 |
-
)
|
| 635 |
-
|
| 636 |
-
async def handle_web_crawl_click(
|
| 637 |
-
api_key_val: str,
|
| 638 |
-
url: str,
|
| 639 |
-
max_depth: int,
|
| 640 |
-
req_per_sec: float,
|
| 641 |
-
model: str, # This is the model for LLM processing of crawled content
|
| 642 |
-
include_patterns: str,
|
| 643 |
-
exclude_patterns: str,
|
| 644 |
-
custom_system_prompt: str,
|
| 645 |
-
custom_user_prompt_template: str,
|
| 646 |
-
use_sitemap: bool,
|
| 647 |
-
sitemap_url: str,
|
| 648 |
-
progress=gr.Progress(track_tqdm=True),
|
| 649 |
-
):
|
| 650 |
-
progress(0, desc="Initializing web crawl...")
|
| 651 |
-
yield {
|
| 652 |
-
web_crawl_status_textbox: gr.update(
|
| 653 |
-
value="Initializing web crawl...",
|
| 654 |
-
),
|
| 655 |
-
output: gr.update(value=None), # Clear main output table
|
| 656 |
-
total_cards_html: gr.update(
|
| 657 |
-
visible=False,
|
| 658 |
-
value="<div><b>Total Cards Generated:</b> <span id='total-cards-count'>0</span></div>",
|
| 659 |
-
),
|
| 660 |
-
}
|
| 661 |
-
|
| 662 |
-
if not api_key_val:
|
| 663 |
-
logger.error("API Key is missing for web crawler operation.")
|
| 664 |
-
yield {
|
| 665 |
-
web_crawl_status_textbox: gr.update(
|
| 666 |
-
value="Error: OpenAI API Key is required.",
|
| 667 |
-
),
|
| 668 |
-
}
|
| 669 |
-
return
|
| 670 |
-
try:
|
| 671 |
-
await client_manager.initialize_client(api_key_val)
|
| 672 |
-
except Exception as e:
|
| 673 |
-
logger.error(
|
| 674 |
-
f"Failed to initialize OpenAI client for crawler: {e}",
|
| 675 |
-
exc_info=True,
|
| 676 |
-
)
|
| 677 |
-
yield {
|
| 678 |
-
web_crawl_status_textbox: gr.update(
|
| 679 |
-
value=f"Error: Client init failed: {e!s}",
|
| 680 |
-
),
|
| 681 |
-
}
|
| 682 |
-
return
|
| 683 |
-
|
| 684 |
-
message, cards_list_of_dicts, _ = await crawl_and_generate(
|
| 685 |
-
url=url,
|
| 686 |
-
max_depth=max_depth,
|
| 687 |
-
crawler_requests_per_second=req_per_sec,
|
| 688 |
-
include_patterns=include_patterns,
|
| 689 |
-
exclude_patterns=exclude_patterns,
|
| 690 |
-
model=model,
|
| 691 |
-
export_format_ui="", # No longer used for direct export from crawl_and_generate
|
| 692 |
-
custom_system_prompt=custom_system_prompt,
|
| 693 |
-
custom_user_prompt_template=custom_user_prompt_template,
|
| 694 |
-
use_sitemap=use_sitemap,
|
| 695 |
-
sitemap_url_str=sitemap_url,
|
| 696 |
-
client_manager=client_manager, # Passed from global scope
|
| 697 |
-
progress=progress, # Gradio progress object
|
| 698 |
-
status_textbox=web_crawl_status_textbox, # Specific status textbox for crawl
|
| 699 |
-
)
|
| 700 |
-
|
| 701 |
-
if cards_list_of_dicts:
|
| 702 |
-
try:
|
| 703 |
-
# Convert List[Dict] to Pandas DataFrame for the main output component
|
| 704 |
-
preview_df_value = pd.DataFrame(cards_list_of_dicts)
|
| 705 |
-
# Ensure columns match the main output dataframe
|
| 706 |
-
# The `generate_cards_from_crawled_content` which produces `cards_list_of_dicts`
|
| 707 |
-
# should already format it correctly. If not, mapping is needed here.
|
| 708 |
-
# For now, assume it matches the main table structure expected by `gr.Dataframe(value=example_data)`
|
| 709 |
-
|
| 710 |
-
# Check if columns match example_data, if not, reorder/rename or log warning
|
| 711 |
-
if not preview_df_value.empty:
|
| 712 |
-
expected_cols = example_data.columns.tolist()
|
| 713 |
-
# Basic check, might need more robust mapping if structures differ significantly
|
| 714 |
-
if not all(
|
| 715 |
-
col in preview_df_value.columns for col in expected_cols
|
| 716 |
-
):
|
| 717 |
-
logger.warning(
|
| 718 |
-
"Crawled card data columns mismatch main output, attempting to use available data.",
|
| 719 |
-
)
|
| 720 |
-
# Potentially select only common columns or reindex if necessary
|
| 721 |
-
# For now, we'll pass it as is, Gradio might handle extra/missing cols gracefully or error.
|
| 722 |
-
|
| 723 |
-
num_cards = len(preview_df_value)
|
| 724 |
-
total_cards_update = f"<div><b>Total Cards Prepared from Crawl:</b> <span id='total-cards-count'>{num_cards}</span></div>"
|
| 725 |
-
|
| 726 |
-
yield {
|
| 727 |
-
web_crawl_status_textbox: gr.update(value=message),
|
| 728 |
-
output: gr.update(value=preview_df_value),
|
| 729 |
-
total_cards_html: gr.update(
|
| 730 |
-
visible=True,
|
| 731 |
-
value=total_cards_update,
|
| 732 |
-
),
|
| 733 |
-
}
|
| 734 |
-
except Exception as e:
|
| 735 |
-
logger.error(
|
| 736 |
-
f"Error converting crawled cards to DataFrame: {e}",
|
| 737 |
-
exc_info=True,
|
| 738 |
-
)
|
| 739 |
-
yield {
|
| 740 |
-
web_crawl_status_textbox: gr.update(
|
| 741 |
-
value=f"{message} (Error displaying cards: {e!s})",
|
| 742 |
-
),
|
| 743 |
-
output: gr.update(value=None),
|
| 744 |
-
total_cards_html: gr.update(visible=False),
|
| 745 |
-
}
|
| 746 |
-
else:
|
| 747 |
-
yield {
|
| 748 |
-
web_crawl_status_textbox: gr.update(
|
| 749 |
-
value=message,
|
| 750 |
-
), # Message from crawl_and_generate (e.g. no cards)
|
| 751 |
-
output: gr.update(value=None),
|
| 752 |
-
total_cards_html: gr.update(visible=False),
|
| 753 |
-
}
|
| 754 |
-
|
| 755 |
-
web_crawl_button.click(
|
| 756 |
-
fn=handle_web_crawl_click,
|
| 757 |
-
inputs=[
|
| 758 |
-
api_key_input,
|
| 759 |
-
web_crawl_url_input,
|
| 760 |
-
web_crawl_max_depth_slider,
|
| 761 |
-
web_crawl_req_per_sec_slider,
|
| 762 |
-
web_crawl_model_dropdown, # Model for LLM processing of content
|
| 763 |
-
web_crawl_include_patterns_textbox,
|
| 764 |
-
web_crawl_exclude_patterns_textbox,
|
| 765 |
-
web_crawl_custom_system_prompt,
|
| 766 |
-
web_crawl_custom_user_prompt_template,
|
| 767 |
-
web_crawl_use_sitemap_checkbox,
|
| 768 |
-
web_crawl_sitemap_url_textbox,
|
| 769 |
-
],
|
| 770 |
-
outputs=[
|
| 771 |
-
web_crawl_status_textbox, # Specific status for crawl
|
| 772 |
-
output, # Main output DataFrame
|
| 773 |
-
total_cards_html, # Main total cards display
|
| 774 |
],
|
| 775 |
-
# Removed progress_bar from outputs as it's handled by gr.Progress(track_tqdm=True)
|
| 776 |
)
|
| 777 |
|
| 778 |
logger.info("AnkiGen Gradio interface creation complete.")
|
|
|
|
| 18 |
from ankigen_core.llm_interface import (
|
| 19 |
OpenAIClientManager,
|
| 20 |
) # structured_output_completion is internal to core modules
|
| 21 |
+
from ankigen_core.ui_logic import update_mode_visibility
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
from ankigen_core.utils import (
|
| 23 |
ResponseCache,
|
| 24 |
get_logger,
|
|
|
|
| 155 |
generation_mode = gr.Radio(
|
| 156 |
choices=[
|
| 157 |
("Single Subject", "subject"),
|
|
|
|
|
|
|
|
|
|
| 158 |
],
|
| 159 |
value="subject",
|
| 160 |
label="Generation Mode",
|
| 161 |
info="Choose how you want to generate content",
|
| 162 |
+
visible=False, # Hidden since only one mode exists
|
| 163 |
)
|
| 164 |
with gr.Group() as subject_mode:
|
| 165 |
subject = gr.Textbox(
|
|
|
|
| 170 |
"Auto-fill",
|
| 171 |
variant="secondary",
|
| 172 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
api_key_input = gr.Textbox(
|
| 174 |
label="OpenAI API Key",
|
| 175 |
type="password",
|
|
|
|
| 323 |
inputs=[
|
| 324 |
generation_mode,
|
| 325 |
subject,
|
|
|
|
|
|
|
| 326 |
],
|
| 327 |
outputs=[
|
| 328 |
subject_mode,
|
|
|
|
|
|
|
| 329 |
cards_output,
|
| 330 |
subject,
|
|
|
|
|
|
|
| 331 |
output,
|
| 332 |
total_cards_html,
|
| 333 |
],
|
| 334 |
)
|
| 335 |
|
| 336 |
+
# Define an async wrapper for the orchestrate_card_generation
|
| 337 |
async def handle_generate_click(
|
| 338 |
api_key_input_val,
|
| 339 |
subject_val,
|
| 340 |
generation_mode_val,
|
|
|
|
|
|
|
| 341 |
model_choice_val,
|
| 342 |
topic_number_val,
|
| 343 |
cards_per_topic_val,
|
|
|
|
| 345 |
generate_cloze_checkbox_val,
|
| 346 |
library_name_val,
|
| 347 |
library_topic_val,
|
| 348 |
+
progress=gr.Progress(track_tqdm=True),
|
| 349 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
return await orchestrate_card_generation(
|
| 351 |
+
client_manager,
|
| 352 |
+
response_cache,
|
| 353 |
api_key_input_val,
|
| 354 |
subject_val,
|
| 355 |
generation_mode_val,
|
| 356 |
+
"", # source_text - deprecated
|
| 357 |
+
"", # url_input - deprecated
|
| 358 |
model_choice_val,
|
| 359 |
topic_number_val,
|
| 360 |
cards_per_topic_val,
|
|
|
|
| 363 |
library_name=library_name_val if library_name_val else None,
|
| 364 |
library_topic=library_topic_val if library_topic_val else None,
|
| 365 |
)
|
|
|
|
| 366 |
|
| 367 |
generate_button.click(
|
| 368 |
+
fn=handle_generate_click,
|
| 369 |
inputs=[
|
| 370 |
api_key_input,
|
| 371 |
subject,
|
| 372 |
generation_mode,
|
|
|
|
|
|
|
| 373 |
model_choice,
|
| 374 |
topic_number,
|
| 375 |
cards_per_topic,
|
|
|
|
| 573 |
preference_prompt,
|
| 574 |
generate_cloze_checkbox,
|
| 575 |
model_choice,
|
| 576 |
+
library_accordion,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 577 |
],
|
|
|
|
| 578 |
)
|
| 579 |
|
| 580 |
logger.info("AnkiGen Gradio interface creation complete.")
|
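The generate handler above keeps orchestrate_card_generation's positional signature intact while the UI drops the retired text/web inputs: the wrapper pins the removed parameters to empty-string placeholders instead of changing the callee. Below is a minimal sketch of that shim pattern under simplified assumptions; the function names and argument list here are hypothetical stand-ins, not the project's actual signatures.

import asyncio


async def orchestrate(subject: str, source_text: str, url_input: str) -> str:
    # Stand-in for the real orchestrator: it still accepts the old arguments,
    # so existing callers and tests keep working unchanged.
    assert source_text == "" and url_input == "", "deprecated inputs should stay empty"
    return f"generated cards for {subject!r}"


async def handle_click(subject: str) -> str:
    # The UI-facing wrapper no longer receives source_text/url_input;
    # it passes placeholders so the callee's signature stays stable.
    return await orchestrate(subject, "", "")  # source_text, url_input - deprecated


print(asyncio.run(handle_click("Python basics")))

The trade-off is deliberate: stubbing the arguments at the single call site is a smaller, safer diff than rethreading the orchestrator's parameter list through every caller, and the placeholders can be removed later in one step.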