|
|
|
|
|
import gradio as gr |
|
import pandas as pd |
|
|
|
|
|
from ankigen_core.utils import get_logger, ResponseCache, fetch_webpage_text |
|
from ankigen_core.llm_interface import OpenAIClientManager, structured_output_completion |
|
from ankigen_core.models import ( |
|
Card, |
|
CardFront, |
|
CardBack, |
|
) |
|
|
|
# Module-level logger used by the functions in this file.
logger = get_logger()
|
|
|
|
|
# Model choices. Each entry carries a "value", a human-readable "label"
# and a short "description".
AVAILABLE_MODELS = [
    dict(
        value="gpt-4.1",
        label="gpt-4.1 (Best Quality)",
        description="Highest quality, slower generation",
    ),
    dict(
        value="gpt-4.1-nano",
        label="gpt-4.1 Nano (Fast & Efficient)",
        description="Optimized for speed and lower cost",
    ),
]
|
|
|
# Generation mode choices. Each entry carries a "value", a human-readable
# "label" and a short "description".
GENERATION_MODES = [
    dict(
        value="subject",
        label="Single Subject",
        description="Generate cards for a specific topic",
    ),
    dict(
        value="path",
        label="Learning Path",
        description="Break down a job description or learning goal into subjects",
    ),
    dict(
        value="text",
        label="From Text",
        description="Generate cards from provided text",
    ),
    dict(
        value="web",
        label="From Web",
        description="Generate cards from a web page URL",
    ),
]
|
|
|
|
|
|
|
|
|
def generate_cards_batch(
    openai_client,
    cache: ResponseCache,
    model: str,
    topic: str,
    num_cards: int,
    system_prompt: str,
    generate_cloze: bool = False,
    batch_size: int = 3,
):
    """Generate a batch of cards for a topic, potentially including cloze deletions.

    Builds a prompt describing the expected JSON structure, sends it through
    ``structured_output_completion`` and converts every well-formed entry of
    the returned ``"cards"`` list into a ``Card``.

    Args:
        openai_client: Client object forwarded to ``structured_output_completion``.
        cache: Response cache forwarded to ``structured_output_completion``.
        model: Model name used for the completion.
        topic: Topic the flashcards should cover.
        num_cards: Number of cards requested in the prompt.
        system_prompt: System prompt sent with the request.
        generate_cloze: When true, cloze-deletion instructions are added to
            the prompt.
        batch_size: Accepted for backward compatibility; not used by this
            function.

    Returns:
        A list of ``Card`` objects. Entries that are malformed, lack a required
        field, or fail model construction are logged and skipped so that one
        bad entry does not discard the rest of the batch.

    Raises:
        ValueError: If the response is empty, has no ``"cards"`` key, or the
            ``"cards"`` value is not a list.
        Exception: Any other error (e.g. from the completion call) is logged
            and re-raised unchanged.
    """
    # Shared helper so the cloze wording stays in one place.
    cloze_instruction = get_cloze_instruction(generate_cloze)

    cards_prompt = f"""
    Generate {num_cards} flashcards for the topic: {topic}
    {cloze_instruction}
    Return your response as a JSON object with the following structure:
    {{
        "cards": [
            {{
                "card_type": "basic or cloze",
                "front": {{
                    "question": "question text (potentially with {{{{c1::cloze syntax}}}})"
                }},
                "back": {{
                    "answer": "concise answer or full text for cloze",
                    "explanation": "detailed explanation",
                    "example": "practical example"
                }},
                "metadata": {{
                    "prerequisites": ["list", "of", "prerequisites"],
                    "learning_outcomes": ["list", "of", "outcomes"],
                    "misconceptions": ["list", "of", "misconceptions"],
                    "difficulty": "beginner/intermediate/advanced"
                }}
            }}
            // ... more cards
        ]
    }}
    """

    try:
        logger.info(
            f"Generating card batch for {topic}, Cloze enabled: {generate_cloze}"
        )

        response = structured_output_completion(
            openai_client=openai_client,
            model=model,
            response_format={"type": "json_object"},
            system_prompt=system_prompt,
            user_prompt=cards_prompt,
            cache=cache,
        )

        if (
            not response
            or "cards" not in response
            or not isinstance(response["cards"], list)
        ):
            logger.error("Invalid cards response format")
            raise ValueError("Failed to generate cards. Please try again.")

        cards_list = []
        for card_data in response["cards"]:
            # The payload comes from an LLM, so check the shape of every level
            # before indexing into it.
            if (
                not isinstance(card_data, dict)
                or "front" not in card_data
                or "back" not in card_data
            ):
                logger.warning(
                    f"Skipping card due to missing front/back data: {card_data}"
                )
                continue

            front = card_data["front"]
            back = card_data["back"]

            if not isinstance(front, dict) or "question" not in front:
                logger.warning(f"Skipping card due to missing question: {card_data}")
                continue

            if not isinstance(back, dict) or any(
                key not in back for key in ("answer", "explanation", "example")
            ):
                logger.warning(
                    f"Skipping card due to missing answer/explanation/example: {card_data}"
                )
                continue

            try:
                card = Card(
                    card_type=card_data.get("card_type", "basic"),
                    front=CardFront(**front),
                    back=CardBack(**back),
                    metadata=card_data.get("metadata", {}),
                )
            except Exception as e:
                # Mirror process_raw_cards_data: a single invalid card is
                # skipped instead of failing the whole batch.
                logger.warning(
                    f"Skipping card due to validation error: {e} | Data: {card_data}"
                )
                continue

            cards_list.append(card)

        return cards_list

    except Exception as e:
        logger.error(
            f"Failed to generate cards batch for {topic}: {str(e)}", exc_info=True
        )
        raise
|
|
|
|
|
def _empty_generation_result(status_message: str) -> tuple[pd.DataFrame, str, int]:
    """Build the (empty DataFrame, status message, 0) tuple returned on failure paths."""
    return pd.DataFrame(columns=get_dataframe_columns()), status_message, 0


def orchestrate_card_generation(
    client_manager: OpenAIClientManager,
    cache: ResponseCache,
    api_key_input: str,
    subject: str,
    generation_mode: str,
    source_text: str,
    url_input: str,
    model_name: str,
    topic_number: int,
    cards_per_topic: int,
    preference_prompt: str,
    generate_cloze: bool,
):
    """Orchestrates the card generation process based on UI inputs.

    Supported ``generation_mode`` values:

    * ``"web"``: fetch text from ``url_input`` and generate cards from it.
    * ``"text"``: generate cards from ``source_text``.
    * ``"subject"``: ask the model for ``topic_number`` topics about
      ``subject`` and generate ``cards_per_topic`` cards for each topic.

    Any other mode is rejected with an "Unsupported mode." status.

    Args:
        client_manager: Manager used to initialise and obtain the OpenAI client.
        cache: Response cache forwarded to the completion calls.
        api_key_input: API key passed to ``client_manager.initialize_client``.
        subject: Subject name (used in "subject" mode).
        generation_mode: One of the modes listed above.
        source_text: Raw text (used in "text" mode).
        url_input: Page URL (used in "web" mode).
        model_name: Model name used for every completion call.
        topic_number: Number of topics requested in "subject" mode.
        cards_per_topic: Number of cards requested per topic / per content.
        preference_prompt: User preferences interpolated into system prompts.
        generate_cloze: Whether cloze instructions are added to the prompts.

    Returns:
        A ``(dataframe, status, total_cards)`` tuple. On success ``dataframe``
        holds one row per card with the columns of ``get_dataframe_columns()``
        and ``status`` is an HTML summary. On a handled failure the DataFrame
        is empty, ``status`` is a short message and ``total_cards`` is 0.

    Raises:
        gr.Error: Re-raised if one is raised inside the generation body.

    NOTE(review): ``gr.Error`` is an exception class in Gradio; the calls
    below construct it without raising, so they presumably display nothing in
    the UI. They are kept as-is because callers rely on the tuple return
    value; confirm whether ``gr.Warning`` or a ``raise`` is intended.
    """
    logger.info(f"Starting card generation orchestration in {generation_mode} mode")
    logger.debug(
        f"Parameters: mode={generation_mode}, topics={topic_number}, cards_per_topic={cards_per_topic}, cloze={generate_cloze}"
    )

    if not api_key_input:
        logger.warning("No API key provided to orchestrator")
        gr.Error("OpenAI API key is required")
        return _empty_generation_result("API key is required.")

    try:
        client_manager.initialize_client(api_key_input)
        openai_client = client_manager.get_client()
    except Exception as e:
        # Exception already covers ValueError and RuntimeError.
        logger.error(f"Client initialization failed in orchestrator: {e}")
        gr.Error(f"OpenAI Client Error: {e}")
        return _empty_generation_result(f"OpenAI Client Error: {e}")

    model = model_name
    flattened_data = []
    total_cards_generated = 0

    try:
        page_text_for_generation = ""

        # Step 1: obtain the source text for the content-based modes.
        if generation_mode == "web":
            logger.info("Orchestrator: Web Mode")
            if not url_input or not url_input.strip():
                gr.Error("URL is required for 'From Web' mode.")
                return _empty_generation_result("URL is required.")

            gr.Info(f"🕸️ Fetching content from {url_input}...")
            try:
                page_text_for_generation = fetch_webpage_text(url_input)
                if not page_text_for_generation:
                    gr.Warning(
                        f"Could not extract meaningful text content from {url_input}. Please check the page or try another URL."
                    )
                    return _empty_generation_result(
                        "No meaningful text extracted from URL."
                    )

                gr.Info(
                    f"✅ Successfully fetched text (approx. {len(page_text_for_generation)} chars). Starting AI generation..."
                )
            except (ConnectionError, ValueError, RuntimeError) as e:
                logger.error(f"Failed to fetch or process URL {url_input}: {e}")
                gr.Error(f"Failed to get content from URL: {e}")
                return _empty_generation_result("Failed to get content from URL.")
            except Exception as e:
                logger.error(
                    f"Unexpected error fetching URL {url_input}: {e}", exc_info=True
                )
                gr.Error("An unexpected error occurred fetching the URL.")
                return _empty_generation_result("Unexpected error fetching URL.")

        elif generation_mode == "text":
            logger.info("Orchestrator: Text Input Mode")
            if not source_text or not source_text.strip():
                gr.Error("Source text is required for 'From Text' mode.")
                return _empty_generation_result("Source text is required.")
            page_text_for_generation = source_text
            gr.Info("🚀 Starting card generation from text...")

        # Step 2: generate cards for the selected mode.
        if generation_mode in ("text", "web"):
            topic_name = (
                "From Web Content" if generation_mode == "web" else "From Text Input"
            )
            logger.info(f"Generating cards directly from content: {topic_name}")

            text_system_prompt = f"""
            You are an expert educator creating flashcards from provided text.
            Generate {cards_per_topic} clear, concise flashcards based *only* on the text given.
            Focus on key concepts, definitions, facts, or processes.
            Adhere to the user's learning preferences: {preference_prompt}
            Use the specified JSON output format.
            Format code examples with triple backticks (```).
            """
            json_structure_prompt = get_card_json_structure_prompt()
            cloze_instruction = get_cloze_instruction(generate_cloze)

            text_user_prompt = f"""
            Generate {cards_per_topic} flashcards based *only* on the following text:
            --- TEXT START ---
            {page_text_for_generation}
            --- TEXT END ---
            {cloze_instruction}
            {json_structure_prompt}
            """

            response = structured_output_completion(
                openai_client=openai_client,
                model=model,
                response_format={"type": "json_object"},
                system_prompt=text_system_prompt,
                user_prompt=text_user_prompt,
                cache=cache,
            )

            if not response or "cards" not in response:
                logger.error("Invalid cards response format from text/web generation.")
                gr.Error("Failed to generate cards from content. Please try again.")
                return _empty_generation_result(
                    "Failed to generate cards from content."
                )

            cards_data = response["cards"]
            card_list = process_raw_cards_data(cards_data)

            flattened_data.extend(
                format_cards_for_dataframe(card_list, topic_name, start_index=1)
            )
            total_cards_generated = len(flattened_data)
            gr.Info(
                f"✅ Generated {total_cards_generated} cards from the provided content."
            )

        elif generation_mode == "subject":
            logger.info(f"Orchestrator: Subject Mode for {subject}")
            if not subject or not subject.strip():
                gr.Error("Subject is required for 'Single Subject' mode.")
                return _empty_generation_result("Subject is required.")

            gr.Info("🚀 Starting card generation for subject...")

            system_prompt = f"""
            You are an expert educator in {subject}. Create an optimized learning sequence.
            Break down {subject} into {topic_number} logical concepts/topics, ordered by difficulty.
            Keep in mind the user's preferences: {preference_prompt}
            """
            topic_prompt = f"""
            Generate the top {topic_number} important subjects/topics to know about {subject}
            ordered by ascending difficulty (beginner to advanced).
            Return your response as a JSON object: {{"topics": [{{"name": "topic name", "difficulty": "beginner/intermediate/advanced", "description": "brief description"}}]}}
            """

            logger.info("Generating topics...")
            topics_response = structured_output_completion(
                openai_client=openai_client,
                model=model,
                response_format={"type": "json_object"},
                system_prompt=system_prompt,
                user_prompt=topic_prompt,
                cache=cache,
            )

            # A non-list "topics" value is treated like a missing one so the
            # loop below never iterates over dict keys or characters.
            if (
                not topics_response
                or "topics" not in topics_response
                or not isinstance(topics_response["topics"], list)
            ):
                logger.error("Invalid topics response format")
                gr.Error("Failed to generate topics. Please try again.")
                return _empty_generation_result("Failed to generate topics.")

            topics = topics_response["topics"]
            gr.Info(
                f"✨ Generated {len(topics)} topics successfully! Now generating cards..."
            )

            card_system_prompt = f"""
            You are an expert educator in {subject}, creating flashcards for specific topics.
            Focus on clarity, accuracy, and adherence to the user's preferences: {preference_prompt}
            Format code examples with triple backticks (```).
            Use the specified JSON output format.
            """

            for i, topic_info in enumerate(topics):
                # Tolerate topic entries that are not dicts so one odd entry
                # does not abort the run and discard cards already generated.
                if isinstance(topic_info, dict):
                    topic_name = topic_info.get("name", f"Topic {i + 1}")
                elif isinstance(topic_info, str) and topic_info.strip():
                    topic_name = topic_info.strip()
                else:
                    topic_name = f"Topic {i + 1}"

                logger.info(f"Generating cards for topic: {topic_name}")
                try:
                    cards = generate_cards_batch(
                        openai_client=openai_client,
                        cache=cache,
                        model=model,
                        topic=topic_name,
                        num_cards=cards_per_topic,
                        system_prompt=card_system_prompt,
                        generate_cloze=generate_cloze,
                    )

                    if cards:
                        flattened_data.extend(
                            format_cards_for_dataframe(cards, topic_name, topic_index=i)
                        )
                        total_cards_generated += len(cards)
                        gr.Info(
                            f"✅ Generated {len(cards)} cards for {topic_name} (Total: {total_cards_generated})"
                        )
                    else:
                        gr.Warning(
                            f"⚠️ No cards generated for topic '{topic_name}' (API might have returned empty list)."
                        )

                except Exception as e:
                    # Best effort: a failing topic is reported and skipped.
                    logger.error(
                        f"Failed during card generation for topic {topic_name}: {e}",
                        exc_info=True,
                    )
                    gr.Warning(
                        f"Failed to generate cards for '{topic_name}'. Skipping."
                    )
                    continue
        else:
            logger.error(f"Invalid generation mode received: {generation_mode}")
            gr.Error(f"Unsupported generation mode selected: {generation_mode}")
            return _empty_generation_result("Unsupported mode.")

        # Step 3: summarise and package the results.
        logger.info(
            f"Card generation orchestration complete. Total cards: {total_cards_generated}"
        )
        final_html = f"""
        <div style="text-align: center">
            <p>✅ Generation complete!</p>
            <p>Total cards generated: {total_cards_generated}</p>
        </div>
        """

        df = pd.DataFrame(flattened_data, columns=get_dataframe_columns())
        return df, final_html, total_cards_generated

    except gr.Error as e:
        logger.warning(f"A Gradio error was raised and caught: {e}")
        raise
    except Exception as e:
        logger.error(
            f"Unexpected error during card generation orchestration: {e}", exc_info=True
        )
        gr.Error(f"An unexpected error occurred: {e}")
        return _empty_generation_result("Unexpected error.")
|
|
|
|
|
|
|
|
|
|
|
def get_cloze_instruction(generate_cloze: bool) -> str:
    """Return the cloze-deletion prompt fragment, or an empty string when disabled."""
    if generate_cloze:
        return """
    Where appropriate, generate Cloze deletion cards.
    - For Cloze cards, set "card_type" to "cloze".
    - Format the question field using Anki's cloze syntax (e.g., "The capital of France is {{c1::Paris}}.").
    - The "answer" field should contain the full, non-cloze text or specific context for the cloze.
    - For standard question/answer cards, set "card_type" to "basic".
    """
    return ""
|
|
|
|
|
def get_card_json_structure_prompt() -> str:
    """Return the prompt fragment describing the expected JSON card structure.

    The text is a plain (non-f) string, so braces are written literally:
    single braces for the JSON objects and two braces for the Anki cloze
    marker (``{{c1::...}}``). Callers are expected to interpolate the returned
    value as-is; it must not be passed through ``str.format``.
    """
    return """
    Return your response as a JSON object with the following structure:
    {
        "cards": [
            {
                "card_type": "basic or cloze",
                "front": {
                    "question": "question text (potentially with {{c1::cloze syntax}})"
                },
                "back": {
                    "answer": "concise answer or full text for cloze",
                    "explanation": "detailed explanation",
                    "example": "practical example"
                },
                "metadata": {
                    "prerequisites": ["list", "of", "prerequisites"],
                    "learning_outcomes": ["list", "of", "outcomes"],
                    "misconceptions": ["list", "of", "misconceptions"],
                    "difficulty": "beginner/intermediate/advanced"
                }
            }
            // ... more cards
        ]
    }
    """
|
|
|
|
|
def process_raw_cards_data(cards_data: list) -> list[Card]:
    """Convert raw card dicts into ``Card`` models, skipping unusable entries.

    An entry is skipped (with a warning) when it is not a dict, lacks a
    "front" or "back" key, or when building the models raises.
    """
    parsed_cards = []
    for raw in cards_data:
        is_well_formed = isinstance(raw, dict) and "front" in raw and "back" in raw
        if not is_well_formed:
            logger.warning(f"Skipping malformed card data: {raw}")
            continue

        try:
            parsed_cards.append(
                Card(
                    card_type=raw.get("card_type", "basic"),
                    front=CardFront(**raw["front"]),
                    back=CardBack(**raw["back"]),
                    metadata=raw.get("metadata", {}),
                )
            )
        except Exception as err:
            logger.warning(
                f"Skipping card due to Pydantic validation error: {err} | Data: {raw}"
            )
    return parsed_cards
|
|
|
|
|
def format_cards_for_dataframe(
    cards: list[Card], topic_name: str, topic_index: int = 0, start_index: int = 1
) -> list:
    """Turn ``Card`` objects into row lists ordered like the DataFrame columns.

    Cards are numbered from ``start_index``. The index cell is
    "<topic_index + 1>.<n>" when ``topic_index`` is non-negative and just
    "<n>" otherwise. Missing metadata entries fall back to empty lists and a
    "beginner" difficulty.
    """

    def _label(position):
        if topic_index < 0:
            return f"{position}"
        return f"{topic_index + 1}.{position}"

    def _row(position, card):
        label = _label(position)
        meta = card.metadata or {}
        return [
            label,
            topic_name,
            card.card_type,
            card.front.question,
            card.back.answer,
            card.back.explanation,
            card.back.example,
            meta.get("prerequisites", []),
            meta.get("learning_outcomes", []),
            meta.get("misconceptions", []),
            meta.get("difficulty", "beginner"),
        ]

    return [_row(pos, card) for pos, card in enumerate(cards, start=start_index)]
|
|
|
|
|
def get_dataframe_columns() -> list[str]:
    """Return a new list holding the column names of the results DataFrame, in order."""
    identity_columns = ("Index", "Topic", "Card_Type")
    content_columns = ("Question", "Answer", "Explanation", "Example")
    metadata_columns = (
        "Prerequisites",
        "Learning_Outcomes",
        "Common_Misconceptions",
        "Difficulty",
    )
    return [*identity_columns, *content_columns, *metadata_columns]
|
|