brickfrog committed
Commit 07fe6c3 · verified · 1 Parent(s): c7abff0

Upload folder using huggingface_hub

ankigen_core/card_generator.py CHANGED
@@ -176,6 +176,50 @@ async def generate_cards_batch(
         raise  # Re-raise for the main function to handle
 
 
+async def judge_card(
+    openai_client,
+    cache: ResponseCache,
+    model: str,
+    card: Card,
+) -> bool:
+    """Use an LLM to validate a single card."""
+    system_prompt = (
+        "You review flashcards and decide if the question is clear and useful. "
+        'Respond with a JSON object like {"is_valid": true}.'
+    )
+    user_prompt = f"Question: {card.front.question}\nAnswer: {card.back.answer}"
+    try:
+        result = await structured_output_completion(
+            openai_client=openai_client,
+            model=model,
+            response_format={"type": "json_object"},
+            system_prompt=system_prompt,
+            user_prompt=user_prompt,
+            cache=cache,
+        )
+        if isinstance(result, dict):
+            return bool(result.get("is_valid", True))
+    except Exception as e:  # pragma: no cover - network or parse errors
+        logger.warning(f"LLM judge failed for card '{card.front.question}': {e}")
+    return True
+
+
+async def judge_cards(
+    openai_client,
+    cache: ResponseCache,
+    model: str,
+    cards: List[Card],
+) -> List[Card]:
+    """Filter cards using the LLM judge."""
+    validated: List[Card] = []
+    for card in cards:
+        if await judge_card(openai_client, cache, model, card):
+            validated.append(card)
+        else:
+            logger.info(f"Card rejected by judge: {card.front.question}")
+    return validated
+
+
 async def orchestrate_card_generation(  # MODIFIED: Added async
     client_manager: OpenAIClientManager,  # Expect the manager
     cache: ResponseCache,  # Expect the cache instance
@@ -190,6 +234,7 @@ async def orchestrate_card_generation(  # MODIFIED: Added async
     cards_per_topic: int,
     preference_prompt: str,
     generate_cloze: bool,
+    use_llm_judge: bool = False,
 ):
     """Orchestrates the card generation process based on UI inputs."""
 
@@ -490,6 +535,10 @@ async def orchestrate_card_generation(  # MODIFIED: Added async
                 "structured_output_completion returned None, defaulting to empty card list for text mode."
             )
         processed_cards = process_raw_cards_data(raw_cards)
+        if use_llm_judge and processed_cards:
+            processed_cards = await judge_cards(
+                openai_client, cache, model, processed_cards
+            )
         formatted_cards = format_cards_for_dataframe(
             processed_cards, topic_name=source_text_display_name, start_index=1
         )
@@ -529,7 +578,9 @@ async def orchestrate_card_generation(  # MODIFIED: Added async
         # progress_total_batches = len(topics_for_generation)
         # current_batch_num = 0
 
-        for topic_info in (
+        for (
+            topic_info
+        ) in (
             topics_for_generation
         ):  # This loop will be skipped if text_mode populated flattened_data directly
             # current_batch_num += 1
@@ -551,6 +602,10 @@ async def orchestrate_card_generation(  # MODIFIED: Added async
                 system_prompt,  # System prompt defined above based on mode
                 generate_cloze,
             )
+            if use_llm_judge and batch_cards:
+                batch_cards = await judge_cards(
+                    openai_client, cache, model, batch_cards
+                )
             # Assign topic name to cards before formatting for DataFrame
             formatted_batch = format_cards_for_dataframe(
                 batch_cards,
@@ -758,9 +813,11 @@ def format_cards_for_dataframe(
         difficulty_str = strip_html_tags(str(difficulty))
 
         formatted_card = {
-            "Index": f"{topic_index}.{actual_index}"
-            if topic_index > 0
-            else str(actual_index),
+            "Index": (
+                f"{topic_index}.{actual_index}"
+                if topic_index > 0
+                else str(actual_index)
+            ),
             "Topic": strip_html_tags(topic_name),  # Ensure topic is also plain
             "Card_Type": strip_html_tags(card_type),
             "Question": question,  # Already stripped during Card object creation
app.py CHANGED
@@ -295,6 +295,10 @@ def create_ankigen_interface():
                     label="Generate Cloze Cards (Experimental)",
                     value=False,
                 )
+                llm_judge_checkbox = gr.Checkbox(
+                    label="Use LLM Judge",
+                    value=False,
+                )
 
             generate_button = gr.Button("Generate Cards", variant="primary")
 
@@ -490,6 +494,7 @@ def create_ankigen_interface():
         cards_per_topic_val,
         preference_prompt_val,
         generate_cloze_checkbox_val,
+        llm_judge_checkbox_val,
         progress=gr.Progress(track_tqdm=True),  # Added progress tracker
     ):
         # Recreate the partial function call, but now it can be awaited
@@ -509,6 +514,7 @@ def create_ankigen_interface():
             cards_per_topic_val,
             preference_prompt_val,
             generate_cloze_checkbox_val,
+            llm_judge_checkbox_val,
         )
 
     generate_button.click(
@@ -524,6 +530,7 @@ def create_ankigen_interface():
             cards_per_topic,
             preference_prompt,
             generate_cloze_checkbox,
+            llm_judge_checkbox,
         ],
         outputs=[output, total_cards_html],
         show_progress="full",
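
The app.py change is the standard Gradio wiring: the checkbox component is listed in the click handler's inputs, so its boolean value arrives as one more positional argument. A stripped-down sketch of that pattern (component names and layout here are illustrative, not the app's actual interface):

import gradio as gr


def handler(subject, use_llm_judge):
    # The checkbox state arrives as a plain bool.
    judge = "on" if use_llm_judge else "off"
    return f"Generating cards for {subject!r} with LLM judge {judge}"


with gr.Blocks() as demo:
    subject = gr.Textbox(label="Subject")
    llm_judge_checkbox = gr.Checkbox(label="Use LLM Judge", value=False)
    status = gr.Textbox(label="Status")
    gr.Button("Generate Cards").click(
        handler,
        inputs=[subject, llm_judge_checkbox],
        outputs=[status],
    )

if __name__ == "__main__":
    demo.launch()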
tests/integration/test_app_interactions.py CHANGED
@@ -393,6 +393,7 @@ def get_orchestrator_mock_inputs(generation_mode="subject", api_key="sk-test"):
         "cards_per_topic": 3,  # For subject mode / text mode / web mode
         "preference_prompt": "Test preferences",
         "generate_cloze": False,
+        "use_llm_judge": False,
     }
 
 
tests/unit/test_card_generator.py CHANGED
@@ -203,6 +203,7 @@ def base_orchestrator_args(api_key="valid_key", **kwargs):
         "cards_per_topic": 5,  # Corresponds to num_cards in generate_cards_batch
         "preference_prompt": "Pref prompt",  # Corresponds to system_prompt
         "generate_cloze": False,
+        "use_llm_judge": False,
     }
     base_args.update(kwargs)  # Update with any provided kwargs
     return base_args
@@ -276,6 +277,41 @@ async def test_orchestrate_subject_mode(
     # assert status.strip() == expected_html_status.strip()
 
 
+@patch("ankigen_core.card_generator.judge_cards")
+@patch("ankigen_core.card_generator.structured_output_completion")
+@patch("ankigen_core.card_generator.generate_cards_batch")
+async def test_orchestrate_subject_mode_with_judge(
+    mock_gcb,
+    mock_soc,
+    mock_judge,
+    mock_client_manager_fixture,
+    mock_response_cache_fixture,
+):
+    """Test orchestrate_card_generation calls judge_cards when enabled."""
+    manager, client = mock_client_manager_fixture
+    cache = mock_response_cache_fixture
+    args = base_orchestrator_args(generation_mode="subject", use_llm_judge=True)
+
+    mock_soc.return_value = {
+        "topics": [{"name": "T1", "difficulty": "d", "description": "d"}]
+    }
+    sample_card = Card(
+        front=CardFront(question="Q1"),
+        back=CardBack(answer="A1", explanation="E1", example="Ex1"),
+    )
+    mock_gcb.return_value = [sample_card]
+    mock_judge.return_value = [sample_card]
+
+    with patch("gradio.Info"), patch("gradio.Warning"):
+        await card_generator.orchestrate_card_generation(
+            client_manager=manager,
+            cache=cache,
+            **args,
+        )
+
+    mock_judge.assert_called_once_with(client, cache, args["model_name"], [sample_card])
+
+
 @patch("ankigen_core.card_generator.structured_output_completion")
 @patch("ankigen_core.card_generator.generate_cards_batch")
 async def test_orchestrate_text_mode(
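
Beyond the orchestration-level test above, judge_cards could also be exercised directly by patching structured_output_completion. A sketch, assuming Card/CardFront/CardBack are importable from ankigen_core.models and that the suite runs async tests via pytest-asyncio (both assumptions, not confirmed by this diff):

import pytest
from unittest.mock import AsyncMock, MagicMock, patch

from ankigen_core import card_generator
from ankigen_core.models import Card, CardBack, CardFront  # assumed import path


@pytest.mark.asyncio
async def test_judge_cards_drops_rejected_card():
    card = Card(
        front=CardFront(question="Q1"),
        back=CardBack(answer="A1", explanation="E1", example="Ex1"),
    )
    # Force the judge to reject the only card in the batch.
    with patch(
        "ankigen_core.card_generator.structured_output_completion",
        new=AsyncMock(return_value={"is_valid": False}),
    ):
        result = await card_generator.judge_cards(MagicMock(), MagicMock(), "gpt-4o-mini", [card])
    assert result == []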