David Chu committed
Commit 21d76a7 · unverified · 1 Parent(s): d2f1b05

feat: mitigate citation hallucination


Using PydanticAI's output function and raising ModelRetry makes the LLM
regenerate its output (1 retry by default) whenever it cites nonexistent
source IDs.
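
A minimal, self-contained sketch of the same pattern (not code from this repo: the model string, the Cited model, and the KNOWN_SOURCE_IDS set are all illustrative):

from pydantic import BaseModel
from pydantic_ai import Agent, ModelRetry

# Illustrative registry of IDs that the retrieval tools actually returned.
KNOWN_SOURCE_IDS = {"PMID:123", "PMID:456"}


class Cited(BaseModel):
    text: str
    sources: list[str]


def check_citations(output: list[Cited]) -> list[Cited]:
    # Output function: PydanticAI validates every final response with it.
    # Raising ModelRetry sends the complaint back to the model and asks it
    # to regenerate the answer (1 retry by default).
    for statement in output:
        for source_id in statement.sources:
            if source_id not in KNOWN_SOURCE_IDS:
                raise ModelRetry(f"Source ID '{source_id}' does not exist.")
    return output


# Model string is illustrative; any pydantic-ai model id works here.
agent = Agent("google-gla:gemini-2.5-flash", output_type=check_citations)

In app/agent.py the real validator, create_response, additionally takes the RunContext so it can map each cited ID back to the sources the tools actually returned.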

Files changed (3)
  1. app/agent.py +26 -26
  2. app/main.py +2 -2
  3. main.py +6 -8
app/agent.py CHANGED
@@ -1,8 +1,7 @@
 from pathlib import Path
 
-import logfire
 from pydantic import BaseModel, Field
-from pydantic_ai import Agent
+from pydantic_ai import Agent, ModelRetry, RunContext
 from pydantic_ai.messages import (
     ModelMessage,
     ModelRequest,
@@ -28,24 +27,6 @@ class Statement(BaseModel):
     )
 
 
-model = GoogleModel("gemini-2.5-flash-preview-05-20")
-settings = GoogleModelSettings(
-    google_thinking_config={"thinking_budget": 2048, "include_thoughts": True},
-)
-agent = Agent(
-    model=model,
-    name="elna",
-    model_settings=settings,
-    output_type=list[Statement],
-    system_prompt=(Path(__file__).parent / "system_instruction.txt").read_text(),
-    tools=[
-        dailymed.find_drug_set_ids,
-        dailymed.find_drug_instruction,
-        literature.search_medical_literature,
-    ],
-)
-
-
 def get_context(messages: list[ModelMessage]) -> Context:
     thoughts: list[str] = []
     sources: dict[str, dict] = {}
@@ -67,18 +48,19 @@ def get_context(messages: list[ModelMessage]) -> Context:
     return Context(thoughts=thoughts, sources=sources)
 
 
-def respond(query: str) -> models.Statements:
-    result = agent.run_sync(query)
-    context = get_context(result.all_messages())
+def create_response(ctx: RunContext, output: list[Statement]) -> models.Statements:
+    context = get_context(ctx.messages)
 
     statements = []
-    for statement in result.output:
+    for statement in output:
         sources = []
         for source_id in statement.sources or []:
            try:
                sources.append(context.sources[source_id])
-            except KeyError:
-                logfire.warning(f"citation hallucination '{source_id}'")
+            except KeyError as err:
+                raise ModelRetry(
+                    f"Source ID '{source_id}' not found in literature."
+                ) from err
         statements.append({"text": statement.text, "sources": sources})
 
     return models.Statements.model_validate(
@@ -87,3 +69,21 @@ def respond(query: str) -> models.Statements:
             "thoughts": "\n\n".join(context.thoughts),
         }
     )
+
+
+model = GoogleModel("gemini-2.5-flash-preview-05-20")
+settings = GoogleModelSettings(
+    google_thinking_config={"thinking_budget": 2048, "include_thoughts": True},
+)
+agent = Agent(
+    model=model,
+    name="elna",
+    model_settings=settings,
+    output_type=create_response,
+    system_prompt=(Path(__file__).parent / "system_instruction.txt").read_text(),
+    tools=[
+        dailymed.find_drug_set_ids,
+        dailymed.find_drug_instruction,
+        literature.search_medical_literature,
+    ],
+)
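
With output_type=create_response, the run result's .output is already a citation-checked models.Statements, which is why the old respond() wrapper could be deleted. A rough usage sketch (the import path and query are assumptions based on how main.py calls the agent):

from app import agent

result = agent.agent.run_sync("Does ibuprofen interact with lisinopril?")
statements = result.output  # models.Statements, citations already validated
print(statements.thoughts)
for statement in statements.statements:
    print(statement.text, statement.sources)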
app/main.py CHANGED
@@ -19,5 +19,5 @@ def health_check():
 
 
 @app.get("/ask", response_model=models.Statements)
-def ask(query: str):
-    return agent.respond(query)
+async def ask(query: str):
+    return await agent.agent.run(query)
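
ModelRetry bounds the problem but does not remove it: if the model keeps citing unknown IDs past the retry limit, pydantic-ai raises UnexpectedModelBehavior and the run fails. A hedged sketch of one way the endpoint could surface that (this guard is not part of the commit, and it returns result.output so the body matches response_model):

from fastapi import HTTPException
from pydantic_ai.exceptions import UnexpectedModelBehavior


@app.get("/ask", response_model=models.Statements)
async def ask(query: str):
    try:
        result = await agent.agent.run(query)
    except UnexpectedModelBehavior as err:
        # Raised when the retries are exhausted, e.g. the model keeps
        # citing source IDs that no tool ever returned.
        raise HTTPException(status_code=502, detail=str(err)) from err
    return result.output  # models.Statements produced by create_response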
main.py CHANGED
@@ -9,11 +9,11 @@ logfire.configure(
 logfire.instrument_pydantic_ai()
 
 
-def format_output(statements: models.Statements) -> tuple[str, str]:
+def format_output(statements: list[models.Statement]) -> str:
     sentences = []
     citations = {}
 
-    for statement in statements.statements:
+    for statement in statements:
         sentence = statement.text
 
         if sentence.startswith(("*", "-")):
@@ -33,9 +33,7 @@ def format_output(statements: models.Statements) -> tuple[str, str]:
 
     answer = " ".join(sentences)
     footnotes = "\n".join(f"[^{id}]: {citation}" for citation, id in citations.items())
-    thought = statements.thoughts or ""
-
-    return f"{answer}\n\n{footnotes}", thought
+    return f"{answer}\n\n{footnotes}"
 
 
 def main():
@@ -46,10 +44,10 @@ def main():
 
     if submit:
         with st.spinner("Thinking...", show_time=True):
-            output = agent.respond(query)
-            answer, thoughts = format_output(output)
+            output = agent.agent.run_sync(query).output
+            answer = format_output(output.statements)
         with st.expander("Thinking Process"):
-            st.markdown(thoughts)
+            st.markdown(output.thoughts)
         st.markdown(answer)
 
 
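
format_output now returns only the rendered answer, and the thoughts are read straight from output.thoughts. The footnote join unpacks citations.items() as citation, id, i.e. the dict maps citation text to footnote number; a tiny standalone illustration (values made up):

citations = {"Smith et al. 2020, BMJ": 1, "DailyMed label for ibuprofen": 2}
footnotes = "\n".join(f"[^{id}]: {citation}" for citation, id in citations.items())
print(footnotes)
# [^1]: Smith et al. 2020, BMJ
# [^2]: DailyMed label for ibuprofen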