Spaces:

hhschu
/

elna

Sleeping

David Chu committed on Jun 11

Commit

0f4b0ea

unverified ·

1 Parent(s): 4782a9d

feat: reference citations by a short id

Title + URL citations significantly increase output length,
especially when reused in tables, extending generation time.
Each reference is therefore given a short ID, and statements cite that ID instead.

Files changed (5) hide show

app/agent.py +20 -17
app/system_instruction.txt +16 -87
app/tools/dailymed.py +3 -0
app/tools/literature.py +2 -0
app/tools/utils.py +23 -0

app/agent.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import re
 from pathlib import Path
@@ -23,8 +24,7 @@ RESPONSE_FORMAT = """\
 Return in JSON matching this specification:
-Source = { "title": string, "url": str }
-Statement = { "text": string, "sources": array<Source> }
 Return: array<Statement>
 Do not return the response in a markdown code block.
@@ -37,7 +37,7 @@ SOURCE_TOOL_NAMES = {
 def hydrate_sources(
-    statements: models.Statements, calling_history: list[types.Content]
 ) -> models.Statements:
     sources = {}
     for call in calling_history:
@@ -48,16 +48,21 @@ def hydrate_sources(
                 and func.response
             ):
                 for source in func.response.get("result", []):
-                    sources[source["url"]] = source
-    for statement in statements.statements:
-        if statement.sources:
-            statement.sources = [
-                models.Source.model_validate(sources[source.url])
-                for source in statement.sources
-            ]
-    return statements
 def validate_response(response: types.GenerateContentResponse) -> models.Statements:
@@ -76,17 +81,15 @@ def validate_response(response: types.GenerateContentResponse) -> models.Stateme
         text = match.group(1).strip()
     try:
-        statements = models.Statements.model_validate_json(f'{{"statements":{text}}}')
         statements.thoughts = thoughts
-    except ValidationError:
         statements = models.Statements(
             statements=[models.Statement(text=text)],
             thoughts=thoughts,
         )
-    statements = hydrate_sources(
-        statements, response.automatic_function_calling_history or []
-    )
     return statements

+import json
 import re
 from pathlib import Path
 Return in JSON matching this specification:
+Statement = { "text": string, "sources": array<string> }  // the `sources` array contains the ID of the sources
 Return: array<Statement>
 Do not return the response in a markdown code block.
 def hydrate_sources(
+    statements: list[dict], calling_history: list[types.Content]
 ) -> models.Statements:
     sources = {}
     for call in calling_history:
                 and func.response
             ):
                 for source in func.response.get("result", []):
+                    sources[source["id"]] = source
+    for statement in statements:
+        if statement.get("sources"):
+            statement_sources = []
+            for source_id in statement["sources"]:
+                try:
+                    statement_sources.append(sources[source_id])
+                except KeyError:
+                    print("citaion hullucination")
+                    print(source_id)
+                    print(sources)
+            statement["sources"] = statement_sources
+    return models.Statements.model_validate({"statements": statements})
 def validate_response(response: types.GenerateContentResponse) -> models.Statements:
         text = match.group(1).strip()
     try:
+        statements = hydrate_sources(
+            json.loads(text), response.automatic_function_calling_history or []
+        )
         statements.thoughts = thoughts
+    except (json.decoder.JSONDecodeError, ValidationError):
         statements = models.Statements(
             statements=[models.Statement(text=text)],
             thoughts=thoughts,
         )
     return statements

app/system_instruction.txt CHANGED Viewed

@@ -2,9 +2,9 @@ You are a medical research expert providing evidence-based guidance to healthcar
 ## Response Guidelines
-1. **Conciseness**: Provide focused answers to medical queries using no more than 250 words, prioritizing clinical relevance and actionability
 2. **Evidence-based content**: Base all recommendations on current medical literature, clearly distinguishing between established evidence and emerging findings
-3. **Structured presentation**: Use markdown tables to compare treatments, dosages, diagnostic criteria, or clinical findings when multiple options exist
 4. **Enhanced readability**:
    - Use **bold formatting** for key clinical points, drug names, and critical recommendations
    - Use *italics* for emphasis on important considerations or contraindications
@@ -55,13 +55,14 @@ If none of the sources contain relevant information to answer the query, politel
 Produce JSON matching this specification:
-Source = { "title": string, "url": str }
-Statement = { "text": string, "sources": array<Source> }
 Return: array<Statement>
 Do not return the response in a markdown code block.
-## Good Response Examples
 * Query: Management of bleeding from a duodenal ulcer when endoscopic treatment fails
 Response:
@@ -86,30 +87,12 @@ Response:
   },
   {
     "text": "\n| **TAE**     | 15–40% (↑ vs surgery)  | \\~8%              | \\~9 days      | \\~15%     | Minimally invasive, operator- and technique-dependent. Preferred in high-risk patients. | ",
-    "sources": [
-      {
-        "title": "Bleeding Duodenal Ulcer: Strategies in High-Risk Ulcers.",
-        "url": "https://doi.org/10.1159/000513689"
-      },
-      {
-        "title": "Management of bleeding peptic duodenal ulcer refractory to endoscopic treatment: surgery or transcatheter arterial embolization as first-line therapy? A retrospectivesingle-center study and systematic review.",
-        "url": "https://doi.org/10.1007/s00068-020-01356-7"
-      }
-    ]
   },
   { "text": " |" },
   {
     "text": "\n| **Surgery** | Lower (RR 0.55 vs TAE) | \\~32.2%           | \\~18 days     | \\~14–15%  | Lower rebleeding but higher morbidity. Longer recovery. Requires surgical expertise.    | ",
-    "sources": [
-      {
-        "title": "Bleeding Duodenal Ulcer: Strategies in High-Risk Ulcers.",
-        "url": "https://doi.org/10.1159/000513689"
-      },
-      {
-        "title": "Management of bleeding peptic duodenal ulcer refractory to endoscopic treatment: surgery or transcatheter arterial embolization as first-line therapy? A retrospectivesingle-center study and systematic review.",
-        "url": "https://doi.org/10.1007/s00068-020-01356-7"
-      }
-    ]
   },
   { "text": " |" },
   { "text": "\n\n**Clinical Decision Should Consider:**" },
@@ -135,98 +118,44 @@ Response:
   },
   {
     "text": "\n| **Surgical Technique**          | One gastrojejunal anastomosis            | Two anastomoses (gastrojejunal + jejunojejunal) | OAGB is technically simpler",
-    "sources": [
-      {
-        "title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
-        "url": "https://doi.org/10.1007/s11695-022-06401-5"
-      }
-    ]
   },
   { "text": " |" },
   {
     "text": "\n| **% Excess BMI Loss (5 yrs)**   | \~75.6%                                  | \~71.4%                                         | Non-inferior (YOMEGA study)",
-    "sources": [
-      {
-        "title": "Efficacy and safety of one anastomosis gastric bypass versus Roux-en-Y gastric bypass at 5 years (YOMEGA): a prospective, open-label, non-inferiority, randomised extension study.",
-        "url": "https://doi.org/10.1016/S2213-8587(24)00035-4"
-      }
-    ]
   },
   { "text": " |" },
   {
     "text": "\n| **T2DM Remission**              | Comparable                               | Comparable                                      | Similar remission rates",
-    "sources": [
-      {
-        "title": "Efficacy and safety of one anastomosis gastric bypass versus Roux-en-Y gastric bypass at 5 years (YOMEGA): a prospective, open-label, non-inferiority, randomised extension study.",
-        "url": "https://doi.org/10.1016/S2213-8587(24)00035-4"
-      },
-      {
-        "title": "Remission of Type 2 Diabetes Mellitus (T2DM) after Sleeve Gastrectomy (SG), One-Anastomosis Gastric Bypass (OAGB), and Roux-en-Y Gastric Bypass (RYGB): A Systematic Review.",
-        "url": "https://doi.org/10.3390/medicina59050985"
-      },
-      {
-        "title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
-        "url": "https://doi.org/10.1007/s11695-022-06401-5"
-      }
-    ]
   },
   { "text": " |" },
   {
     "text": "\n| **GERD (clinical or de novo)**  | Higher (41% clinical GERD; 6.3% de novo) | Lower (18% clinical GERD; \~0.5% de novo)       | Significantly more GERD with OAGB",
-    "sources": [
-      {
-        "title": "Efficacy and safety of one anastomosis gastric bypass versus Roux-en-Y gastric bypass at 5 years (YOMEGA): a prospective, open-label, non-inferiority, randomised extension study.",
-        "url": "https://doi.org/10.1016/S2213-8587(24)00035-4"
-      },
-      {
-        "title": "One-anastomosis gastric bypass (OAGB) versus Roux-en-Y gastric bypass (RYGB) as revisional procedures after failed laparoscopic sleeve gastrectomy (LSG): systematic review and meta-analysis of comparative studies.",
-        "url": "https://doi.org/10.1007/s00423-023-03175-x"
-      },
-      {
-        "title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
-        "url": "https://doi.org/10.1007/s11695-022-06401-5"
-      }
-    ]
   },
   { "text": " |" },
   {
     "text": "\n| **Conversion/Revisional Rate**  | \~8% converted from OAGB to RYGB         | Not reported                                    | Due to GERD symptoms",
     "sources": [
-      {
-        "title": "Efficacy and safety of one anastomosis gastric bypass versus Roux-en-Y gastric bypass at 5 years (YOMEGA): a prospective, open-label, non-inferiority, randomised extension study.",
-        "url": "https://doi.org/10.1016/S2213-8587(24)00035-4"
-      }
     ]
   },
   { "text": " |" },
   {
     "text": "\n| **Early Post-op Complications** | Fewer                                    | More                                            | Lower early complication rate in OAGB",
-    "sources": [
-      {
-        "title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
-        "url": "https://doi.org/10.1007/s11695-022-06401-5"
-      }
-    ]
   },
   { "text": " |" },
   {
     "text": "\n| **Operative Time**              | Shorter                                  | Longer                                          | Statistically shorter in OAGB",
-    "sources": [
-      {
-        "title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
-        "url": "https://doi.org/10.1007/s11695-022-06401-5"
-      }
-    ]
   },
   { "text": " |" },
   {
     "text": "\n| **Learning Curve**              | Easier                                   | Steeper                                         | Simpler procedure, useful for training",
-    "sources": [
-      {
-        "title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
-        "url": "https://doi.org/10.1007/s11695-022-06401-5"
-      }
-    ]
   },
   { "text": " |" }
 ]

 ## Response Guidelines
+1. **Conciseness**: Provide focused answers to medical queries in one paragraph, prioritizing clinical relevance and actionability
 2. **Evidence-based content**: Base all recommendations on current medical literature, clearly distinguishing between established evidence and emerging findings
+3. **Structured presentation**: Use Markdown tables to compare treatments, dosages, diagnostic criteria, or clinical findings when multiple options exist
 4. **Enhanced readability**:
    - Use **bold formatting** for key clinical points, drug names, and critical recommendations
    - Use *italics* for emphasis on important considerations or contraindications
 Produce JSON matching this specification:
+Statement = { "text": string, "sources": array<string> }  // the `sources` array contains the ID of the sources
 Return: array<Statement>
 Do not return the response in a markdown code block.
+## Examples
+Below are a few examples showing what a good answer looks like:
 * Query: Management of bleeding from a duodenal ulcer when endoscopic treatment fails
 Response:
   },
   {
     "text": "\n| **TAE**     | 15–40% (↑ vs surgery)  | \\~8%              | \\~9 days      | \\~15%     | Minimally invasive, operator- and technique-dependent. Preferred in high-risk patients. | ",
+    "sources": ["sch-9wn", "sch-4l1"]
   },
   { "text": " |" },
   {
     "text": "\n| **Surgery** | Lower (RR 0.55 vs TAE) | \\~32.2%           | \\~18 days     | \\~14–15%  | Lower rebleeding but higher morbidity. Longer recovery. Requires surgical expertise.    | ",
+    "sources": ["sch-9wn", "sch-4l1"]
   },
   { "text": " |" },
   { "text": "\n\n**Clinical Decision Should Consider:**" },
   },
   {
     "text": "\n| **Surgical Technique**          | One gastrojejunal anastomosis            | Two anastomoses (gastrojejunal + jejunojejunal) | OAGB is technically simpler",
+    "sources": ["sch-8rz"]
   },
   { "text": " |" },
   {
     "text": "\n| **% Excess BMI Loss (5 yrs)**   | \\~75.6%                                  | \\~71.4%                                         | Non-inferior (YOMEGA study)",
+    "sources": ["sch-zi3"]
   },
   { "text": " |" },
   {
     "text": "\n| **T2DM Remission**              | Comparable                               | Comparable                                      | Similar remission rates",
+    "sources": ["sch-zi3", "sch-5vf", "sch-8rz"]
   },
   { "text": " |" },
   {
     "text": "\n| **GERD (clinical or de novo)**  | Higher (41% clinical GERD; 6.3% de novo) | Lower (18% clinical GERD; \\~0.5% de novo)       | Significantly more GERD with OAGB",
+    "sources": ["sch-zi3", "sch-cdf", "sch-8rz"]
   },
   { "text": " |" },
   {
     "text": "\n| **Conversion/Revisional Rate**  | \\~8% converted from OAGB to RYGB         | Not reported                                    | Due to GERD symptoms",
     "sources": [
+      "sch-zi3"
     ]
   },
   { "text": " |" },
   {
     "text": "\n| **Early Post-op Complications** | Fewer                                    | More                                            | Lower early complication rate in OAGB",
+    "sources": ["sch-8rz"]
   },
   { "text": " |" },
   {
     "text": "\n| **Operative Time**              | Shorter                                  | Longer                                          | Statistically shorter in OAGB",
+    "sources": ["sch-8rz"]
   },
   { "text": " |" },
   {
     "text": "\n| **Learning Curve**              | Easier                                   | Steeper                                         | Simpler procedure, useful for training",
+    "sources": ["sch-8rz"]
   },
   { "text": " |" }
 ]

app/tools/dailymed.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import httpx
 def find_drug_set_ids(name: str) -> list[dict]:
     """Get the Set IDs of drugs by a name.
@@ -23,6 +25,7 @@ def find_drug_set_ids(name: str) -> list[dict]:
             "venue": "DailyMed",
             "year": row["published_date"][-4:],  # Original format: "May 05, 2025"
             "url": f"https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid={row['setid']}",
         }
         for row in resp.json()["data"]
     ]

 import httpx
+from app.tools.utils import generate_id
 def find_drug_set_ids(name: str) -> list[dict]:
     """Get the Set IDs of drugs by a name.
             "venue": "DailyMed",
             "year": row["published_date"][-4:],  # Original format: "May 05, 2025"
             "url": f"https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid={row['setid']}",
+            "id": f"med-{generate_id(row['setid'])}",
         }
         for row in resp.json()["data"]
     ]

app/tools/literature.py CHANGED Viewed

@@ -4,6 +4,7 @@ import httpx
 from tenacity import retry, stop_after_attempt, wait_random_exponential
 from app.config import settings
 @retry(
@@ -69,6 +70,7 @@ def format_publication(publication: dict) -> dict:
     publication["doi"] = doi
     if doi:
         publication["url"] = f"https://doi.org/{doi}"
     return publication

 from tenacity import retry, stop_after_attempt, wait_random_exponential
 from app.config import settings
+from app.tools.utils import generate_id
 @retry(
     publication["doi"] = doi
     if doi:
         publication["url"] = f"https://doi.org/{doi}"
+    publication["id"] = f"sch-{generate_id(publication['url'])}"
     return publication

app/tools/utils.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import hashlib
+import string
def generate_id(text: str, length: int = 3) -> str:
    """Derive a short, deterministic identifier from *text*.

    The MD5 digest of ``text`` (UTF-8 encoded) is read as an integer and its
    ``length`` least-significant base-36 digits are rendered with the
    alphabet ``a-z0-9``, most-significant digit first.

    The ID space is small: with the default ``length`` of 3 there are only
    36**3 = 46,656 distinct values, so two different inputs can map to the
    same ID. Callers that need uniqueness must detect collisions themselves
    or request a longer ID.

    Args:
        text: String to fingerprint, e.g. a URL.
        length: Number of characters to return. Defaults to 3.

    Returns:
        A string of exactly ``length`` characters drawn from ``a-z0-9``.

    Raises:
        ValueError: If ``length`` is less than 1.
    """
    if length < 1:
        raise ValueError("length must be at least 1")

    # MD5 serves only as a stable fingerprint here, not as a security
    # primitive; saying so keeps the call usable on FIPS-restricted builds.
    digest = hashlib.md5(text.encode(), usedforsecurity=False).hexdigest()
    remainder = int(digest, 16)

    # 26 lowercase letters + 10 digits -> base 36.
    alphabet = string.ascii_lowercase + string.digits
    base = len(alphabet)

    digits = []
    for _ in range(length):
        remainder, index = divmod(remainder, base)
        digits.append(alphabet[index])

    # Digits were produced least-significant first; emit most-significant first.
    return "".join(reversed(digits))