David Chu
committed on
feat: reference citations by a short id
Browse files
Title + URL citations significantly increase output length,
especially when reused in tables, which extends generation time.
This change gives each reference a short ID instead.
- app/agent.py +20 -17
- app/system_instruction.txt +16 -87
- app/tools/dailymed.py +3 -0
- app/tools/literature.py +2 -0
- app/tools/utils.py +23 -0
app/agent.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import re
|
2 |
from pathlib import Path
|
3 |
|
@@ -23,8 +24,7 @@ RESPONSE_FORMAT = """\
|
|
23 |
|
24 |
Return in JSON matching this specification:
|
25 |
|
26 |
-
|
27 |
-
Statement = { "text": string, "sources": array<Source> }
|
28 |
Return: array<Statement>
|
29 |
|
30 |
Do not return the response in a markdown code block.
|
@@ -37,7 +37,7 @@ SOURCE_TOOL_NAMES = {
|
|
37 |
|
38 |
|
39 |
def hydrate_sources(
|
40 |
-
statements:
|
41 |
) -> models.Statements:
|
42 |
sources = {}
|
43 |
for call in calling_history:
|
@@ -48,16 +48,21 @@ def hydrate_sources(
|
|
48 |
and func.response
|
49 |
):
|
50 |
for source in func.response.get("result", []):
|
51 |
-
sources[source["
|
52 |
|
53 |
-
for statement in statements
|
54 |
-
if statement.sources:
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
return statements
|
61 |
|
62 |
|
63 |
def validate_response(response: types.GenerateContentResponse) -> models.Statements:
|
@@ -76,17 +81,15 @@ def validate_response(response: types.GenerateContentResponse) -> models.Stateme
|
|
76 |
text = match.group(1).strip()
|
77 |
|
78 |
try:
|
79 |
-
statements =
|
|
|
|
|
80 |
statements.thoughts = thoughts
|
81 |
-
except ValidationError:
|
82 |
statements = models.Statements(
|
83 |
statements=[models.Statement(text=text)],
|
84 |
thoughts=thoughts,
|
85 |
)
|
86 |
-
|
87 |
-
statements = hydrate_sources(
|
88 |
-
statements, response.automatic_function_calling_history or []
|
89 |
-
)
|
90 |
return statements
|
91 |
|
92 |
|
|
|
1 |
+
import json
|
2 |
import re
|
3 |
from pathlib import Path
|
4 |
|
|
|
24 |
|
25 |
Return in JSON matching this specification:
|
26 |
|
27 |
+
Statement = { "text": string, "sources": array<string> } // the `sources` array contains the ID of the sources
|
|
|
28 |
Return: array<Statement>
|
29 |
|
30 |
Do not return the response in a markdown code block.
|
|
|
37 |
|
38 |
|
39 |
def hydrate_sources(
|
40 |
+
statements: list[dict], calling_history: list[types.Content]
|
41 |
) -> models.Statements:
|
42 |
sources = {}
|
43 |
for call in calling_history:
|
|
|
48 |
and func.response
|
49 |
):
|
50 |
for source in func.response.get("result", []):
|
51 |
+
sources[source["id"]] = source
|
52 |
|
53 |
+
for statement in statements:
|
54 |
+
if statement.get("sources"):
|
55 |
+
statement_sources = []
|
56 |
+
for source_id in statement["sources"]:
|
57 |
+
try:
|
58 |
+
statement_sources.append(sources[source_id])
|
59 |
+
except KeyError:
|
60 |
+
print("citaion hullucination")
|
61 |
+
print(source_id)
|
62 |
+
print(sources)
|
63 |
+
statement["sources"] = statement_sources
|
64 |
|
65 |
+
return models.Statements.model_validate({"statements": statements})
|
66 |
|
67 |
|
68 |
def validate_response(response: types.GenerateContentResponse) -> models.Statements:
|
|
|
81 |
text = match.group(1).strip()
|
82 |
|
83 |
try:
|
84 |
+
statements = hydrate_sources(
|
85 |
+
json.loads(text), response.automatic_function_calling_history or []
|
86 |
+
)
|
87 |
statements.thoughts = thoughts
|
88 |
+
except (json.decoder.JSONDecodeError, ValidationError):
|
89 |
statements = models.Statements(
|
90 |
statements=[models.Statement(text=text)],
|
91 |
thoughts=thoughts,
|
92 |
)
|
|
|
|
|
|
|
|
|
93 |
return statements
|
94 |
|
95 |
|
app/system_instruction.txt
CHANGED
@@ -2,9 +2,9 @@ You are a medical research expert providing evidence-based guidance to healthcar
|
|
2 |
|
3 |
## Response Guidelines
|
4 |
|
5 |
-
1. **Conciseness**: Provide focused answers to medical queries
|
6 |
2. **Evidence-based content**: Base all recommendations on current medical literature, clearly distinguishing between established evidence and emerging findings
|
7 |
-
3. **Structured presentation**: Use
|
8 |
4. **Enhanced readability**:
|
9 |
- Use **bold formatting** for key clinical points, drug names, and critical recommendations
|
10 |
- Use *italics* for emphasis on important considerations or contraindications
|
@@ -55,13 +55,14 @@ If none of the sources contain relevant information to answer the query, politel
|
|
55 |
|
56 |
Produce JSON matching this specification:
|
57 |
|
58 |
-
|
59 |
-
Statement = { "text": string, "sources": array<Source> }
|
60 |
Return: array<Statement>
|
61 |
|
62 |
Do not return the response in a markdown code block.
|
63 |
|
64 |
-
##
|
|
|
|
|
65 |
|
66 |
* Query: Management of bleeding from a duodenal ulcer when endoscopic treatment fails
|
67 |
Response:
|
@@ -86,30 +87,12 @@ Response:
|
|
86 |
},
|
87 |
{
|
88 |
"text": "\n| **TAE** | 15–40% (↑ vs surgery) | \\~8% | \\~9 days | \\~15% | Minimally invasive, operator- and technique-dependent. Preferred in high-risk patients. | ",
|
89 |
-
"sources": [
|
90 |
-
{
|
91 |
-
"title": "Bleeding Duodenal Ulcer: Strategies in High-Risk Ulcers.",
|
92 |
-
"url": "https://doi.org/10.1159/000513689"
|
93 |
-
},
|
94 |
-
{
|
95 |
-
"title": "Management of bleeding peptic duodenal ulcer refractory to endoscopic treatment: surgery or transcatheter arterial embolization as first-line therapy? A retrospectivesingle-center study and systematic review.",
|
96 |
-
"url": "https://doi.org/10.1007/s00068-020-01356-7"
|
97 |
-
}
|
98 |
-
]
|
99 |
},
|
100 |
{ "text": " |" },
|
101 |
{
|
102 |
"text": "\n| **Surgery** | Lower (RR 0.55 vs TAE) | \\~32.2% | \\~18 days | \\~14–15% | Lower rebleeding but higher morbidity. Longer recovery. Requires surgical expertise. | ",
|
103 |
-
"sources": [
|
104 |
-
{
|
105 |
-
"title": "Bleeding Duodenal Ulcer: Strategies in High-Risk Ulcers.",
|
106 |
-
"url": "https://doi.org/10.1159/000513689"
|
107 |
-
},
|
108 |
-
{
|
109 |
-
"title": "Management of bleeding peptic duodenal ulcer refractory to endoscopic treatment: surgery or transcatheter arterial embolization as first-line therapy? A retrospectivesingle-center study and systematic review.",
|
110 |
-
"url": "https://doi.org/10.1007/s00068-020-01356-7"
|
111 |
-
}
|
112 |
-
]
|
113 |
},
|
114 |
{ "text": " |" },
|
115 |
{ "text": "\n\n**Clinical Decision Should Consider:**" },
|
@@ -135,98 +118,44 @@ Response:
|
|
135 |
},
|
136 |
{
|
137 |
"text": "\n| **Surgical Technique** | One gastrojejunal anastomosis | Two anastomoses (gastrojejunal + jejunojejunal) | OAGB is technically simpler",
|
138 |
-
"sources": [
|
139 |
-
{
|
140 |
-
"title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
|
141 |
-
"url": "https://doi.org/10.1007/s11695-022-06401-5"
|
142 |
-
}
|
143 |
-
]
|
144 |
},
|
145 |
{ "text": " |" },
|
146 |
{
|
147 |
"text": "\n| **% Excess BMI Loss (5 yrs)** | \~75.6% | \~71.4% | Non-inferior (YOMEGA study)",
|
148 |
-
"sources": [
|
149 |
-
{
|
150 |
-
"title": "Efficacy and safety of one anastomosis gastric bypass versus Roux-en-Y gastric bypass at 5 years (YOMEGA): a prospective, open-label, non-inferiority, randomised extension study.",
|
151 |
-
"url": "https://doi.org/10.1016/S2213-8587(24)00035-4"
|
152 |
-
}
|
153 |
-
]
|
154 |
},
|
155 |
{ "text": " |" },
|
156 |
{
|
157 |
"text": "\n| **T2DM Remission** | Comparable | Comparable | Similar remission rates",
|
158 |
-
"sources": [
|
159 |
-
{
|
160 |
-
"title": "Efficacy and safety of one anastomosis gastric bypass versus Roux-en-Y gastric bypass at 5 years (YOMEGA): a prospective, open-label, non-inferiority, randomised extension study.",
|
161 |
-
"url": "https://doi.org/10.1016/S2213-8587(24)00035-4"
|
162 |
-
},
|
163 |
-
{
|
164 |
-
"title": "Remission of Type 2 Diabetes Mellitus (T2DM) after Sleeve Gastrectomy (SG), One-Anastomosis Gastric Bypass (OAGB), and Roux-en-Y Gastric Bypass (RYGB): A Systematic Review.",
|
165 |
-
"url": "https://doi.org/10.3390/medicina59050985"
|
166 |
-
},
|
167 |
-
{
|
168 |
-
"title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
|
169 |
-
"url": "https://doi.org/10.1007/s11695-022-06401-5"
|
170 |
-
}
|
171 |
-
]
|
172 |
},
|
173 |
{ "text": " |" },
|
174 |
{
|
175 |
"text": "\n| **GERD (clinical or de novo)** | Higher (41% clinical GERD; 6.3% de novo) | Lower (18% clinical GERD; \~0.5% de novo) | Significantly more GERD with OAGB",
|
176 |
-
"sources": [
|
177 |
-
{
|
178 |
-
"title": "Efficacy and safety of one anastomosis gastric bypass versus Roux-en-Y gastric bypass at 5 years (YOMEGA): a prospective, open-label, non-inferiority, randomised extension study.",
|
179 |
-
"url": "https://doi.org/10.1016/S2213-8587(24)00035-4"
|
180 |
-
},
|
181 |
-
{
|
182 |
-
"title": "One-anastomosis gastric bypass (OAGB) versus Roux-en-Y gastric bypass (RYGB) as revisional procedures after failed laparoscopic sleeve gastrectomy (LSG): systematic review and meta-analysis of comparative studies.",
|
183 |
-
"url": "https://doi.org/10.1007/s00423-023-03175-x"
|
184 |
-
},
|
185 |
-
{
|
186 |
-
"title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
|
187 |
-
"url": "https://doi.org/10.1007/s11695-022-06401-5"
|
188 |
-
}
|
189 |
-
]
|
190 |
},
|
191 |
{ "text": " |" },
|
192 |
{
|
193 |
"text": "\n| **Conversion/Revisional Rate** | \~8% converted from OAGB to RYGB | Not reported | Due to GERD symptoms",
|
194 |
"sources": [
|
195 |
-
|
196 |
-
"title": "Efficacy and safety of one anastomosis gastric bypass versus Roux-en-Y gastric bypass at 5 years (YOMEGA): a prospective, open-label, non-inferiority, randomised extension study.",
|
197 |
-
"url": "https://doi.org/10.1016/S2213-8587(24)00035-4"
|
198 |
-
}
|
199 |
]
|
200 |
},
|
201 |
{ "text": " |" },
|
202 |
{
|
203 |
"text": "\n| **Early Post-op Complications** | Fewer | More | Lower early complication rate in OAGB",
|
204 |
-
"sources": [
|
205 |
-
{
|
206 |
-
"title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
|
207 |
-
"url": "https://doi.org/10.1007/s11695-022-06401-5"
|
208 |
-
}
|
209 |
-
]
|
210 |
},
|
211 |
{ "text": " |" },
|
212 |
{
|
213 |
"text": "\n| **Operative Time** | Shorter | Longer | Statistically shorter in OAGB",
|
214 |
-
"sources": [
|
215 |
-
{
|
216 |
-
"title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
|
217 |
-
"url": "https://doi.org/10.1007/s11695-022-06401-5"
|
218 |
-
}
|
219 |
-
]
|
220 |
},
|
221 |
{ "text": " |" },
|
222 |
{
|
223 |
"text": "\n| **Learning Curve** | Easier | Steeper | Simpler procedure, useful for training",
|
224 |
-
"sources": [
|
225 |
-
{
|
226 |
-
"title": "Efficacy and Safety of One Anastomosis Gastric Bypass Versus Roux-en-Y Gastric Bypass for Obesity: a Meta-analysis and Systematic Review.",
|
227 |
-
"url": "https://doi.org/10.1007/s11695-022-06401-5"
|
228 |
-
}
|
229 |
-
]
|
230 |
},
|
231 |
{ "text": " |" }
|
232 |
]
|
|
|
2 |
|
3 |
## Response Guidelines
|
4 |
|
5 |
+
1. **Conciseness**: Provide focused answers to medical queries in one paragraph, prioritizing clinical relevance and actionability
|
6 |
2. **Evidence-based content**: Base all recommendations on current medical literature, clearly distinguishing between established evidence and emerging findings
|
7 |
+
3. **Structured presentation**: Use Markdown tables to compare treatments, dosages, diagnostic criteria, or clinical findings when multiple options exist
|
8 |
4. **Enhanced readability**:
|
9 |
- Use **bold formatting** for key clinical points, drug names, and critical recommendations
|
10 |
- Use *italics* for emphasis on important considerations or contraindications
|
|
|
55 |
|
56 |
Produce JSON matching this specification:
|
57 |
|
58 |
+
Statement = { "text": string, "sources": array<string> } // the `sources` array contains the ID of the sources
|
|
|
59 |
Return: array<Statement>
|
60 |
|
61 |
Do not return the response in a markdown code block.
|
62 |
|
63 |
+
## Examples
|
64 |
+
|
65 |
+
Below are a few examples showing what a good answer looks like:
|
66 |
|
67 |
* Query: Management of bleeding from a duodenal ulcer when endoscopic treatment fails
|
68 |
Response:
|
|
|
87 |
},
|
88 |
{
|
89 |
"text": "\n| **TAE** | 15–40% (↑ vs surgery) | \\~8% | \\~9 days | \\~15% | Minimally invasive, operator- and technique-dependent. Preferred in high-risk patients. | ",
|
90 |
+
"sources": ["sch-9wn", "sch-4l1"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
},
|
92 |
{ "text": " |" },
|
93 |
{
|
94 |
"text": "\n| **Surgery** | Lower (RR 0.55 vs TAE) | \\~32.2% | \\~18 days | \\~14–15% | Lower rebleeding but higher morbidity. Longer recovery. Requires surgical expertise. | ",
|
95 |
+
"sources": ["sch-9wn", "sch-4l1"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
},
|
97 |
{ "text": " |" },
|
98 |
{ "text": "\n\n**Clinical Decision Should Consider:**" },
|
|
|
118 |
},
|
119 |
{
|
120 |
"text": "\n| **Surgical Technique** | One gastrojejunal anastomosis | Two anastomoses (gastrojejunal + jejunojejunal) | OAGB is technically simpler",
|
121 |
+
"sources": ["sch-8rz"]
|
|
|
|
|
|
|
|
|
|
|
122 |
},
|
123 |
{ "text": " |" },
|
124 |
{
|
125 |
"text": "\n| **% Excess BMI Loss (5 yrs)** | \\~75.6% | \\~71.4% | Non-inferior (YOMEGA study)",
|
126 |
+
"sources": ["sch-zi3"]
|
|
|
|
|
|
|
|
|
|
|
127 |
},
|
128 |
{ "text": " |" },
|
129 |
{
|
130 |
"text": "\n| **T2DM Remission** | Comparable | Comparable | Similar remission rates",
|
131 |
+
"sources": ["sch-zi3", "sch-5vf", "sch-8rz"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
},
|
133 |
{ "text": " |" },
|
134 |
{
|
135 |
"text": "\n| **GERD (clinical or de novo)** | Higher (41% clinical GERD; 6.3% de novo) | Lower (18% clinical GERD; \\~0.5% de novo) | Significantly more GERD with OAGB",
|
136 |
+
"sources": ["sch-zi3", "sch-cdf", "sch-8rz"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
},
|
138 |
{ "text": " |" },
|
139 |
{
|
140 |
"text": "\n| **Conversion/Revisional Rate** | \\~8% converted from OAGB to RYGB | Not reported | Due to GERD symptoms",
|
141 |
"sources": [
|
142 |
+
"sch-zi3"
|
|
|
|
|
|
|
143 |
]
|
144 |
},
|
145 |
{ "text": " |" },
|
146 |
{
|
147 |
"text": "\n| **Early Post-op Complications** | Fewer | More | Lower early complication rate in OAGB",
|
148 |
+
"sources": ["sch-8rz"]
|
|
|
|
|
|
|
|
|
|
|
149 |
},
|
150 |
{ "text": " |" },
|
151 |
{
|
152 |
"text": "\n| **Operative Time** | Shorter | Longer | Statistically shorter in OAGB",
|
153 |
+
"sources": ["sch-8rz"]
|
|
|
|
|
|
|
|
|
|
|
154 |
},
|
155 |
{ "text": " |" },
|
156 |
{
|
157 |
"text": "\n| **Learning Curve** | Easier | Steeper | Simpler procedure, useful for training",
|
158 |
+
"sources": ["sch-8rz"]
|
|
|
|
|
|
|
|
|
|
|
159 |
},
|
160 |
{ "text": " |" }
|
161 |
]
|
app/tools/dailymed.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import httpx
|
2 |
|
|
|
|
|
3 |
|
4 |
def find_drug_set_ids(name: str) -> list[dict]:
|
5 |
"""Get the Set IDs of drugs by a name.
|
@@ -23,6 +25,7 @@ def find_drug_set_ids(name: str) -> list[dict]:
|
|
23 |
"venue": "DailyMed",
|
24 |
"year": row["published_date"][-4:], # Original format: "May 05, 2025"
|
25 |
"url": f"https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid={row['setid']}",
|
|
|
26 |
}
|
27 |
for row in resp.json()["data"]
|
28 |
]
|
|
|
1 |
import httpx
|
2 |
|
3 |
+
from app.tools.utils import generate_id
|
4 |
+
|
5 |
|
6 |
def find_drug_set_ids(name: str) -> list[dict]:
|
7 |
"""Get the Set IDs of drugs by a name.
|
|
|
25 |
"venue": "DailyMed",
|
26 |
"year": row["published_date"][-4:], # Original format: "May 05, 2025"
|
27 |
"url": f"https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid={row['setid']}",
|
28 |
+
"id": f"med-{generate_id(row['setid'])}",
|
29 |
}
|
30 |
for row in resp.json()["data"]
|
31 |
]
|
app/tools/literature.py
CHANGED
@@ -4,6 +4,7 @@ import httpx
|
|
4 |
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
5 |
|
6 |
from app.config import settings
|
|
|
7 |
|
8 |
|
9 |
@retry(
|
@@ -69,6 +70,7 @@ def format_publication(publication: dict) -> dict:
|
|
69 |
publication["doi"] = doi
|
70 |
if doi:
|
71 |
publication["url"] = f"https://doi.org/{doi}"
|
|
|
72 |
return publication
|
73 |
|
74 |
|
|
|
4 |
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
5 |
|
6 |
from app.config import settings
|
7 |
+
from app.tools.utils import generate_id
|
8 |
|
9 |
|
10 |
@retry(
|
|
|
70 |
publication["doi"] = doi
|
71 |
if doi:
|
72 |
publication["url"] = f"https://doi.org/{doi}"
|
73 |
+
publication["id"] = f"sch-{generate_id(publication['url'])}"
|
74 |
return publication
|
75 |
|
76 |
|
app/tools/utils.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import hashlib
|
2 |
+
import string
|
3 |
+
|
4 |
+
|
5 |
+
def generate_id(text: str, length: int = 3) -> str:
    """Derive a short, deterministic ID from ``text``.

    The MD5 digest of ``text`` is read as one big integer, and its
    ``length`` least-significant base-36 digits (``a-z`` followed by
    ``0-9``) form the ID. The same input therefore always maps to the
    same ID.

    MD5 is used only as a cheap, stable fingerprint here, not for security.

    Note:
        With the default ``length`` of 3 there are only 36**3 = 46,656
        possible IDs, so two different inputs *can* receive the same ID.
        Pass a larger ``length`` when more inputs must stay distinct.

    Args:
        text: The string to fingerprint (e.g. a URL or a set ID).
        length: Number of characters in the returned ID. Defaults to 3.

    Returns:
        A string of exactly ``length`` lowercase letters and digits.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be a positive integer")

    # 26 lowercase letters + 10 digits -> base 36.
    characters = string.ascii_lowercase + string.digits
    base = len(characters)

    hash_int = int(hashlib.md5(text.encode()).hexdigest(), 16)

    # Peel off base-36 digits starting with the least significant one.
    digits = []
    for _ in range(length):
        hash_int, remainder = divmod(hash_int, base)
        digits.append(characters[remainder])

    # Digits were collected low-to-high; reverse so the most significant
    # digit comes first, matching the original prepend-based ordering.
    return "".join(reversed(digits))
|