File size: 11,966 Bytes
f2c42a8
 
 
d2f1b05
f2c42a8
 
5a9965e
0f4b0ea
5a9965e
f2c42a8
d2f1b05
 
 
 
 
f2c42a8
792bd75
f2c42a8
5a9965e
 
 
 
 
f2c42a8
 
 
 
 
 
 
 
 
5a9965e
f2c42a8
 
 
d2f1b05
 
 
 
f2c42a8
 
 
3865f47
f2c42a8
 
 
 
 
 
 
3865f47
f2c42a8
 
 
 
 
d38794d
 
f2c42a8
 
 
 
 
3865f47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2c42a8
 
d9a5339
 
 
 
 
 
 
 
 
 
0f4b0ea
d9a5339
 
 
 
22b8aeb
 
 
3865f47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22b8aeb
3865f47
 
 
22b8aeb
 
3865f47
22b8aeb
3865f47
 
22b8aeb
3865f47
22b8aeb
3865f47
22b8aeb
 
 
3865f47
22b8aeb
 
 
03401b5
f2c42a8
22b8aeb
 
f2c42a8
 
22b8aeb
 
 
 
3865f47
22b8aeb
 
f2c42a8
 
 
 
 
 
 
3865f47
f2c42a8
 
 
3865f47
d38794d
 
3865f47
 
 
f2c42a8
 
 
d9a5339
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
from xml.etree import ElementTree

import httpx
from pydantic_ai import ModelRetry
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_random_exponential,
)

from app.config import settings
from app.tools.utils import generate_id


@retry(
    # Retry only transport/HTTP failures. An empty result set raises
    # ModelRetry, which has to reach the caller right away: re-sending the
    # very same query would only spend API quota on the same empty answer.
    retry=retry_if_exception_type(httpx.HTTPError),
    stop=stop_after_attempt(3),
    wait=wait_random_exponential(multiplier=0.5, max=10),
    reraise=True,
)
def search_semantic_scholar(
    query: str, top_k: int = 20, min_citation_count: int = 5
) -> list[dict]:
    """Search the Semantic Scholar Graph API for papers matching ``query``.

    The search is restricted to the "Medicine" and "Biology" fields of study
    and to papers with at least ``min_citation_count`` citations. When
    ``settings.semantic_scholar_api_key`` is set it is sent in the
    ``x-api-key`` header.

    Args:
        query: Free-text search query.
        top_k: Value sent as the API's ``limit`` parameter.
        min_citation_count: Value sent as the API's ``minCitationCount``
            parameter.

    Returns:
        The ``data`` list of the JSON response, one dict per paper.

    Raises:
        httpx.HTTPError: If the request still fails after 3 attempts
            (transport error or non-2xx status).
        ModelRetry: If the response contains no papers. This is raised
            immediately and is never retried.
    """
    headers = {}

    if api_key := settings.semantic_scholar_api_key:
        headers["x-api-key"] = api_key

    resp = httpx.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        params={
            "query": query,
            "limit": top_k,
            "fields": "title,tldr,abstract,externalIds,url,venue,year,citationCount,influentialCitationCount",
            "fieldsOfStudy": "Medicine,Biology",
            "minCitationCount": min_citation_count,
        },
        headers=headers,
        timeout=10.0,
    )
    resp.raise_for_status()
    data = resp.json().get("data", [])
    if not data:
        raise ModelRetry(f"No results for '{query}'. Try different keywords.")
    return data


@retry(stop=stop_after_attempt(5), wait=wait_random_exponential(multiplier=0.5, max=10))
def get_pubmed_metadata(pmids: list[int]) -> dict[str, dict]:
    """Fetch abstracts, publication types and MeSH terms from PubMed.

    Calls the NCBI E-utilities ``efetch`` endpoint for the given PubMed IDs
    and parses the XML response.

    Args:
        pmids: PubMed IDs to look up; sent as the ``id`` query parameter.

    Returns:
        A dict keyed by PMID (as the string found in the XML). Each value has:
        - ``abstract``: the abstract as Markdown, with one ``## <Label>``
          heading per labelled section.
        - ``publication_types``: list of publication type names.
        - ``mesh_terms``: flat list of MeSH descriptor and qualifier names.
        An empty dict is returned when ``pmids`` is empty.
    """
    # Nothing to look up: skip the request (and its retries) altogether.
    if not pmids:
        return {}

    resp = httpx.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
        params={"db": "pubmed", "id": pmids, "retmode": "xml"},
    )
    resp.raise_for_status()
    root = ElementTree.fromstring(resp.text)

    results = {}
    for article in root.iter("PubmedArticle"):
        pmid = article.findtext(
            ".//PubmedData/ArticleIdList/ArticleId[@IdType='pubmed']", default=""
        )

        # The abstract is sometimes divided into multiple
        # sections. Concatenate into one Markdown text.
        sections = []
        for node in article.findall(".//AbstractText"):
            # itertext() also collects text nested inside inline markup
            # (<i>, <sub>, <sup>, ...); ``node.text`` alone would stop at the
            # first child element and truncate the section.
            body = "".join(node.itertext())
            if label := node.attrib.get("Label"):
                sections.append(f"## {label}\n\n{body}")
            else:
                sections.append(body)
        abstract = "\n\n".join(sections).strip()

        # Extract publication types
        # https://www.nlm.nih.gov/mesh/pubtypes.html
        publication_types = [
            pt.text
            for pt in article.findall(".//PublicationTypeList/PublicationType")
            if pt.text
        ]

        # Extract MeSH terms (DescriptorName and QualifierName)
        mesh_terms = []
        for mesh_heading in article.findall(
            ".//MedlineCitation/MeshHeadingList/MeshHeading"
        ):
            descriptor = mesh_heading.findtext("DescriptorName")
            if descriptor:
                mesh_terms.append(descriptor.strip())
            mesh_terms.extend(
                qualifier.text.strip()
                for qualifier in mesh_heading.findall("QualifierName")
                if qualifier.text
            )

        results[pmid] = {
            "abstract": abstract,
            "publication_types": publication_types,
            "mesh_terms": mesh_terms,
        }

    return results


def format_publication(publication: dict) -> dict:
    """Reshape a raw search record, in place, into the tool's output format.

    The ``tldr`` and ``externalIds`` entries are removed, the citation
    counters are renamed, and ``summary``, ``doi`` and ``id`` are added.

    Args:
        publication: Record containing at least ``tldr``, ``externalIds``,
            ``citationCount``, ``influentialCitationCount`` and ``url``.
            It is mutated.

    Returns:
        The same dict object, after modification.
    """
    summary = (publication.pop("tldr") or {}).get("text", "")
    identifiers = publication.pop("externalIds")
    doi = identifiers.get("DOI")

    publication["summary"] = summary
    for new_key, old_key in (
        ("citations", "citationCount"),
        ("influential_citations", "influentialCitationCount"),
    ):
        publication[new_key] = publication.pop(old_key)
    publication["doi"] = doi

    # When a DOI exists, its resolver link replaces the record's own URL.
    if doi:
        publication["url"] = f"https://doi.org/{doi}"

    # The identifier is derived from the final URL (DOI link when available).
    url_digest = generate_id(publication["url"])
    publication["id"] = f"sch-{url_digest}"
    return publication


def search_medical_literature(query: str) -> list[dict]:
    """Search medical literature and prioritize high-quality evidence sources.

    CRITICAL: This tool returns literature that varies significantly in evidence quality.
    You MUST prioritize publications with consolidated evidence based on the following criteria:

    **Type of evidence**
    - Gold Standard Evidence
        - Systematic Review
        - Meta-Analysis
        - Randomized Controlled Trial (RCT)

    - High-Quality Clinical Evidence
        - Controlled Clinical Trial
        - Clinical Trial, Phase III

    - Specialized High-Quality Studies
        - Pragmatic Clinical Trial
        - Clinical Trial, Phase II
        - Equivalence Trial

    - Authoritative References
        - Practice Guideline
        - Pharmacopoeia
        - Consensus Development Conference (NIH or not)

    - Other Important Clinical Evidence
        - Clinical Study
        - Observational Study
        - Validation Study
        - Comparative Study
        - Case Reports
        - Multicenter Study
        - Evaluation Study

    **Credibility of the publisher**:
    - Top general medicine journals
      - The Lancet (or any of its specialty journals)
      - New England Journal of Medicine (NEJM, or any of its specialty journals)
      - Nature Medicine (or any of its medical specialty journals)
      - Journal of the American Medical Association (JAMA, or any of its specialty journals)
      - BMJ
    - Top specialized medicine journals
      - Journal of Clinical Oncology
      - European Heart Journal
      - Circulation
      - Journal of the American College of Cardiology
      - Cancer Cell
      - Annals of Oncology
      - Gastroenterology
      - International Journal of Epidemiology
      - Blood
      - Molecular Psychiatry
      - Journal of the National Cancer Institute
      - Gut
      - Cancer Discovery
      - Clinical Cancer Research
      - Science Translational Medicine
      - Immunity
      - Brain
      - Yearbook of Paediatric Endocrinology
      - Journal of Allergy and Clinical Immunology
      - Annals of Internal Medicine
      - Journal of Clinical Investigation
      - Alzheimer's and Dementia
      - Journal of Hepatology
      - Clinical Infectious Diseases
      - Hepatology
      - Neurology
      - PLoS Medicine
      - Annals of the Rheumatic Diseases
      - Leukemia
      - European Urology
      - Biological Psychiatry
      - Cell Metabolism
      - American Journal of Psychiatry
      - American Journal of Respiratory and Critical Care Medicine
      - European Journal of Heart Failure
      - Journal for ImmunoTherapy of Cancer
      - European Respiratory Journal
      - American Journal of Epidemiology
      - Annals of Neurology
      - Kidney International
      - Diabetes Care
      - Acta Neuropathologica
      - Cancer
      - JCI insight
      - Frontiers in Immunology
      - European Journal of Cancer
      - Journal of Thoracic Oncology
      - Journal of the National Comprehensive Cancer Network : JNCCN
      - Genetics in Medicine
      - Science Immunology
      - Blood advances
      - Journal of the American Heart Association
      - Hypertension
      - Intensive Care Medicine
      - BMC Medicine
      - Circulation Research
      - Arthritis & Rheumatology
      - Diabetologia
      - Journal of the American Society of Nephrology (JASN)
      - Journal of Clinical Endocrinology and Metabolism
      - Genome Medicine
      - Journal of Experimental Medicine
      - American Heart Journal
      - Clinical Gastroenterology and Hepatology
      - Nutrients
      - Diabetes
      - British Journal of Cancer
      - Obstetrical and Gynecological Survey
      - Annals of Surgery
      - Haematologica

    **Reputation of the authors**
    Prioritize publications from professional societies:
    - World Health Organization (WHO)
    - World Medical Association (WMA)
    - Centers for Disease Control and Prevention (CDC)
    - National Institutes of Health (NIH)
    - U.S. Preventive Services Task Force (USPSTF)
    - American College of Physicians (ACP)
    - National Medical Association (NMA)
    - American College of Cardiology (ACC)
    - American Heart Association (AHA)
    - American Society of Clinical Oncology (ASCO)
    - National Comprehensive Cancer Network (NCCN)
    - Infectious Diseases Society of America (IDSA)
    - American Academy of Pediatrics (AAP)
    - American College of Obstetricians and Gynecologists (ACOG)
    - American Psychiatric Association (APA)
    - American College of Surgeons (ACS)
    - American College of Emergency Physicians (ACEP)
    - American Academy of Neurology (AAN)
    - Endocrine Society
    - National Institute for Health and Care Excellence (NICE)
    - European Medical Association (EMA)
    - European Union of Medical Specialists (UEMS)
    - European Medicines Agency (EMA)
    - European Society of Cardiology (ESC)
    - European Respiratory Society (ERS)
    - European Society of Anaesthesiology and Intensive Care (ESAIC)
    - European Academy of Neurology (EAN)
    - European Society for Medical Oncology (ESMO)
    - European Association for the Study of the Liver (EASL)
    - European Society of Clinical Microbiology and Infectious Diseases (ESCMID)
    - European Association of Urology (EAU)
    - European Society of Endocrinology (ESE)
    - European Paediatric Association (EPA/UNEPSA)
    - European Society of Human Reproduction and Embryology (ESHRE)
    - European Federation of Internal Medicine (EFIM)
    - European Stroke Organisation (ESO)
    - European Psychiatric Association (EPA)
    - European Society of Radiology (ESR)
    - European Hematology Association (EHA)
    - European Society for Emergency Medicine (EUSEM)

    EVIDENCE PRIORITIZATION (when analyzing results):
    To the extent possible, the answer should be grounded in top-tier evidence provided by reputable authors and medical societies
    and published in reputable journals.


    SEARCH OPTIMIZATION GUIDELINES:
    1. **Medical Term Extraction**: Focus on core medical concepts, conditions,
       procedures, and medications from the clinical query
    2. **Broad Conceptual Scope**: Use 2-4 core medical terms. Avoid overly
       specific modifiers like "criteria," "indicators," "guidelines,"
       "recommendations," "treatment," or "management"
    3. **Medical Terminology**: Convert colloquial terms to precise medical
       terminology for better literature retrieval
    4. **Search Strategy**: Construct queries that will capture both guidelines
       AND research studies to ensure comprehensive evidence coverage

    SEARCH EXAMPLES:
    - Query: "ACE inhibitor side effects diabetes"
      (captures both guidelines and studies on ACE inhibitors in diabetic patients)
    - Query: "anticoagulation perioperative management elderly"
      (broad enough to find guidelines and RCTs on perioperative anticoagulation)

    Args:
        query: Medical keywords, topic, or concept for literature search.
               Should focus on clinical concepts rather than specific modifiers.

    Returns:
        List of publications with varying evidence quality. Each contains:
        - title, abstract, venue, year, citation counts
        - id (for citation), doi, url
        - summary (TLDR when available)
        - publication_types, mesh_terms (only when PubMed metadata is available)

        IMPORTANT: Examine citation counts, venue, and content to identify
        high-quality sources (guidelines, large RCTs) for response prioritization.
    """
    publications = search_semantic_scholar(query=query, top_k=20)

    # Normalise a missing or null ``externalIds`` to an empty dict once, so
    # every later ``.get`` on it (here and in format_publication) is safe.
    for publication in publications:
        publication["externalIds"] = publication.get("externalIds") or {}

    pmids = [
        pmid
        for publication in publications
        if (pmid := publication["externalIds"].get("PubMed"))
    ]
    # Skip the PubMed round-trip when no result carries a PubMed ID; an
    # id-less efetch request cannot return anything useful.
    pubmed_metadata = get_pubmed_metadata(pmids) if pmids else {}

    outputs = []
    for publication in publications:
        pmid = publication["externalIds"].get("PubMed")
        # PubMed metadata is keyed by the PMID as a string.
        metadata = pubmed_metadata.get(str(pmid)) if pmid else None
        if metadata:
            # Abstracts on PubMed are more complete than the
            # ones returned from Semantic Scholar. Keep the Semantic
            # Scholar abstract when PubMed has none, rather than blanking it.
            if pubmed_abstract := metadata.get("abstract"):
                publication["abstract"] = pubmed_abstract
            publication["publication_types"] = metadata.get("publication_types")
            publication["mesh_terms"] = metadata.get("mesh_terms")

        outputs.append(format_publication(publication))

    return outputs