File size: 11,966 Bytes
f2c42a8 d2f1b05 f2c42a8 5a9965e 0f4b0ea 5a9965e f2c42a8 d2f1b05 f2c42a8 792bd75 f2c42a8 5a9965e f2c42a8 5a9965e f2c42a8 d2f1b05 f2c42a8 3865f47 f2c42a8 3865f47 f2c42a8 d38794d f2c42a8 3865f47 f2c42a8 d9a5339 0f4b0ea d9a5339 22b8aeb 3865f47 22b8aeb 3865f47 22b8aeb 3865f47 22b8aeb 3865f47 22b8aeb 3865f47 22b8aeb 3865f47 22b8aeb 3865f47 22b8aeb 03401b5 f2c42a8 22b8aeb f2c42a8 22b8aeb 3865f47 22b8aeb f2c42a8 3865f47 f2c42a8 3865f47 d38794d 3865f47 f2c42a8 d9a5339 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 |
from xml.etree import ElementTree
import httpx
from pydantic_ai import ModelRetry
from tenacity import retry, stop_after_attempt, wait_random_exponential
from app.config import settings
from app.tools.utils import generate_id
@retry(
    stop=stop_after_attempt(3),
    wait=wait_random_exponential(multiplier=0.5, max=10),
    reraise=True,
)
def _fetch_semantic_scholar(
    query: str, top_k: int, min_citation_count: int
) -> list[dict]:
    """Call the Semantic Scholar paper-search endpoint and return its ``data`` list.

    Only the network round-trip lives here so that the retry policy (up to
    3 attempts, randomized exponential backoff, last error re-raised) covers
    request failures and nothing else.
    """
    headers = {}
    # The API key is optional; it is only sent when one is configured.
    if api_key := settings.semantic_scholar_api_key:
        headers["x-api-key"] = api_key
    resp = httpx.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        params={
            "query": query,
            "limit": top_k,
            "fields": "title,tldr,abstract,externalIds,url,venue,year,citationCount,influentialCitationCount",
            "fieldsOfStudy": "Medicine,Biology",
            "minCitationCount": min_citation_count,
        },
        headers=headers,
        timeout=10.0,
    )
    resp.raise_for_status()
    return resp.json().get("data", [])


def search_semantic_scholar(
    query: str, top_k: int = 20, min_citation_count: int = 5
) -> list[dict]:
    """Search Semantic Scholar for Medicine/Biology papers matching ``query``.

    Args:
        query: Free-text search query.
        top_k: Maximum number of papers requested (sent as ``limit``).
        min_citation_count: Minimum citation count filter sent to the API.

    Returns:
        The non-empty list of paper records from the response's ``data`` field.

    Raises:
        ModelRetry: If the search returned no papers.
        httpx.HTTPError: If the request still fails after the retry attempts.
    """
    data = _fetch_semantic_scholar(query, top_k, min_citation_count)
    if not data:
        # Raised outside the retried helper on purpose: re-sending the same
        # query cannot turn an empty result set into a non-empty one, so an
        # empty result must not consume retry attempts and backoff delays.
        raise ModelRetry(f"No results for '{query}'. Try different keywords.")
    return data
@retry(stop=stop_after_attempt(5), wait=wait_random_exponential(multiplier=0.5, max=10))
def get_pubmed_metadata(pmids: list[int]) -> dict[str, dict]:
    """Fetch abstracts, publication types and MeSH terms from PubMed.

    Args:
        pmids: PubMed identifiers to look up. Each one is converted with
            ``str()``, so numeric strings are accepted as well as ints.

    Returns:
        A mapping from PMID (as a string, taken from the XML) to a dict with
        the keys ``abstract`` (Markdown text), ``publication_types`` (list of
        strings) and ``mesh_terms`` (list of descriptor and qualifier names).
        An empty mapping is returned when ``pmids`` is empty.

    Raises:
        tenacity.RetryError: If every one of the 5 attempts fails (``reraise``
            is not enabled on the retry decorator).
    """
    # Nothing to look up: avoid sending a request without any id, which
    # would only fail and burn through all retry attempts.
    if not pmids:
        return {}

    resp = httpx.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
        params={
            "db": "pubmed",
            # EFetch documents ``id`` as a comma-delimited UID list.
            "id": ",".join(str(pmid) for pmid in pmids),
            "retmode": "xml",
        },
    )
    resp.raise_for_status()
    root = ElementTree.fromstring(resp.text)

    results = {}
    for article in root.iter("PubmedArticle"):
        pmid = article.findtext(
            ".//PubmedData/ArticleIdList/ArticleId[@IdType='pubmed']", default=""
        )

        # The abstract is sometimes divided into multiple
        # sections. Concatenate into one Markdown text.
        abstract_parts = []
        for section in article.findall(".//AbstractText"):
            if label := section.attrib.get("Label"):
                abstract_parts.append(f"## {label}\n\n")
            # ``itertext`` keeps text that follows inline markup children;
            # ``.text`` alone would stop at the first child element and
            # truncate the section.
            section_text = "".join(section.itertext())
            abstract_parts.append(f"{section_text}\n\n")

        # Extract publication types
        # https://www.nlm.nih.gov/mesh/pubtypes.html
        publication_types = [
            pt.text
            for pt in article.findall(".//PublicationTypeList/PublicationType")
            if pt.text
        ]

        # Extract MeSH terms (DescriptorName and QualifierName)
        mesh_terms = []
        for mesh_heading in article.findall(
            ".//MedlineCitation/MeshHeadingList/MeshHeading"
        ):
            descriptor = mesh_heading.findtext("DescriptorName")
            if descriptor:
                mesh_terms.append(descriptor.strip())
            for qualifier in mesh_heading.findall("QualifierName"):
                if qualifier.text:
                    mesh_terms.append(qualifier.text.strip())

        results[pmid] = {
            "abstract": "".join(abstract_parts).strip(),
            "publication_types": publication_types,
            "mesh_terms": mesh_terms,
        }
    return results
def format_publication(publication: dict) -> dict:
    """Reshape a Semantic Scholar paper record in place and return it.

    The ``tldr``, ``externalIds``, ``citationCount`` and
    ``influentialCitationCount`` keys are removed and replaced by
    ``summary``, ``doi``, ``citations`` and ``influential_citations``.
    When a DOI is present the ``url`` is replaced by the doi.org link.
    An ``id`` of the form ``sch-<generate_id(url)>`` is always added.

    Args:
        publication: Paper record; it is mutated, not copied.

    Returns:
        The same ``publication`` dict that was passed in.
    """
    tldr_payload = publication.pop("tldr")
    identifiers = publication.pop("externalIds")
    doi = identifiers.get("DOI")

    # ``tldr`` may be null in the record, in which case the summary is empty.
    publication["summary"] = (tldr_payload or {}).get("text", "")

    # Rename the camelCase citation counters to the output key names.
    renames = (
        ("citations", "citationCount"),
        ("influential_citations", "influentialCitationCount"),
    )
    for target_key, source_key in renames:
        publication[target_key] = publication.pop(source_key)

    publication["doi"] = doi
    if doi:
        publication["url"] = f"https://doi.org/{doi}"

    # The id is derived from whichever URL the record ends up with.
    identifier = generate_id(publication["url"])
    publication["id"] = f"sch-{identifier}"
    return publication
def search_medical_literature(query: str) -> list[dict]:
    """Search medical literature and prioritize high-quality evidence sources.

    CRITICAL: This tool returns literature that varies significantly in evidence quality.
    You MUST prioritize publications with consolidated evidence based on the following criteria:

    **Type of evidence**
    - Gold Standard Evidence
        - Systematic Review
        - Meta-Analysis
        - Randomized Controlled Trial (RCT)
    - High-Quality Clinical Evidence
        - Controlled Clinical Trial
        - Clinical Trial, Phase III
    - Specialized High-Quality Studies
        - Pragmatic Clinical Trial
        - Clinical Trial, Phase II
        - Equivalence Trial
    - Authoritative References
        - Practice Guideline
        - Pharmacopoeia
        - Consensus Development Conference (NIH or not)
    - Other Important Clinical Evidence
        - Clinical Study
        - Observational Study
        - Validation Study
        - Comparative Study
        - Case Reports
        - Multicenter Study
        - Evaluation Study

    **Credibility of the publisher**:
    - Top general medicine journals
        - The Lancet (or any of its specialty journals)
        - New England Journal of Medicine (NEJM, or any of its specialty journals)
        - Nature Medicine (or any of its medical specialty journals)
        - Journal of the American Medical Association (JAMA, or any of its specialty journals)
        - BMJ
    - Top specialized medicine journals
        - Journal of Clinical Oncology
        - European Heart Journal
        - Circulation
        - Journal of the American College of Cardiology
        - Cancer Cell
        - Annals of Oncology
        - Gastroenterology
        - International Journal of Epidemiology
        - Blood
        - Molecular Psychiatry
        - Journal of the National Cancer Institute
        - Gut
        - Cancer Discovery
        - Clinical Cancer Research
        - Science Translational Medicine
        - Immunity
        - Brain
        - Yearbook of Paediatric Endocrinology
        - Journal of Allergy and Clinical Immunology
        - Annals of Internal Medicine
        - Journal of Clinical Investigation
        - Alzheimer's and Dementia
        - Journal of Hepatology
        - Clinical Infectious Diseases
        - Hepatology
        - Neurology
        - PLoS Medicine
        - Annals of the Rheumatic Diseases
        - Leukemia
        - European Urology
        - Biological Psychiatry
        - Cell Metabolism
        - American Journal of Psychiatry
        - American Journal of Respiratory and Critical Care Medicine
        - European Journal of Heart Failure
        - Journal for ImmunoTherapy of Cancer
        - European Respiratory Journal
        - American Journal of Epidemiology
        - Annals of Neurology
        - Kidney International
        - Diabetes Care
        - Acta Neuropathologica
        - Cancer
        - JCI insight
        - Frontiers in Immunology
        - European Journal of Cancer
        - Journal of Thoracic Oncology
        - Journal of the National Comprehensive Cancer Network : JNCCN
        - Genetics in Medicine
        - Science Immunology
        - Blood advances
        - Journal of the American Heart Association
        - Hypertension
        - Intensive Care Medicine
        - BMC Medicine
        - Circulation Research
        - Arthritis & Rheumatology
        - Diabetologia
        - Journal of the American Society of Nephrology (JASN)
        - Journal of Clinical Endocrinology and Metabolism
        - Genome Medicine
        - Journal of Experimental Medicine
        - American Heart Journal
        - Clinical Gastroenterology and Hepatology
        - Nutrients
        - Diabetes
        - British Journal of Cancer
        - Obstetrical and Gynecological Survey
        - Annals of Surgery
        - Haematologica

    **Reputation of the authors**
    Prioritize publications from professional societies:
    - World Health Organization (WHO)
    - World Medical Association (WMA)
    - Centers for Disease Control and Prevention (CDC)
    - National Institutes of Health (NIH)
    - U.S. Preventive Services Task Force (USPSTF)
    - American College of Physicians (ACP)
    - National Medical Association (NMA)
    - American College of Cardiology (ACC)
    - American Heart Association (AHA)
    - American Society of Clinical Oncology (ASCO)
    - National Comprehensive Cancer Network (NCCN)
    - Infectious Diseases Society of America (IDSA)
    - American Academy of Pediatrics (AAP)
    - American College of Obstetricians and Gynecologists (ACOG)
    - American Psychiatric Association (APA)
    - American College of Surgeons (ACS)
    - American College of Emergency Physicians (ACEP)
    - American Academy of Neurology (AAN)
    - Endocrine Society
    - National Institute for Health and Care Excellence (NICE)
    - European Medical Association (EMA)
    - European Union of Medical Specialists (UEMS)
    - European Medicines Agency (EMA)
    - European Society of Cardiology (ESC)
    - European Respiratory Society (ERS)
    - European Society of Anaesthesiology and Intensive Care (ESAIC)
    - European Academy of Neurology (EAN)
    - European Society for Medical Oncology (ESMO)
    - European Association for the Study of the Liver (EASL)
    - European Society of Clinical Microbiology and Infectious Diseases (ESCMID)
    - European Association of Urology (EAU)
    - European Society of Endocrinology (ESE)
    - European Paediatric Association (EPA/UNEPSA)
    - European Society of Human Reproduction and Embryology (ESHRE)
    - European Federation of Internal Medicine (EFIM)
    - European Stroke Organisation (ESO)
    - European Psychiatric Association (EPA)
    - European Society of Radiology (ESR)
    - European Hematology Association (EHA)
    - European Society for Emergency Medicine (EUSEM)

    EVIDENCE PRIORITIZATION (when analyzing results):
    To the extent possible, the answer should be grounded in top-tier evidence provided by reputable authors and medical societies
    and published in reputable journals.

    SEARCH OPTIMIZATION GUIDELINES:
    1. **Medical Term Extraction**: Focus on core medical concepts, conditions,
       procedures, and medications from the clinical query
    2. **Broad Conceptual Scope**: Use 2-4 core medical terms. Avoid overly
       specific modifiers like "criteria," "indicators," "guidelines,"
       "recommendations," "treatment," or "management"
    3. **Medical Terminology**: Convert colloquial terms to precise medical
       terminology for better literature retrieval
    4. **Search Strategy**: Construct queries that will capture both guidelines
       AND research studies to ensure comprehensive evidence coverage

    SEARCH EXAMPLES:
    - Query: "ACE inhibitor side effects diabetes"
      (captures both guidelines and studies on ACE inhibitors in diabetic patients)
    - Query: "anticoagulation perioperative management elderly"
      (broad enough to find guidelines and RCTs on perioperative anticoagulation)

    Args:
        query: Medical keywords, topic, or concept for literature search.
            Should focus on clinical concepts rather than specific modifiers.

    Returns:
        List of publications with varying evidence quality. Each contains:
        - title, abstract, venue, year, citation counts
        - id (for citation), doi, url
        - summary (TLDR when available)

    IMPORTANT: Examine citation counts, venue, and content to identify
    high-quality sources (guidelines, large RCTs) for response prioritization.
    """
    # NOTE(review): the docstring above reads like an LLM tool description and
    # is presumably consumed as one — confirm before rewording it.
    publications = search_semantic_scholar(query=query, top_k=20)

    # Normalise ``externalIds`` once so a null or missing value cannot break
    # the PubMed lookups below or the later formatting step.
    for publication in publications:
        publication["externalIds"] = publication.get("externalIds") or {}

    pmids = [
        pmid
        for publication in publications
        if (pmid := publication["externalIds"].get("PubMed"))
    ]
    # Skip the PubMed round-trip entirely when no result carries a PubMed id.
    pubmed_metadata = get_pubmed_metadata(pmids) if pmids else {}

    outputs = []
    for publication in publications:
        pmid = publication["externalIds"].get("PubMed")
        # PubMed metadata is keyed by the PMID as a string.
        metadata = pubmed_metadata.get(str(pmid)) if pmid else None
        if metadata:
            # Abstracts on PubMed are more complete than the
            # ones returned from Semantic Scholar. Keep the Semantic Scholar
            # abstract when PubMed has none, rather than blanking it out.
            publication["abstract"] = metadata.get("abstract") or publication.get(
                "abstract"
            )
            publication["publication_types"] = metadata.get("publication_types")
            publication["mesh_terms"] = metadata.get("mesh_terms")
        outputs.append(format_publication(publication))
    return outputs
|