|
from xml.etree import ElementTree |
|
|
|
import httpx |
|
from pydantic_ai import ModelRetry |
|
from tenacity import retry, stop_after_attempt, wait_random_exponential |
|
|
|
from app.config import settings |
|
from app.tools.utils import generate_id |
|
|
|
|
|
@retry(
    stop=stop_after_attempt(3),
    wait=wait_random_exponential(multiplier=0.5, max=10),
    reraise=True,
)
def _fetch_semantic_scholar_papers(
    query: str, top_k: int, min_citation_count: int
) -> list[dict]:
    """Perform the Semantic Scholar search request and return its ``data`` payload.

    Only the network call lives behind the retry decorator: up to 3 attempts
    with randomized exponential backoff, re-raising the last error unchanged.
    """
    headers = {}
    # The API key is optional; without it the request is sent unauthenticated.
    if api_key := settings.semantic_scholar_api_key:
        headers["x-api-key"] = api_key

    resp = httpx.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        params={
            "query": query,
            "limit": top_k,
            "fields": "title,tldr,abstract,externalIds,url,venue,year,citationCount,influentialCitationCount",
            "fieldsOfStudy": "Medicine,Biology",
            "minCitationCount": min_citation_count,
        },
        headers=headers,
        timeout=10.0,
    )
    resp.raise_for_status()
    return resp.json().get("data", [])


def search_semantic_scholar(
    query: str, top_k: int = 20, min_citation_count: int = 5
) -> list[dict]:
    """Search Semantic Scholar for Medicine/Biology papers matching ``query``.

    Args:
        query: Free-text search query.
        top_k: Maximum number of papers requested (sent as ``limit``).
        min_citation_count: Minimum citation count filter sent to the API.

    Returns:
        The non-empty list of paper dicts from the response's ``data`` field.

    Raises:
        ModelRetry: If the search returned no papers.
        httpx.HTTPError: If the request still fails after the retries.
    """
    data = _fetch_semantic_scholar_papers(query, top_k, min_citation_count)
    # Raised outside the retried helper on purpose: repeating the identical
    # query cannot change an empty result, so it must not consume retries.
    if not data:
        raise ModelRetry(f"No results for '{query}'. Try different keywords.")
    return data
|
|
|
|
|
@retry(stop=stop_after_attempt(5), wait=wait_random_exponential(multiplier=0.5, max=10))
def get_pubmed_metadata(pmids: list[int]) -> dict[str, dict]:
    """Fetch abstracts, publication types and MeSH terms from PubMed.

    Args:
        pmids: PubMed identifiers to look up. Each value is converted with
            ``str()``, so string identifiers work as well.

    Returns:
        A mapping from PMID (as a string) to a dict with the keys
        ``abstract`` (labelled sections rendered as ``## Label`` headings),
        ``publication_types`` and ``mesh_terms`` (descriptor names followed
        by their qualifier names). Empty when ``pmids`` is empty.
    """
    # Nothing to look up: skip the request instead of sending one with no ids.
    if not pmids:
        return {}

    resp = httpx.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
        params={
            "db": "pubmed",
            # E-utilities documents ``id`` as a comma-delimited UID list.
            "id": ",".join(str(pmid) for pmid in pmids),
            "retmode": "xml",
        },
    )
    resp.raise_for_status()
    root = ElementTree.fromstring(resp.text)

    results = {}
    for article in root.iter("PubmedArticle"):
        pmid = (
            article.findtext(
                ".//PubmedData/ArticleIdList/ArticleId[@IdType='pubmed']", default=""
            )
            or article.findtext("MedlineCitation/PMID", default="")
        ).strip()
        if not pmid:
            # Without an identifier the record cannot be matched by callers.
            continue

        sections = []
        for node in article.findall(".//AbstractText"):
            # itertext() keeps text that follows inline markup (<i>, <sub>,
            # <sup>, ...); ``node.text`` alone stops at the first child tag.
            body = "".join(node.itertext())
            if label := node.attrib.get("Label"):
                sections.append(f"## {label}\n\n{body}")
            else:
                sections.append(body)

        publication_types = [
            pt.text
            for pt in article.findall(".//PublicationTypeList/PublicationType")
            if pt.text
        ]

        mesh_terms = []
        for mesh_heading in article.findall(
            ".//MedlineCitation/MeshHeadingList/MeshHeading"
        ):
            descriptor = mesh_heading.findtext("DescriptorName")
            if descriptor:
                mesh_terms.append(descriptor.strip())
            for qualifier in mesh_heading.findall("QualifierName"):
                if qualifier.text:
                    mesh_terms.append(qualifier.text.strip())

        results[pmid] = {
            "abstract": "\n\n".join(sections).strip(),
            "publication_types": publication_types,
            "mesh_terms": mesh_terms,
        }

    return results
|
|
|
|
|
def format_publication(publication: dict) -> dict:
    """Reshape a Semantic Scholar paper dict into the tool's output format.

    The dict is modified in place and also returned. ``tldr``,
    ``externalIds``, ``citationCount`` and ``influentialCitationCount`` are
    removed and replaced by ``summary``, ``doi``, ``citations`` and
    ``influential_citations``. When a DOI is present, ``url`` is replaced by
    the ``https://doi.org/`` link. ``id`` is ``sch-`` followed by
    ``generate_id`` applied to the final ``url``.

    Args:
        publication: Paper dict as returned by the Semantic Scholar search.

    Returns:
        The same dict, reshaped.

    Raises:
        KeyError: If the paper has neither a DOI nor a ``url``.
    """
    # ``tldr`` and ``externalIds`` may be null or missing; treat both as empty
    # so a sparse record does not crash formatting.
    tldr = publication.pop("tldr", None) or {}
    external_ids = publication.pop("externalIds", None) or {}
    doi = external_ids.get("DOI")

    publication["summary"] = tldr.get("text") or ""
    publication["citations"] = publication.pop("citationCount", None)
    publication["influential_citations"] = publication.pop(
        "influentialCitationCount", None
    )
    publication["doi"] = doi
    if doi:
        # The DOI link replaces the Semantic Scholar URL when one is available.
        publication["url"] = f"https://doi.org/{doi}"
    publication["id"] = f"sch-{generate_id(publication['url'])}"
    return publication
|
|
|
|
|
def search_medical_literature(query: str) -> list[dict]:
    """Search medical literature and prioritize high-quality evidence sources.

    CRITICAL: This tool returns literature that varies significantly in evidence quality.
    You MUST prioritize publications with consolidated evidence based on the following criteria:

    **Type of evidence**
    - Gold Standard Evidence
        - Systematic Review
        - Meta-Analysis
        - Randomized Controlled Trial (RCT)

    - High-Quality Clinical Evidence
        - Controlled Clinical Trial
        - Clinical Trial, Phase III

    - Specialized High-Quality Studies
        - Pragmatic Clinical Trial
        - Clinical Trial, Phase II
        - Equivalence Trial

    - Authoritative References
        - Practice Guideline
        - Pharmacopoeia
        - Consensus Development Conference (NIH or not)

    - Other Important Clinical Evidence
        - Clinical Study
        - Observational Study
        - Validation Study
        - Comparative Study
        - Case Reports
        - Multicenter Study
        - Evaluation Study

    **Credibility of the publisher**:
    - Top general medicine journals
        - The Lancet (or any of its specialty journals)
        - New England Journal of Medicine (NEJM, or any of its specialty journals)
        - Nature Medicine (or any of its medical specialty journals)
        - Journal of the American Medical Association (JAMA, or any of its specialty journals)
        - BMJ
    - Top specialized medicine journals
        - Journal of Clinical Oncology
        - European Heart Journal
        - Circulation
        - Journal of the American College of Cardiology
        - Cancer Cell
        - Annals of Oncology
        - Gastroenterology
        - International Journal of Epidemiology
        - Blood
        - Molecular Psychiatry
        - Journal of the National Cancer Institute
        - Gut
        - Cancer Discovery
        - Clinical Cancer Research
        - Science Translational Medicine
        - Immunity
        - Brain
        - Yearbook of Paediatric Endocrinology
        - Journal of Allergy and Clinical Immunology
        - Annals of Internal Medicine
        - Journal of Clinical Investigation
        - Alzheimer's and Dementia
        - Journal of Hepatology
        - Clinical Infectious Diseases
        - Hepatology
        - Neurology
        - PLoS Medicine
        - Annals of the Rheumatic Diseases
        - Leukemia
        - European Urology
        - Biological Psychiatry
        - Cell Metabolism
        - American Journal of Psychiatry
        - American Journal of Respiratory and Critical Care Medicine
        - European Journal of Heart Failure
        - Journal for ImmunoTherapy of Cancer
        - European Respiratory Journal
        - American Journal of Epidemiology
        - Annals of Neurology
        - Kidney International
        - Diabetes Care
        - Acta Neuropathologica
        - Cancer
        - JCI insight
        - Frontiers in Immunology
        - European Journal of Cancer
        - Journal of Thoracic Oncology
        - Journal of the National Comprehensive Cancer Network : JNCCN
        - Genetics in Medicine
        - Science Immunology
        - Blood advances
        - Journal of the American Heart Association
        - Hypertension
        - Intensive Care Medicine
        - BMC Medicine
        - Circulation Research
        - Arthritis & Rheumatology
        - Diabetologia
        - Journal of the American Society of Nephrology (JASN)
        - Journal of Clinical Endocrinology and Metabolism
        - Genome Medicine
        - Journal of Experimental Medicine
        - American Heart Journal
        - Clinical Gastroenterology and Hepatology
        - Nutrients
        - Diabetes
        - British Journal of Cancer
        - Obstetrical and Gynecological Survey
        - Annals of Surgery
        - Haematologica

    **Reputation of the authors**
    Prioritize publications from professional societies:
    - World Health Organization (WHO)
    - World Medical Association (WMA)
    - Centers for Disease Control and Prevention (CDC)
    - National Institutes of Health (NIH)
    - U.S. Preventive Services Task Force (USPSTF)
    - American College of Physicians (ACP)
    - National Medical Association (NMA)
    - American College of Cardiology (ACC)
    - American Heart Association (AHA)
    - American Society of Clinical Oncology (ASCO)
    - National Comprehensive Cancer Network (NCCN)
    - Infectious Diseases Society of America (IDSA)
    - American Academy of Pediatrics (AAP)
    - American College of Obstetricians and Gynecologists (ACOG)
    - American Psychiatric Association (APA)
    - American College of Surgeons (ACS)
    - American College of Emergency Physicians (ACEP)
    - American Academy of Neurology (AAN)
    - Endocrine Society
    - National Institute for Health and Care Excellence (NICE)
    - European Medical Association (EMA)
    - European Union of Medical Specialists (UEMS)
    - European Medicines Agency (EMA)
    - European Society of Cardiology (ESC)
    - European Respiratory Society (ERS)
    - European Society of Anaesthesiology and Intensive Care (ESAIC)
    - European Academy of Neurology (EAN)
    - European Society for Medical Oncology (ESMO)
    - European Association for the Study of the Liver (EASL)
    - European Society of Clinical Microbiology and Infectious Diseases (ESCMID)
    - European Association of Urology (EAU)
    - European Society of Endocrinology (ESE)
    - European Paediatric Association (EPA/UNEPSA)
    - European Society of Human Reproduction and Embryology (ESHRE)
    - European Federation of Internal Medicine (EFIM)
    - European Stroke Organisation (ESO)
    - European Psychiatric Association (EPA)
    - European Society of Radiology (ESR)
    - European Hematology Association (EHA)
    - European Society for Emergency Medicine (EUSEM)

    EVIDENCE PRIORITIZATION (when analyzing results):
    To the extent possible, the answer should be grounded in top-tier evidence provided by reputable authors and medical societies
    and published in reputable journals.


    SEARCH OPTIMIZATION GUIDELINES:
    1. **Medical Term Extraction**: Focus on core medical concepts, conditions,
       procedures, and medications from the clinical query
    2. **Broad Conceptual Scope**: Use 2-4 core medical terms. Avoid overly
       specific modifiers like "criteria," "indicators," "guidelines,"
       "recommendations," "treatment," or "management"
    3. **Medical Terminology**: Convert colloquial terms to precise medical
       terminology for better literature retrieval
    4. **Search Strategy**: Construct queries that will capture both guidelines
       AND research studies to ensure comprehensive evidence coverage

    SEARCH EXAMPLES:
    - Query: "ACE inhibitor side effects diabetes"
      (captures both guidelines and studies on ACE inhibitors in diabetic patients)
    - Query: "anticoagulation perioperative management elderly"
      (broad enough to find guidelines and RCTs on perioperative anticoagulation)

    Args:
        query: Medical keywords, topic, or concept for literature search.
            Should focus on clinical concepts rather than specific modifiers.

    Returns:
        List of publications with varying evidence quality. Each contains:
        - title, abstract, venue, year, citation counts
        - id (for citation), doi, url
        - summary (TLDR when available)
        - publication_types, mesh_terms (when PubMed metadata is available)

        IMPORTANT: Examine citation counts, venue, and content to identify
        high-quality sources (guidelines, large RCTs) for response prioritization.
    """
    publications = search_semantic_scholar(query=query, top_k=20)

    # Defensive: treat a null/missing ``externalIds`` as empty so the lookups
    # below (and the later formatting step) do not crash on sparse records.
    for publication in publications:
        publication["externalIds"] = publication.get("externalIds") or {}

    pmids = [
        publication["externalIds"]["PubMed"]
        for publication in publications
        if publication["externalIds"].get("PubMed")
    ]
    # Skip the PubMed round trip entirely when no result carries a PubMed id.
    pubmed_metadata = get_pubmed_metadata(pmids) if pmids else {}

    outputs = []
    for publication in publications:
        pmid = publication["externalIds"].get("PubMed")
        # PubMed metadata is keyed by string PMID.
        metadata = pubmed_metadata.get(str(pmid)) if pmid else None
        if metadata:
            # Prefer the PubMed abstract, but keep the Semantic Scholar one
            # when PubMed has no abstract text for this record.
            publication["abstract"] = metadata.get("abstract") or publication.get(
                "abstract"
            )
            publication["publication_types"] = metadata.get("publication_types")
            publication["mesh_terms"] = metadata.get("mesh_terms")

        outputs.append(format_publication(publication))

    return outputs
|
|