File size: 2,756 Bytes
1411075
587c291
a5bfe49
026bd38
587c291
 
94b3916
587c291
a5bfe49
587c291
b94ef99
587c291
 
 
 
 
 
 
 
b94ef99
587c291
 
 
 
94b3916
1411075
 
026bd38
587c291
1411075
587c291
 
94b3916
587c291
 
 
 
 
 
 
02711ba
587c291
 
02711ba
587c291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02711ba
587c291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# genesis/api_clients/pubmed_api.py

import os
import requests
import html
from xml.etree import ElementTree as ET

PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

def search_pubmed_literature(query: str, max_results: int = 10):
    """
    Search PubMed literature using NCBI E-utilities API.

    Two requests are made: ``esearch`` to resolve the query to PubMed IDs,
    then ``efetch`` to pull the article records for those IDs.

    Args:
        query (str): Search query (e.g. "synthetic biology cancer therapy").
            ``None``, empty, or whitespace-only queries return ``[]`` without
            touching the network.
        max_results (int): Maximum number of results to return (sent as
            ``retmax`` to esearch).

    Returns:
        list[dict]: One dict per article with keys:
            'title'   - article title, or "No title" when absent/empty
            'authors' - comma-separated author names, or "N/A" when none
            'link'    - PubMed URL for the article, or "" when no PMID found
        An empty list is returned when nothing matches or when a network /
        XML parsing error occurs (the error is printed, not raised).
    """
    if not query or not query.strip():
        return []

    api_key = os.getenv("PUBMED_API_KEY")  # optional
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "xml",
    }
    if api_key:
        params["api_key"] = api_key

    try:
        # Step 1: Search for IDs
        search_url = f"{PUBMED_BASE}/esearch.fcgi"
        search_res = requests.get(search_url, params=params, timeout=10)
        search_res.raise_for_status()
        # Parse the raw bytes so the XML parser applies the document's own
        # encoding declaration instead of relying on a guessed charset.
        root = ET.fromstring(search_res.content)
        ids = [id_elem.text for id_elem in root.findall(".//Id") if id_elem.text]

        if not ids:
            return []

        # Step 2: Fetch article details
        fetch_url = f"{PUBMED_BASE}/efetch.fcgi"
        fetch_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml",
        }
        if api_key:
            fetch_params["api_key"] = api_key

        fetch_res = requests.get(fetch_url, params=fetch_params, timeout=10)
        fetch_res.raise_for_status()
        fetch_root = ET.fromstring(fetch_res.content)

        results = []
        for article in fetch_root.findall(".//PubmedArticle"):
            # Titles may contain inline markup (<i>, <sub>, ...); reading only
            # ``.text`` would truncate them or yield None, so flatten instead.
            title_text = _flatten_text(article.find(".//ArticleTitle"))
            title = html.unescape(title_text) or "No title"

            authors = _extract_authors(article)

            pmid = _flatten_text(article.find(".//PMID"))
            link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else ""

            results.append({
                "title": title,
                "authors": ", ".join(authors) if authors else "N/A",
                "link": link,
            })

        return results

    except requests.exceptions.RequestException as e:
        print(f"[PubMed API Error] {e}")
        return []
    except ET.ParseError as e:
        print(f"[PubMed Parse Error] {e}")
        return []


def _flatten_text(elem):
    """Return all text under *elem* (nested tags included), whitespace-collapsed; "" for None."""
    if elem is None:
        return ""
    return " ".join("".join(elem.itertext()).split())


def _extract_authors(article):
    """Return display names for every <Author> under *article*, skipping empty entries."""
    names = []
    for author in article.findall(".//Author"):
        fore = _flatten_text(author.find("ForeName"))
        last = _flatten_text(author.find("LastName"))
        # Fall back to CollectiveName for consortium/group authors, which
        # carry no ForeName/LastName children.
        name = " ".join(part for part in (fore, last) if part)
        if not name:
            name = _flatten_text(author.find("CollectiveName"))
        if name:
            names.append(name)
    return names