# Spaces: mgbam / Synthetic_Biology (Sleeping)
# File size: 2,756 Bytes

# genesis/api_clients/pubmed_api.py

import os
import requests
import html
from xml.etree import ElementTree as ET

PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

def _element_text(elem):
    """Return the whitespace-normalised text of *elem*, or "" if it is missing.

    ``itertext()`` is used instead of ``.text`` so that text nested inside
    inline markup (e.g. ``<i>``, ``<sub>``) is kept rather than truncated.
    """
    if elem is None:
        return ""
    return " ".join("".join(elem.itertext()).split())


def _author_names(article):
    """Return display names for every ``Author`` element found under *article*.

    A name is built from whichever of ``ForeName`` / ``LastName`` is present;
    entries with neither fall back to ``CollectiveName``. Authors that yield
    no text at all are skipped.
    """
    names = []
    for author in article.findall(".//Author"):
        fore = _element_text(author.find("ForeName"))
        last = _element_text(author.find("LastName"))
        name = " ".join(part for part in (fore, last) if part)
        if not name:
            name = _element_text(author.find("CollectiveName"))
        if name:
            names.append(name)
    return names


def _parse_pubmed_articles(xml_payload):
    """Convert an efetch XML payload into a list of result dicts.

    Args:
        xml_payload: XML document (bytes or str) containing ``PubmedArticle``
            elements.

    Returns:
        list[dict]: One dict per ``PubmedArticle`` with keys 'title',
        'authors' and 'link'.

    Raises:
        xml.etree.ElementTree.ParseError: If the payload is not well-formed.
    """
    results = []
    for article in ET.fromstring(xml_payload).findall(".//PubmedArticle"):
        title = html.unescape(_element_text(article.find(".//ArticleTitle")))
        authors = _author_names(article)
        pmid = _element_text(article.find(".//PMID"))
        results.append({
            "title": title or "No title",
            "authors": ", ".join(authors) if authors else "N/A",
            "link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" if pmid else "",
        })
    return results


def search_pubmed_literature(query: str, max_results: int = 10):
    """
    Search PubMed literature using NCBI E-utilities API.

    Two requests are made: ``esearch`` to resolve the query to PubMed IDs,
    then ``efetch`` to download the matching article records. If the
    ``PUBMED_API_KEY`` environment variable is set, it is sent with both
    requests.

    Args:
        query (str): Search query (e.g. "synthetic biology cancer therapy").
            ``None``, empty or whitespace-only queries return ``[]`` without
            any network access.
        max_results (int): Maximum number of results to return. Values of
            zero or less return ``[]`` without any network access.

    Returns:
        list[dict]: Each dict has 'title' (str, "No title" when absent),
        'authors' (comma-separated names, or "N/A" when none are listed) and
        'link' (PubMed URL, or "" when the record has no PMID). An empty list
        is returned when nothing matches or when a request or XML parsing
        error occurs; such errors are printed rather than raised.
    """
    if not query or not query.strip():
        return []
    # None is passed through untouched (as before); only real non-positive
    # limits are short-circuited.
    if max_results is not None and max_results <= 0:
        return []

    api_key = os.getenv("PUBMED_API_KEY")  # optional
    search_params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "xml",
    }
    if api_key:
        search_params["api_key"] = api_key

    try:
        # Step 1: Search for IDs
        search_res = requests.get(
            f"{PUBMED_BASE}/esearch.fcgi", params=search_params, timeout=10
        )
        search_res.raise_for_status()
        # Parse the raw bytes so the XML parser applies the document's own
        # encoding declaration instead of the charset requests guessed.
        search_root = ET.fromstring(search_res.content)
        ids = [
            id_elem.text.strip()
            for id_elem in search_root.findall(".//Id")
            if id_elem.text and id_elem.text.strip()
        ]
        if not ids:
            return []

        # Step 2: Fetch article details
        fetch_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml",
        }
        if api_key:
            fetch_params["api_key"] = api_key

        fetch_res = requests.get(
            f"{PUBMED_BASE}/efetch.fcgi", params=fetch_params, timeout=10
        )
        fetch_res.raise_for_status()
        return _parse_pubmed_articles(fetch_res.content)

    except requests.exceptions.RequestException as e:
        print(f"[PubMed API Error] {e}")
        return []
    except ET.ParseError as e:
        print(f"[PubMed Parse Error] {e}")
        return []