Spaces:

mgbam
/

Synthetic_Biology

Sleeping

App Files Community

mgbam committed 15 days ago

Commit

94b3916

verified ·

1 Parent(s): 6fa7402

Update genesis/api_clients/pubmed_api.py

Browse files

Files changed (1) hide show

genesis/api_clients/pubmed_api.py +58 -36

genesis/api_clients/pubmed_api.py CHANGED Viewed

@@ -2,51 +2,73 @@
 import requests
 from xml.etree import ElementTree as ET
-def search_pubmed(query: str, max_results: int = 10):
     """
-    Search PubMed using the NCBI E-utilities API.
-    Returns a list of dicts with 'title', 'authors', 'pub_date', 'link'.
     """
-    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
-    summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
-    # Step 1: Search PubMed IDs
-    search_params = {
         "db": "pubmed",
         "term": query,
         "retmax": max_results,
-        "retmode": "json"
     }
-    search_res = requests.get(base_url, params=search_params)
-    search_res.raise_for_status()
-    id_list = search_res.json().get("esearchresult", {}).get("idlist", [])
-    if not id_list:
         return []
-    # Step 2: Fetch summaries for IDs
-    summary_params = {
         "db": "pubmed",
-        "id": ",".join(id_list),
         "retmode": "xml"
     }
-    summary_res = requests.get(summary_url, params=summary_params)
-    summary_res.raise_for_status()
-    root = ET.fromstring(summary_res.text)
-    papers = []
-    for docsum in root.findall(".//DocSum"):
-        paper = {"title": None, "authors": [], "pub_date": None, "link": None}
-        for item in docsum.findall("Item"):
-            if item.attrib.get("Name") == "Title":
-                paper["title"] = item.text
-            elif item.attrib.get("Name") == "PubDate":
-                paper["pub_date"] = item.text
-            elif item.attrib.get("Name") == "AuthorList":
-                paper["authors"] = [author.text for author in item.findall("Item")]
-        uid_elem = docsum.find("Id")
-        if uid_elem is not None:
-            paper["link"] = f"https://pubmed.ncbi.nlm.nih.gov/{uid_elem.text}/"
-        papers.append(paper)
-    return papers

 import requests
 from xml.etree import ElementTree as ET
# Root URL shared by the NCBI E-utilities endpoints (esearch.fcgi, efetch.fcgi).
NCBI_EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"


def search_pubmed(query: str, max_results: int = 10, api_key: str = None,
                  timeout: float = 30.0) -> list:
    """
    Search PubMed for a given query and return a list of PMIDs.

    Args:
        query: Search term, sent to ESearch as ``term``.
        max_results: Maximum number of IDs requested (sent as ``retmax``).
        api_key: Optional NCBI API key; added to the request only when truthy.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        The ``esearchresult.idlist`` entry of the JSON response, or an empty
        list when that entry is missing or the query is blank.

    Raises:
        requests.HTTPError: If the response status is an error
            (raised by ``raise_for_status``).
        requests.Timeout: If the request exceeds ``timeout``.
    """
    # A blank term cannot match anything; skip the network round trip.
    if not query or not query.strip():
        return []

    params = {
        "db": "pubmed",
        "term": query,
        "retmode": "json",
        "retmax": max_results,
    }
    if api_key:
        params["api_key"] = api_key

    response = requests.get(
        f"{NCBI_EUTILS_BASE}/esearch.fcgi", params=params, timeout=timeout
    )
    response.raise_for_status()
    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])
def fetch_pubmed_details(pmids: list, api_key: str = None,
                         timeout: float = 30.0) -> list:
    """
    Fetch detailed article data for given PMIDs.

    Args:
        pmids: PubMed IDs to look up. Items may be strings or integers; each
            is converted with ``str`` before being joined into the ``id``
            request parameter.
        api_key: Optional NCBI API key; added to the request only when truthy.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        The result of ``parse_pubmed_xml`` applied to the EFetch XML response,
        or an empty list when ``pmids`` is empty.

    Raises:
        requests.HTTPError: If the response status is an error
            (raised by ``raise_for_status``).
        requests.Timeout: If the request exceeds ``timeout``.
    """
    if not pmids:
        return []

    params = {
        "db": "pubmed",
        # str() so integer PMIDs do not make join() raise TypeError.
        "id": ",".join(str(pmid) for pmid in pmids),
        "retmode": "xml",
    }
    if api_key:
        params["api_key"] = api_key

    response = requests.get(
        f"{NCBI_EUTILS_BASE}/efetch.fcgi", params=params, timeout=timeout
    )
    response.raise_for_status()
    return parse_pubmed_xml(response.text)
def _element_text(element):
    """Return all text inside *element*, including text nested in child tags, or None."""
    if element is None:
        return None
    # itertext() also yields text inside/after inline markup such as <i> or
    # <sub>; plain .text stops at the first child element.
    text = "".join(element.itertext()).strip()
    return text or None


def _publication_year(article):
    """Return the publication year string from PubDate/Year, else from MedlineDate, else None."""
    year = _element_text(article.find(".//PubDate/Year"))
    if year:
        return year
    # Some records carry a free-form MedlineDate (e.g. "2019 Nov-Dec")
    # instead of a Year element; take its first four-digit token.
    medline_date = _element_text(article.find(".//PubDate/MedlineDate")) or ""
    for token in medline_date.replace("-", " ").split():
        if len(token) == 4 and token.isdigit():
            return token
    return None


def parse_pubmed_xml(xml_text: str) -> list:
    """
    Parse PubMed XML into structured dicts.

    Args:
        xml_text: XML document containing ``PubmedArticle`` elements.

    Returns:
        One dict per ``PubmedArticle`` with the keys ``title``, ``abstract``,
        ``journal`` and ``year``. A value is ``None`` when the corresponding
        element is absent or empty. Multiple ``AbstractText`` sections are
        joined with newlines. Blank input yields an empty list.

    Raises:
        xml.etree.ElementTree.ParseError: If non-blank ``xml_text`` is not
            well-formed XML.
    """
    if not xml_text or not xml_text.strip():
        return []

    root = ET.fromstring(xml_text)
    articles = []
    for article in root.findall(".//PubmedArticle"):
        # Prefer the main abstract; fall back to any AbstractText so records
        # that only have another abstract container still yield text.
        section_els = (
            article.findall(".//Abstract/AbstractText")
            or article.findall(".//AbstractText")
        )
        sections = [_element_text(el) for el in section_els]
        abstract = "\n".join(part for part in sections if part) or None

        articles.append({
            "title": _element_text(article.find(".//ArticleTitle")),
            "abstract": abstract,
            "journal": _element_text(article.find(".//Journal/Title")),
            "year": _publication_year(article),
        })
    return articles
def search_and_fetch(query: str, max_results: int = 5, api_key: str = None):
    """
    One-call helper: run the PubMed search, then fetch details for the hits.

    ``query``, ``max_results`` and ``api_key`` are forwarded to
    ``search_pubmed``; the resulting PMIDs and the same ``api_key`` are then
    handed to ``fetch_pubmed_details``, whose return value is returned as is.
    """
    return fetch_pubmed_details(
        search_pubmed(query, max_results=max_results, api_key=api_key),
        api_key=api_key,
    )