mgbam committed on
Commit
b94ef99
·
verified ·
1 Parent(s): 97ec060

Update genesis/api_clients/pubmed_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/pubmed_api.py +41 -23
genesis/api_clients/pubmed_api.py CHANGED
@@ -1,34 +1,52 @@
1
  # genesis/api_clients/pubmed_api.py
2
  import requests
3
- import os
4
-
5
# NCBI E-utilities API key, read from the environment (stored in HF Secrets).
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")
# Root URL of the NCBI E-utilities service; endpoint names are appended to it.
BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
7
 
8
  def search_pubmed(query: str, max_results: int = 10):
9
- """Search PubMed articles."""
10
- url = f"{BASE_URL}/esearch.fcgi"
11
- params = {
 
 
 
 
 
 
12
  "db": "pubmed",
13
  "term": query,
14
  "retmax": max_results,
15
- "retmode": "json",
16
- "api_key": NCBI_API_KEY
17
  }
18
- r = requests.get(url, params=params)
19
- r.raise_for_status()
20
- return r.json()
 
 
 
21
 
22
def fetch_pubmed_details(id_list, *, timeout: float = 10.0):
    """Fetch article details for the given PubMed IDs through EFetch.

    Args:
        id_list: PubMed IDs to fetch. A single string is treated as one ID,
            and every element is converted with ``str`` so integer PMIDs
            are accepted.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The response body as text (XML is requested via ``retmode``).

    Raises:
        requests.HTTPError: If the response carries an error status.
    """
    # Joining a bare string would split it into single characters.
    if isinstance(id_list, str):
        id_list = [id_list]

    response = requests.get(
        f"{BASE_URL}/efetch.fcgi",
        params={
            "db": "pubmed",
            "id": ",".join(map(str, id_list)),
            "retmode": "xml",
            "api_key": NCBI_API_KEY,
        },
        timeout=timeout,
    )
    response.raise_for_status()
    return response.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # genesis/api_clients/pubmed_api.py
2
  import requests
3
+ from xml.etree import ElementTree as ET
 
 
 
4
 
5
  def search_pubmed(query: str, max_results: int = 10):
6
+ """
7
+ Search PubMed using the NCBI E-utilities API.
8
+ Returns a list of dicts with 'title', 'authors', 'pub_date', 'link'.
9
+ """
10
+ base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
11
+ summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
12
+
13
+ # Step 1: Search PubMed IDs
14
+ search_params = {
15
  "db": "pubmed",
16
  "term": query,
17
  "retmax": max_results,
18
+ "retmode": "json"
 
19
  }
20
+ search_res = requests.get(base_url, params=search_params)
21
+ search_res.raise_for_status()
22
+ id_list = search_res.json().get("esearchresult", {}).get("idlist", [])
23
+
24
+ if not id_list:
25
+ return []
26
 
27
+ # Step 2: Fetch summaries for IDs
28
+ summary_params = {
 
 
 
29
  "db": "pubmed",
30
+ "id": ",".join(id_list),
31
+ "retmode": "xml"
 
32
  }
33
+ summary_res = requests.get(summary_url, params=summary_params)
34
+ summary_res.raise_for_status()
35
+
36
+ root = ET.fromstring(summary_res.text)
37
+ papers = []
38
+ for docsum in root.findall(".//DocSum"):
39
+ paper = {"title": None, "authors": [], "pub_date": None, "link": None}
40
+ for item in docsum.findall("Item"):
41
+ if item.attrib.get("Name") == "Title":
42
+ paper["title"] = item.text
43
+ elif item.attrib.get("Name") == "PubDate":
44
+ paper["pub_date"] = item.text
45
+ elif item.attrib.get("Name") == "AuthorList":
46
+ paper["authors"] = [author.text for author in item.findall("Item")]
47
+ uid_elem = docsum.find("Id")
48
+ if uid_elem is not None:
49
+ paper["link"] = f"https://pubmed.ncbi.nlm.nih.gov/{uid_elem.text}/"
50
+ papers.append(paper)
51
+
52
+ return papers