mgbam committed on
Commit
94b3916
·
verified ·
1 Parent(s): 6fa7402

Update genesis/api_clients/pubmed_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/pubmed_api.py +58 -36
genesis/api_clients/pubmed_api.py CHANGED
@@ -2,51 +2,73 @@
2
  import requests
3
  from xml.etree import ElementTree as ET
4
 
5
def search_pubmed(query: str, max_results: int = 10, timeout: float = 30.0):
    """
    Search PubMed using the NCBI E-utilities API.

    Runs an ESearch request to obtain matching IDs, then an ESummary
    request for those IDs, and converts each ``DocSum`` element of the
    summary XML into a dict.

    Args:
        query: Search term sent as the ESearch ``term`` parameter.
        max_results: Maximum number of IDs requested (``retmax``).
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely on a
            stalled connection.

    Returns:
        A list of dicts with 'title', 'authors', 'pub_date', 'link'.
        The list is empty when the search yields no IDs.

    Raises:
        requests.HTTPError: If either request returns an error status.
        requests.Timeout: If either request exceeds ``timeout``.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

    # Step 1: Search PubMed IDs
    search_params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
    }
    search_res = requests.get(base_url, params=search_params, timeout=timeout)
    search_res.raise_for_status()
    id_list = search_res.json().get("esearchresult", {}).get("idlist", [])

    # Nothing matched: skip the summary request entirely.
    if not id_list:
        return []

    # Step 2: Fetch summaries for IDs
    summary_params = {
        "db": "pubmed",
        "id": ",".join(id_list),
        "retmode": "xml",
    }
    summary_res = requests.get(summary_url, params=summary_params, timeout=timeout)
    summary_res.raise_for_status()

    root = ET.fromstring(summary_res.text)
    papers = []
    for docsum in root.findall(".//DocSum"):
        paper = {"title": None, "authors": [], "pub_date": None, "link": None}
        # Only direct <Item> children are inspected; the Name attribute
        # selects which field of the result dict the item fills.
        for item in docsum.findall("Item"):
            name = item.attrib.get("Name")
            if name == "Title":
                paper["title"] = item.text
            elif name == "PubDate":
                paper["pub_date"] = item.text
            elif name == "AuthorList":
                paper["authors"] = [author.text for author in item.findall("Item")]
        uid_elem = docsum.find("Id")
        if uid_elem is not None:
            paper["link"] = f"https://pubmed.ncbi.nlm.nih.gov/{uid_elem.text}/"
        papers.append(paper)

    return papers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import requests
3
  from xml.etree import ElementTree as ET
4
 
5
# Common URL prefix for the NCBI E-utilities endpoints requested below
# (the endpoint script name, e.g. esearch.fcgi or efetch.fcgi, is appended).
NCBI_EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
6
+
7
def search_pubmed(query: str, max_results: int = 10, api_key: str = None,
                  timeout: float = 30.0) -> list:
    """
    Search PubMed for a given query and return a list of PMIDs.

    Sends one ESearch request to NCBI E-utilities and reads the ID list
    out of the JSON response.

    Args:
        query: Search term sent as the ``term`` parameter.
        max_results: Maximum number of IDs requested (``retmax``).
        api_key: Optional API key; added to the request only when truthy.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely on a
            stalled connection.

    Returns:
        The ``esearchresult.idlist`` value of the response, or an empty
        list when the response does not contain it.

    Raises:
        requests.HTTPError: If the server returns an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    params = {
        "db": "pubmed",
        "term": query,
        "retmode": "json",
        "retmax": max_results,
    }
    if api_key:
        params["api_key"] = api_key

    response = requests.get(
        f"{NCBI_EUTILS_BASE}/esearch.fcgi", params=params, timeout=timeout
    )
    response.raise_for_status()
    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])
24
 
25
+
26
def fetch_pubmed_details(pmids: list, api_key: str = None,
                         timeout: float = 30.0):
    """
    Fetch detailed article data for given PMIDs.

    Sends one EFetch request for all IDs and hands the XML response to
    ``parse_pubmed_xml``.

    Args:
        pmids: PubMed IDs to fetch. Items are converted with ``str`` so
            both string and integer IDs are accepted.
        api_key: Optional API key; added to the request only when truthy.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely on a
            stalled connection.

    Returns:
        Whatever ``parse_pubmed_xml`` returns for the response body, or an
        empty list when ``pmids`` is empty (no request is made).

    Raises:
        requests.HTTPError: If the server returns an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    # Empty input: avoid a pointless request with an empty id parameter.
    if not pmids:
        return []

    params = {
        "db": "pubmed",
        # str() each ID so integer PMIDs do not make join() raise TypeError.
        "id": ",".join(str(pmid) for pmid in pmids),
        "retmode": "xml",
    }
    if api_key:
        params["api_key"] = api_key

    response = requests.get(
        f"{NCBI_EUTILS_BASE}/efetch.fcgi", params=params, timeout=timeout
    )
    response.raise_for_status()
    return parse_pubmed_xml(response.text)
44
+
45
+
46
def _element_text(element):
    """Return all text inside *element* (nested tags included), or None."""
    if element is None:
        return None
    # .text stops at the first child tag, so a title such as
    # "Role of <i>BRCA1</i> in repair" would be cut to "Role of ".
    # itertext() also yields the text inside and after child elements.
    return "".join(element.itertext()) or None


def parse_pubmed_xml(xml_text: str) -> list:
    """
    Parse PubMed XML into structured dicts.

    Every ``PubmedArticle`` element found anywhere in the document yields
    one dict. Fields whose element is missing or empty are ``None``.

    Args:
        xml_text: XML document as a string.

    Returns:
        A list of dicts with the keys ``title`` (``ArticleTitle``),
        ``abstract`` (all ``AbstractText`` sections of the first
        ``Abstract`` element, joined with newlines), ``journal``
        (``Journal/Title``) and ``year`` (``PubDate/Year``).

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_text`` is not
            well-formed XML.
    """
    root = ET.fromstring(xml_text)
    articles = []

    for article in root.findall(".//PubmedArticle"):
        # An <Abstract> may hold several <AbstractText> sections; keep all
        # of them instead of only the first.
        abstract_el = article.find(".//Abstract")
        section_els = (
            abstract_el.findall("AbstractText") if abstract_el is not None else []
        )
        if not section_els:
            # Fall back to the first AbstractText anywhere in the article.
            first_section = article.find(".//AbstractText")
            section_els = [first_section] if first_section is not None else []
        sections = [text for text in map(_element_text, section_els) if text]

        articles.append({
            "title": _element_text(article.find(".//ArticleTitle")),
            "abstract": "\n".join(sections) if sections else None,
            "journal": _element_text(article.find(".//Journal/Title")),
            "year": _element_text(article.find(".//PubDate/Year")),
        })

    return articles
67
+
68
+
69
def search_and_fetch(query: str, max_results: int = 5, api_key: str = None):
    """
    Run a PubMed search and return details for the matching articles.

    Calls ``search_pubmed`` with the query, then passes the resulting IDs
    to ``fetch_pubmed_details``; ``api_key`` is forwarded to both calls.
    """
    return fetch_pubmed_details(
        search_pubmed(query, max_results=max_results, api_key=api_key),
        api_key=api_key,
    )