Spaces:
Sleeping
Sleeping
Update genesis/api_clients/pubmed_api.py
Browse files
genesis/api_clients/pubmed_api.py
CHANGED
@@ -2,51 +2,73 @@
|
|
2 |
import requests
|
3 |
from xml.etree import ElementTree as ET
|
4 |
|
5 |
-
|
|
|
|
|
6 |
"""
|
7 |
-
Search PubMed
|
8 |
-
Returns a list of dicts with 'title', 'authors', 'pub_date', 'link'.
|
9 |
"""
|
10 |
-
|
11 |
-
summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
|
12 |
-
|
13 |
-
# Step 1: Search PubMed IDs
|
14 |
-
search_params = {
|
15 |
"db": "pubmed",
|
16 |
"term": query,
|
|
|
17 |
"retmax": max_results,
|
18 |
-
"retmode": "json"
|
19 |
}
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
25 |
return []
|
26 |
|
27 |
-
|
28 |
-
summary_params = {
|
29 |
"db": "pubmed",
|
30 |
-
"id": ",".join(
|
31 |
"retmode": "xml"
|
32 |
}
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import requests
|
3 |
from xml.etree import ElementTree as ET
|
4 |
|
5 |
+
# Common URL prefix for the E-utilities endpoints this module calls
# (esearch.fcgi and efetch.fcgi are appended to it).
NCBI_EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
|
6 |
+
|
7 |
+
def search_pubmed(query: str, max_results: int = 10, api_key: str = None,
                  timeout: float = 10.0):
    """
    Search PubMed for a given query and return a list of PMIDs.

    Args:
        query: Search expression sent to ESearch as the ``term`` parameter.
        max_results: Maximum number of IDs to request (``retmax``).
        api_key: Optional NCBI API key; only sent when truthy.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``esearchresult.idlist`` value of the JSON response, or an
        empty list when either key is missing.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    params = {
        "db": "pubmed",
        "term": query,
        "retmode": "json",
        "retmax": max_results,
    }
    if api_key:
        params["api_key"] = api_key

    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    response = requests.get(
        f"{NCBI_EUTILS_BASE}/esearch.fcgi",
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()
    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])
|
24 |
|
25 |
+
|
26 |
+
def fetch_pubmed_details(pmids: list, api_key: str = None,
                         timeout: float = 10.0):
    """
    Fetch detailed article data for given PMIDs.

    Args:
        pmids: PubMed IDs to fetch; items may be strings or integers.
        api_key: Optional NCBI API key; only sent when truthy.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list produced by ``parse_pubmed_xml`` for the EFetch response,
        or an empty list when ``pmids`` is empty (no request is made).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    if not pmids:
        return []

    params = {
        "db": "pubmed",
        # str() so integer PMIDs do not make str.join raise TypeError.
        "id": ",".join(str(pmid) for pmid in pmids),
        "retmode": "xml",
    }
    if api_key:
        params["api_key"] = api_key

    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    response = requests.get(
        f"{NCBI_EUTILS_BASE}/efetch.fcgi",
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()
    return parse_pubmed_xml(response.text)
|
44 |
+
|
45 |
+
|
46 |
+
def _element_text(element):
    """Return all text inside ``element`` (including nested tags), or None."""
    if element is None:
        return None
    # itertext() walks child elements too, so inline markup such as <i> or
    # <sub> does not truncate the value the way ``element.text`` does.
    # ``or None`` keeps the result None for an empty element.
    return "".join(element.itertext()) or None


def parse_pubmed_xml(xml_text: str):
    """
    Parse PubMed XML into structured dicts.

    Args:
        xml_text: XML document containing ``PubmedArticle`` elements.

    Returns:
        One dict per ``PubmedArticle`` with the keys ``title``,
        ``abstract``, ``journal`` and ``year``. A value is None when the
        corresponding element is missing or empty. Multiple
        ``AbstractText`` sections are joined with newlines.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_text`` is not
            well-formed XML.
    """
    root = ET.fromstring(xml_text)
    articles = []

    for article in root.findall(".//PubmedArticle"):
        # An abstract may be split into several AbstractText sections;
        # keep every non-empty one rather than only the first.
        sections = [
            text
            for text in map(_element_text, article.findall(".//AbstractText"))
            if text is not None
        ]

        year = _element_text(article.find(".//PubDate/Year"))
        if year is None:
            # Some records carry a free-form MedlineDate instead of Year;
            # use its leading four digits when they look like a year.
            medline_date = _element_text(article.find(".//PubDate/MedlineDate"))
            if medline_date is not None and medline_date[:4].isdigit():
                year = medline_date[:4]

        articles.append({
            "title": _element_text(article.find(".//ArticleTitle")),
            "abstract": "\n".join(sections) if sections else None,
            "journal": _element_text(article.find(".//Journal/Title")),
            "year": year,
        })

    return articles
|
67 |
+
|
68 |
+
|
69 |
+
def search_and_fetch(query: str, max_results: int = 5, api_key: str = None):
    """
    Run a PubMed search and return the parsed details of the hits.

    The query is passed to ``search_pubmed``; the PMIDs it returns are
    handed straight to ``fetch_pubmed_details``. The same ``api_key`` is
    forwarded to both calls.
    """
    return fetch_pubmed_details(
        search_pubmed(query, max_results=max_results, api_key=api_key),
        api_key=api_key,
    )
|