Spaces:
Sleeping
Sleeping
Update genesis/api_clients/ncbi_api.py
Browse files- genesis/api_clients/ncbi_api.py +78 -44
genesis/api_clients/ncbi_api.py
CHANGED
@@ -1,64 +1,98 @@
|
|
1 |
# genesis/api_clients/ncbi_api.py
|
|
|
2 |
import requests
|
3 |
-
|
4 |
|
5 |
-
|
|
|
6 |
|
7 |
-
|
|
|
|
|
|
|
|
|
8 |
"""
|
9 |
-
|
10 |
-
and return a list of IDs.
|
11 |
"""
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
"term": term,
|
16 |
-
"retmode": "json",
|
17 |
-
"retmax": retmax
|
18 |
-
}
|
19 |
-
res = requests.get(url, params=params)
|
20 |
res.raise_for_status()
|
21 |
-
|
22 |
-
|
23 |
|
24 |
-
def
|
25 |
"""
|
26 |
-
|
27 |
-
rettype can be 'abstract', 'fasta', 'gb', etc.
|
28 |
"""
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
33 |
params = {
|
34 |
-
"db":
|
35 |
-
"id": ",".join(
|
36 |
-
"
|
37 |
-
"retmode": retmode
|
38 |
}
|
39 |
-
res = requests.get(
|
40 |
res.raise_for_status()
|
41 |
-
return res.text
|
|
|
42 |
|
43 |
-
def
|
44 |
"""
|
45 |
-
|
46 |
"""
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
50 |
|
51 |
-
ids = ncbi_search("gene", query, retmax=1)
|
52 |
-
if not ids:
|
53 |
-
return None
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
}
|
61 |
-
res
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
res.raise_for_status()
|
63 |
-
|
64 |
-
return data.get("result", {}).get(ids[0], {})
|
|
|
1 |
# genesis/api_clients/ncbi_api.py
|
2 |
+
import os
|
3 |
import requests
|
4 |
+
from urllib.parse import urlencode
|
5 |
|
6 |
+
# Entrez E-utilities configuration, resolved once when the module is imported.
NCBI_BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")

# Fail fast: importing this module without a non-empty key is an error.
if not NCBI_API_KEY:
    raise ValueError("Missing NCBI_API_KEY in environment variables")
|
11 |
+
|
12 |
+
|
13 |
+
def _ncbi_request(endpoint: str, params: dict, *, timeout: float = 30.0) -> str:
    """Call an NCBI E-utilities endpoint and return the raw response body.

    Args:
        endpoint: Script name appended to ``NCBI_BASE_URL``, for example
            ``"esearch.fcgi"``.
        params: Query parameters for the call. The mapping is copied, so the
            caller's dict is never modified.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Build a new dict rather than writing ``api_key`` into the caller's
    # mapping; any ``api_key`` the caller passed is still overridden.
    query = {**params, "api_key": NCBI_API_KEY}
    url = f"{NCBI_BASE_URL}{endpoint}?{urlencode(query)}"
    # Without a timeout a stalled connection would block the caller forever.
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    return res.text
|
22 |
+
|
23 |
|
24 |
+
def search_pubmed(query: str, max_results: int = 10, *, timeout: float = 30.0) -> list:
    """Search PubMed and return the IDs of the matching articles.

    Args:
        query: Entrez search term.
        max_results: Upper bound on returned IDs (sent as ``retmax``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``esearchresult.idlist`` entry of the JSON response, or an empty
        list when that entry is absent.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
        # The module requires this key at import time, so actually send it.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json().get("esearchresult", {}).get("idlist", [])
|
33 |
+
|
34 |
|
35 |
+
def fetch_pubmed_details(pubmed_ids: list, *, timeout: float = 30.0):
    """Fetch the PubMed records for a list of article IDs.

    Args:
        pubmed_ids: PubMed IDs; each element is converted with ``str`` so
            both string and integer IDs are accepted.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The EFetch response body as an XML string. For an empty
        ``pubmed_ids`` no request is made and an empty list is returned
        (kept as-is for backward compatibility, although the type differs).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    if not pubmed_ids:
        return []
    params = {
        "db": "pubmed",
        # str() each ID: a bare ",".join() raises TypeError on integer IDs.
        "id": ",".join(str(pmid) for pmid in pubmed_ids),
        "retmode": "xml",
        # The module requires this key at import time, so actually send it.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}efetch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.text  # XML response
|
49 |
+
|
50 |
|
51 |
+
def search_gene(query: str, *, timeout: float = 30.0):
    """Search the NCBI Gene database.

    Args:
        query: Entrez search term.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the ESearch response (at most 5 hits are
        requested via ``retmax``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    params = {
        "db": "gene",
        "term": query,
        "retmode": "json",
        "retmax": 5,
        # The module requires this key at import time, so actually send it.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()
|
59 |
|
|
|
|
|
|
|
60 |
|
61 |
+
def fetch_gene_summary(gene_id: str, *, timeout: float = 30.0):
    """Fetch the ESummary record for one NCBI Gene ID.

    Args:
        gene_id: Gene identifier sent as the ``id`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the ESummary response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    params = {
        "db": "gene",
        "id": gene_id,
        "retmode": "json",
        # The module requires this key at import time, so actually send it.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esummary.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()
|
69 |
+
|
70 |
+
|
71 |
+
def search_protein(query: str, *, timeout: float = 30.0):
    """Search the NCBI Protein database.

    Args:
        query: Entrez search term.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the ESearch response (at most 5 hits are
        requested via ``retmax``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    params = {
        "db": "protein",
        "term": query,
        "retmode": "json",
        "retmax": 5,
        # The module requires this key at import time, so actually send it.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()
|
79 |
+
|
80 |
+
|
81 |
+
def search_pubchem_compound(query: str, *, timeout: float = 30.0):
    """Search PubChem compounds through the Entrez ``pccompound`` database.

    Args:
        query: Entrez search term.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the ESearch response (at most 5 hits are
        requested via ``retmax``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    params = {
        "db": "pccompound",
        "term": query,
        "retmode": "json",
        "retmax": 5,
        # The module requires this key at import time, so actually send it.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()
|
89 |
+
|
90 |
+
|
91 |
+
def search_clinical_trials(query: str, *, timeout: float = 30.0):
    """Run an ESearch query against the ``clinicaltrials`` database name.

    Args:
        query: Entrez search term.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the ESearch response (at most 5 hits are
        requested via ``retmax``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # NOTE(review): "clinicaltrials" may not be a valid Entrez database
    # name (ClinicalTrials.gov has its own API) - confirm against EInfo.
    # The value is left unchanged here so existing behaviour is preserved.
    params = {
        "db": "clinicaltrials",
        "term": query,
        "retmode": "json",
        "retmax": 5,
        # The module requires this key at import time, so actually send it.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()
|
|