mgbam committed on
Commit
117cd77
·
verified ·
1 Parent(s): a9daa10

Update genesis/api_clients/ncbi_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/ncbi_api.py +78 -44
genesis/api_clients/ncbi_api.py CHANGED
@@ -1,64 +1,98 @@
1
  # genesis/api_clients/ncbi_api.py
 
2
  import requests
3
- import xml.etree.ElementTree as ET
4
 
5
- NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
 
6
 
7
def ncbi_search(db: str, term: str, retmax: int = 10, *, timeout: float = 30.0):
    """
    Search an NCBI database (pubmed, gene, protein, taxonomy, etc.)
    and return a list of IDs.

    Args:
        db: Name of the Entrez database to query.
        term: Search expression passed to ESearch as ``term``.
        retmax: Maximum number of IDs requested.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``esearchresult.idlist`` value of the JSON response, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    params = {
        "db": db,
        "term": term,
        "retmode": "json",
        "retmax": retmax,
    }
    # Without an explicit timeout, requests may block forever on a stalled
    # connection.
    res = requests.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json().get("esearchresult", {}).get("idlist", [])
23
 
24
def ncbi_fetch(
    db: str,
    ids: list,
    rettype: str = "abstract",
    retmode: str = "text",
    *,
    timeout: float = 30.0,
):
    """
    Fetch records from an NCBI database by IDs.

    Args:
        db: Name of the Entrez database to query.
        ids: Record identifiers; each one is converted with ``str`` so
            integer IDs are accepted as well as strings.
        rettype: Record type, e.g. 'abstract', 'fasta', 'gb'.
        retmode: Response format requested from EFetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body as text, or ``None`` when ``ids`` is empty.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    if not ids:
        return None

    params = {
        "db": db,
        # str() each element: ",".join() raises TypeError on integers.
        "id": ",".join(str(record_id) for record_id in ids),
        "rettype": rettype,
        "retmode": retmode,
    }
    # Without an explicit timeout, requests may block forever on a stalled
    # connection.
    res = requests.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.text
 
42
 
43
def get_gene_summary(gene_name: str, organism: str = None, *, timeout: float = 30.0):
    """
    Get a gene summary from NCBI Gene.

    Args:
        gene_name: Search term for the gene.
        organism: Optional organism name; when given, the query is
            narrowed with ``AND <organism>[Organism]``.
        timeout: Seconds to wait for the ESummary request before giving up.

    Returns:
        The ESummary ``result`` entry for the first matching gene ID (an
        empty dict if the response lacks it), or ``None`` when the search
        returns no IDs.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    query = gene_name
    if organism:
        query += f" AND {organism}[Organism]"

    ids = ncbi_search("gene", query, retmax=1)
    if not ids:
        return None

    gene_id = ids[0]
    params = {
        "db": "gene",
        "id": gene_id,
        "retmode": "json",
    }
    # Without an explicit timeout, requests may block forever on a stalled
    # connection.
    res = requests.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json().get("result", {}).get(gene_id, {})
 
1
  # genesis/api_clients/ncbi_api.py
2
+ import os
3
  import requests
4
+ from urllib.parse import urlencode
5
 
6
# Endpoint root for every E-utilities call; it keeps its trailing slash so
# callers can append the script name directly.
NCBI_BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

# The API key is read once, at import time, from the process environment.
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")

# An unset or empty key aborts the import instead of failing on first use.
if NCBI_API_KEY is None or NCBI_API_KEY == "":
    raise ValueError("Missing NCBI_API_KEY in environment variables")
11
+
12
+
13
def _ncbi_request(endpoint: str, params: dict, *, timeout: float = 30.0) -> str:
    """
    Generic helper for calling NCBI E-Utilities API.

    Args:
        endpoint: Script name appended to ``NCBI_BASE_URL``
            (for example ``"esearch.fcgi"``).
        params: Query parameters for the call. The mapping is not
            modified; ``api_key`` is added to a copy and overrides any
            ``api_key`` entry the caller supplied.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    # Build a new dict so the caller's mapping is not mutated as a side
    # effect of making the request.
    query = {**params, "api_key": NCBI_API_KEY}
    url = f"{NCBI_BASE_URL}{endpoint}?{urlencode(query)}"
    # Without an explicit timeout, requests may block forever on a stalled
    # connection.
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    return res.text
22
+
23
 
24
def search_pubmed(query: str, max_results: int = 10, *, timeout: float = 30.0):
    """
    Search PubMed articles via NCBI.

    Args:
        query: Search expression passed to ESearch as ``term``.
        max_results: Maximum number of IDs requested (``retmax``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``esearchresult.idlist`` value of the JSON response, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
        # The module refuses to import without a key, so send it with the
        # request instead of leaving it unused.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json().get("esearchresult", {}).get("idlist", [])
33
+
34
 
35
def fetch_pubmed_details(pubmed_ids: list, *, timeout: float = 30.0):
    """
    Fetch PubMed details given a list of IDs.

    Args:
        pubmed_ids: PubMed identifiers; each one is converted with ``str``
            so integer IDs are accepted as well as strings.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The EFetch response body as XML text, or an empty list when
        ``pubmed_ids`` is empty (kept for backward compatibility, so the
        return type differs between the two cases).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    if not pubmed_ids:
        return []

    params = {
        "db": "pubmed",
        # str() each element: ",".join() raises TypeError on integers.
        "id": ",".join(str(pmid) for pmid in pubmed_ids),
        "retmode": "xml",
        # The module refuses to import without a key, so send it with the
        # request instead of leaving it unused.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}efetch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.text  # XML response
49
+
50
 
51
def search_gene(query: str, *, timeout: float = 30.0):
    """
    Search for a gene in NCBI Gene database.

    Args:
        query: Search expression passed to ESearch as ``term``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the ESearch response (at most 5 IDs are
        requested).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    params = {
        "db": "gene",
        "term": query,
        "retmode": "json",
        "retmax": 5,
        # The module refuses to import without a key, so send it with the
        # request instead of leaving it unused.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()
59
 
 
 
 
60
 
61
def fetch_gene_summary(gene_id: str, *, timeout: float = 30.0):
    """
    Get detailed gene summary from NCBI.

    Args:
        gene_id: Identifier sent to ESummary as ``id`` for the ``gene``
            database.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the ESummary response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    params = {
        "db": "gene",
        "id": gene_id,
        "retmode": "json",
        # The module refuses to import without a key, so send it with the
        # request instead of leaving it unused.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esummary.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()
69
+
70
+
71
def search_protein(query: str, *, timeout: float = 30.0):
    """
    Search proteins in NCBI Protein database.

    Args:
        query: Search expression passed to ESearch as ``term``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the ESearch response (at most 5 IDs are
        requested).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    params = {
        "db": "protein",
        "term": query,
        "retmode": "json",
        "retmax": 5,
        # The module refuses to import without a key, so send it with the
        # request instead of leaving it unused.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()
79
+
80
+
81
def search_pubchem_compound(query: str, *, timeout: float = 30.0):
    """
    Search compounds in PubChem via NCBI.

    Args:
        query: Search expression passed to ESearch as ``term`` against the
            ``pccompound`` database.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the ESearch response (at most 5 IDs are
        requested).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    params = {
        "db": "pccompound",
        "term": query,
        "retmode": "json",
        "retmax": 5,
        # The module refuses to import without a key, so send it with the
        # request instead of leaving it unused.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()
89
+
90
+
91
def search_clinical_trials(query: str, *, timeout: float = 30.0):
    """
    Search clinical trials in ClinicalTrials.gov via NCBI.

    Args:
        query: Search expression passed to ESearch as ``term`` with
            ``db="clinicaltrials"``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the ESearch response (at most 5 IDs are
        requested).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    # NOTE(review): "clinicaltrials" does not look like a database name that
    # E-utilities exposes -- confirm against the EInfo database list. If it
    # is not one, this call can only ever return an error payload.
    params = {
        "db": "clinicaltrials",
        "term": query,
        "retmode": "json",
        "retmax": 5,
        # The module refuses to import without a key, so send it with the
        # request instead of leaving it unused.
        "api_key": NCBI_API_KEY,
    }
    res = requests.get(f"{NCBI_BASE_URL}esearch.fcgi", params=params, timeout=timeout)
    res.raise_for_status()
    return res.json()