mgbam committed on
Commit
ef76eaa
·
verified ·
1 Parent(s): 1087a21

Update genesis/api_clients/ncbi_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/ncbi_api.py +53 -22
genesis/api_clients/ncbi_api.py CHANGED
@@ -1,33 +1,64 @@
1
  # genesis/api_clients/ncbi_api.py
2
  import requests
3
- import os
4
 
5
- NCBI_API_KEY = os.getenv("NCBI_API_KEY")
6
- BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
7
 
8
def search_gene(query: str, max_results: int = 10):
    """Query the NCBI Gene database through ESearch.

    Args:
        query: Search term sent as the ``term`` parameter.
        max_results: Value sent as ``retmax``.

    Returns:
        The decoded JSON body of the ESearch response.

    Raises:
        requests.HTTPError: If the response carries an error status.
    """
    response = requests.get(
        f"{BASE_URL}/esearch.fcgi",
        params={
            "db": "gene",
            "term": query,
            "retmax": max_results,
            "retmode": "json",
            # Module-level key; sent along with every request.
            "api_key": NCBI_API_KEY,
        },
    )
    response.raise_for_status()
    return response.json()
 
 
 
 
 
 
 
 
21
 
22
def fetch_gene_summary(gene_id: str):
    """Request the ESummary record for one gene ID.

    Args:
        gene_id: Identifier sent as the ``id`` parameter with ``db=gene``.

    Returns:
        The decoded JSON body of the ESummary response.

    Raises:
        requests.HTTPError: If the response carries an error status.
    """
    response = requests.get(
        f"{BASE_URL}/esummary.fcgi",
        params={
            "db": "gene",
            "id": gene_id,
            "retmode": "json",
            # Module-level key; sent along with every request.
            "api_key": NCBI_API_KEY,
        },
    )
    response.raise_for_status()
    return response.json()
 
 
1
  # genesis/api_clients/ncbi_api.py
2
  import requests
3
+ import xml.etree.ElementTree as ET
4
 
5
+ NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
 
6
 
7
def ncbi_search(db: str, term: str, retmax: int = 10, timeout: float = 30.0):
    """Search an NCBI database with ESearch and return the matching IDs.

    Args:
        db: Name of the NCBI database to query (pubmed, gene, protein,
            taxonomy, etc.).
        term: Search expression sent as the ``term`` parameter.
        retmax: Maximum number of IDs requested (sent as ``retmax``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``esearchresult.idlist`` entry of the JSON response, or an
        empty list when the response does not contain one.

    Raises:
        requests.HTTPError: If the response carries an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = f"{NCBI_BASE}/esearch.fcgi"
    params = {
        "db": db,
        "term": term,
        "retmode": "json",
        "retmax": retmax,
    }
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    res = requests.get(url, params=params, timeout=timeout)
    res.raise_for_status()
    data = res.json()
    return data.get("esearchresult", {}).get("idlist", [])
23
+
24
def ncbi_fetch(db: str, ids: list, rettype: str = "abstract",
               retmode: str = "text", timeout: float = 30.0):
    """Fetch records from an NCBI database by ID using EFetch.

    Args:
        db: Name of the NCBI database to fetch from.
        ids: IDs of the records to fetch. A list of strings or integers;
            a single bare string or integer is treated as one ID.
        rettype: Record type sent as ``rettype`` ('abstract', 'fasta',
            'gb', etc.).
        retmode: Output format sent as ``retmode``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The response body as text, or ``None`` when ``ids`` is empty.

    Raises:
        requests.HTTPError: If the response carries an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    if not ids:
        return None

    # A bare string is one ID, not a sequence of characters: joining it
    # directly would turn "123" into "1,2,3".
    if isinstance(ids, (str, int)):
        ids = [ids]

    url = f"{NCBI_BASE}/efetch.fcgi"
    params = {
        "db": db,
        # str() lets callers pass integer IDs without a TypeError in join.
        "id": ",".join(str(uid) for uid in ids),
        "rettype": rettype,
        "retmode": retmode,
    }
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    res = requests.get(url, params=params, timeout=timeout)
    res.raise_for_status()
    return res.text
42
+
43
def get_gene_summary(gene_name: str, organism: str = None, timeout: float = 30.0):
    """Get the ESummary record of the first NCBI Gene hit for a gene name.

    Args:
        gene_name: Gene name used as the search term.
        organism: Optional organism name; when given, the search term is
            extended with ``AND <organism>[Organism]``.
        timeout: Seconds to wait for the ESummary HTTP response before
            giving up.

    Returns:
        The entry of the ESummary ``result`` object keyed by the first
        matching gene ID, an empty dict when the response lacks that
        entry, or ``None`` when the search returns no IDs.

    Raises:
        requests.HTTPError: If a response carries an error status.
        requests.Timeout: If the ESummary response does not arrive within
            ``timeout`` seconds.
    """
    query = gene_name
    if organism:
        query += f" AND {organism}[Organism]"

    # Only the top hit is summarised, so a single ID is requested.
    ids = ncbi_search("gene", query, retmax=1)
    if not ids:
        return None
    gene_id = ids[0]

    url = f"{NCBI_BASE}/esummary.fcgi"
    params = {
        "db": "gene",
        "id": gene_id,
        "retmode": "json",
    }
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    res = requests.get(url, params=params, timeout=timeout)
    res.raise_for_status()
    data = res.json()
    return data.get("result", {}).get(gene_id, {})