Spaces:
Sleeping
Sleeping
File size: 3,623 Bytes
9bee6a4 117cd77 9bee6a4 7227edd 9bee6a4 117cd77 7227edd 9bee6a4 7227edd ef76eaa 7227edd ef76eaa 7227edd 117cd77 ef76eaa 117cd77 ef76eaa 7227edd ef76eaa 7227edd 117cd77 ef76eaa 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd 117cd77 7227edd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# genesis/api_clients/ncbi_api.py
import os
import requests
from xml.etree import ElementTree as ET
# Optional NCBI API key, read once at import time; None when the variable is unset.
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")
# Root URL that the E-utilities endpoint names (esearch.fcgi, efetch.fcgi) are appended to.
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
def _add_api_key(params: dict):
    """
    Attach the configured NCBI API key to a request parameter dict.

    ``params`` is updated in place (an ``"api_key"`` entry is set) only when
    ``NCBI_API_KEY`` is truthy; otherwise it is left untouched. The same dict
    object is returned either way so the call can wrap a dict literal.
    """
    # Guard clause: nothing to add when no key is configured.
    if not NCBI_API_KEY:
        return params
    params["api_key"] = NCBI_API_KEY
    return params
def search_pubmed(query: str, max_results: int = 10, *, timeout: float = 30.0):
    """
    Search PubMed via NCBI Entrez for a query string.

    Args:
        query: Search term sent as the esearch ``term`` parameter.
        max_results: Upper bound on returned IDs, sent as ``retmax``.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get`` (a ``(connect, read)`` tuple is also accepted
            by requests).

    Returns:
        The list of PubMed IDs (PMIDs) found under ``esearchresult.idlist``
        in the JSON response, or an empty list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    params = _add_api_key({
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
    })
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    response = requests.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    response.raise_for_status()
    payload = response.json()
    return payload.get("esearchresult", {}).get("idlist", [])
def _pubmed_element_text(element):
    """Return all text contained in ``element``, or "" when ``element`` is None."""
    if element is None:
        return ""
    # itertext() also yields text nested inside child tags (inline markup such
    # as <i>/<sub>/<sup>) and the text following them; ``element.text`` alone
    # stops at the first child tag and silently truncates the value.
    return "".join(element.itertext())


def _pubmed_author_name(author):
    """Return "ForeName LastName" for an author, else its CollectiveName text."""
    personal = f"{author.findtext('ForeName', '')} {author.findtext('LastName', '')}".strip()
    # Authors without personal name parts fall back to <CollectiveName>.
    return personal or _pubmed_element_text(author.find("CollectiveName")).strip()


def fetch_pubmed_details(pmids: list, *, timeout: float = 30.0):
    """
    Fetch abstracts, authors, and metadata for given PMIDs.

    Args:
        pmids: PubMed IDs to fetch. Items may be strings or integers; each
            is converted with ``str()`` before being joined with commas.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``.

    Returns:
        A list with one dict per ``PubmedArticle`` element in the response,
        each with the keys ``"pmid"``, ``"title"``, ``"abstract"`` (all
        ``AbstractText`` sections joined with single spaces) and
        ``"authors"`` (list of name strings; authors that yield no name are
        omitted). Returns ``[]`` without any request when ``pmids`` is empty.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    if not pmids:
        return []
    params = _add_api_key({
        "db": "pubmed",
        # str() lets callers pass integer PMIDs; str.join rejects non-strings.
        "id": ",".join(str(pmid) for pmid in pmids),
        "retmode": "xml",
    })
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    response = requests.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    response.raise_for_status()
    # Parse the raw bytes so the XML parser applies the encoding declared in
    # the document itself instead of relying on requests' charset detection.
    root = ET.fromstring(response.content)

    results = []
    for article in root.findall(".//PubmedArticle"):
        abstract_parts = [
            _pubmed_element_text(section) for section in article.findall(".//AbstractText")
        ]
        author_names = [_pubmed_author_name(a) for a in article.findall(".//Author")]
        results.append({
            "pmid": article.findtext(".//PMID", default=""),
            "title": _pubmed_element_text(article.find(".//ArticleTitle")),
            "abstract": " ".join(abstract_parts),
            "authors": [name for name in author_names if name],
        })
    return results
def fetch_gene_info(gene_id: str, *, timeout: float = 30.0):
    """
    Fetch gene metadata from NCBI Gene database.

    Args:
        gene_id: Identifier sent as the efetch ``id`` parameter with
            ``db=gene``.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``.

    Returns:
        The response body as text; the request asks for ``retmode=xml`` and
        the body is returned unparsed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    params = _add_api_key({
        "db": "gene",
        "id": gene_id,
        "retmode": "xml",
    })
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    response = requests.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    response.raise_for_status()
    return response.text  # XML - downstream parsing can be done in pipeline
def search_gene_by_symbol(symbol: str, organism: str = None, *, timeout: float = 30.0):
    """
    Search for a gene by symbol, optionally filtered by organism.

    Args:
        symbol: Text used as the esearch term against ``db=gene``.
        organism: When truthy, ``" AND <organism>[Organism]"`` is appended
            to the term to restrict the search.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``.

    Returns:
        The list found under ``esearchresult.idlist`` in the JSON response
        (the request asks for at most 5 IDs via ``retmax``), or an empty
        list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    term = symbol
    if organism:
        term += f" AND {organism}[Organism]"
    params = _add_api_key({
        "db": "gene",
        "term": term,
        "retmax": 5,
        "retmode": "json",
    })
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    response = requests.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    response.raise_for_status()
    payload = response.json()
    return payload.get("esearchresult", {}).get("idlist", [])
def fetch_protein_info(protein_id: str, *, timeout: float = 30.0):
    """
    Fetch protein metadata from NCBI Protein database.

    Args:
        protein_id: Identifier sent as the efetch ``id`` parameter with
            ``db=protein``.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``.

    Returns:
        The response body as text; the request asks for ``retmode=xml`` and
        the body is returned unparsed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    params = _add_api_key({
        "db": "protein",
        "id": protein_id,
        "retmode": "xml",
    })
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    response = requests.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    response.raise_for_status()
    return response.text
def search_protein_by_name(name: str, organism: str = None, *, timeout: float = 30.0):
    """
    Search for proteins by name, optionally filtered by organism.

    Args:
        name: Text used as the esearch term against ``db=protein``.
        organism: When truthy, ``" AND <organism>[Organism]"`` is appended
            to the term to restrict the search.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``.

    Returns:
        The list found under ``esearchresult.idlist`` in the JSON response
        (the request asks for at most 5 IDs via ``retmax``), or an empty
        list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    term = name
    if organism:
        term += f" AND {organism}[Organism]"
    params = _add_api_key({
        "db": "protein",
        "term": term,
        "retmax": 5,
        "retmode": "json",
    })
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    response = requests.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    response.raise_for_status()
    payload = response.json()
    return payload.get("esearchresult", {}).get("idlist", [])
|