Spaces:

mgbam
/

Synthetic_Biology

Sleeping

File size: 5,274 Bytes

9bee6a4
117cd77
9bee6a4
eea1b53
800d67f
9bee6a4
eea1b53
800d67f
 
 
9bee6a4
9b0c279
800d67f
9b0c279
800d67f
9b0c279
800d67f
9b0c279
eea1b53
800d67f
 
 
 
eea1b53
800d67f
 
eea1b53
800d67f
eea1b53
 
800d67f
eea1b53
 
800d67f
eea1b53
800d67f
9b0c279
800d67f
9b0c279
eea1b53
800d67f
 
 
 
eea1b53
800d67f
 
 
 
eea1b53
117cd77
800d67f
eea1b53
800d67f
 
 
 
117cd77
800d67f
117cd77
800d67f
 
9b0c279
eea1b53
 
800d67f
 
 
eea1b53
800d67f
 
 
 
eea1b53
 
800d67f
 
 
 
 
 
 
 
 
 
 
eea1b53
800d67f
117cd77
ee653fa
800d67f
ee653fa
800d67f
117cd77
800d67f
117cd77
800d67f
 
 
 
eea1b53
800d67f
 
 
eea1b53
800d67f
 
 
 
eea1b53
 
800d67f
 
 
 
 
 
 
 
 
 
 
 
117cd77
800d67f
 
 
 
117cd77
800d67f
117cd77
800d67f
 
eea1b53
 
800d67f
 
 
eea1b53
39e5a96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eea1b53
800d67f
eea1b53
800d67f
7227edd
39e5a96
7227edd
eea1b53
800d67f
 
 
 
39e5a96
 
eea1b53

# genesis/api_clients/ncbi_api.py
import os
import requests
from typing import List, Dict
from datetime import datetime

# Common URL prefix for the E-utilities endpoints requested in this module.
NCBI_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
# Optional key read from the environment; None when NCBI_API_KEY is not set,
# in which case requests are sent without an ``api_key`` parameter.
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")

# One shared HTTP session, reused by every request made in this module.
session = requests.Session()

# -------------------------
# Generic NCBI Search
# -------------------------
def ncbi_search(db: str, term: str, retmax: int = 10, *, timeout: float = 30.0) -> List[str]:
    """
    Search an NCBI database and return a list of IDs.

    Issues a GET to ``esearch.fcgi`` with ``retmode=json`` and reads
    ``esearchresult.idlist`` from the JSON reply.

    Args:
        db: Database name, sent as the ``db`` query parameter.
        term: Query string, sent as the ``term`` query parameter.
        retmax: Maximum number of IDs to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ID list from the reply, or an empty list when the reply carries
        no ``esearchresult``/``idlist`` entry.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {
        "db": db,
        "term": term,
        "retmode": "json",
        "retmax": retmax,
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    r = session.get(f"{NCBI_BASE}/esearch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()

    return r.json().get("esearchresult", {}).get("idlist", [])

# -------------------------
# Generic NCBI Fetch
# -------------------------
def ncbi_fetch(
    db: str,
    ids: List[str],
    rettype: str = "abstract",
    retmode: str = "text",
    *,
    timeout: float = 30.0,
) -> str:
    """
    Fetch detailed records from an NCBI database.

    Issues a GET to ``efetch.fcgi`` with the IDs joined by commas and returns
    the raw response body.

    Args:
        db: Database name, sent as the ``db`` query parameter.
        ids: Record IDs to fetch. An empty list short-circuits to ``""``.
        rettype: Value for the ``rettype`` query parameter.
        retmode: Value for the ``retmode`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response text, or an empty string when ``ids`` is empty.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # Nothing to fetch: avoid sending a request with an empty ``id`` value.
    if not ids:
        return ""

    params = {
        "db": db,
        "id": ",".join(ids),
        "rettype": rettype,
        "retmode": retmode,
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    r = session.get(f"{NCBI_BASE}/efetch.fcgi", params=params, timeout=timeout)
    r.raise_for_status()

    return r.text

# -------------------------
# PubMed Literature Search
# -------------------------
def search_pubmed(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search PubMed for biomedical literature.

    Looks up matching IDs with ``ncbi_search("pubmed", ...)`` and then
    requests their summaries from ``esummary.fcgi`` in a single call.

    Args:
        term: Query string forwarded to the search.
        retmax: Maximum number of papers to return.

    Returns:
        One dict per ID, in search order, with the keys ``title``,
        ``authors``, ``pubdate``, ``journal``, ``uid`` and ``link``. Fields
        missing from the summary come back as ``None`` (``authors`` as an
        empty list). An empty list is returned when the search finds nothing.

    Raises:
        requests.HTTPError: If the summary request returns an error status.
        requests.Timeout: If the summary request exceeds 30 seconds.
    """
    ids = ncbi_search("pubmed", term, retmax)
    if not ids:
        return []

    params = {
        "db": "pubmed",
        "id": ",".join(ids),
        "retmode": "json",
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # requests applies no timeout by default; bound the wait so a stalled
    # connection cannot hang the caller forever.
    r = session.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=30)
    r.raise_for_status()

    records = r.json().get("result", {})

    def _paper(pid: str) -> Dict:
        # An ID absent from the summary yields an entry of mostly-None fields.
        rec = records.get(pid, {})
        return {
            "title": rec.get("title"),
            "authors": [a["name"] for a in rec.get("authors", [])],
            "pubdate": rec.get("pubdate"),
            "journal": rec.get("fulljournalname"),
            "uid": pid,
            "link": f"https://pubmed.ncbi.nlm.nih.gov/{pid}/",
        }

    return [_paper(pid) for pid in ids]

# -------------------------
# Gene Search
# -------------------------
def search_genes(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search NCBI Gene database for gene information.

    Looks up matching IDs with ``ncbi_search("gene", ...)`` and then requests
    their summaries from ``esummary.fcgi`` in a single call.

    Args:
        term: Query string forwarded to the search.
        retmax: Maximum number of genes to return.

    Returns:
        One dict per ID, in search order, with the keys ``symbol``,
        ``description``, ``organism``, ``uid`` and ``link``. Fields missing
        from the summary come back as ``None``. An empty list is returned
        when the search finds nothing.

    Raises:
        requests.HTTPError: If the summary request returns an error status.
        requests.Timeout: If the summary request exceeds 30 seconds.
    """
    ids = ncbi_search("gene", term, retmax)
    if not ids:
        return []

    params = {
        "db": "gene",
        "id": ",".join(ids),
        "retmode": "json",
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # requests applies no timeout by default; bound the wait so a stalled
    # connection cannot hang the caller forever.
    r = session.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=30)
    r.raise_for_status()

    records = r.json().get("result", {})
    genes = []
    for gid in ids:
        rec = records.get(gid, {})
        genes.append({
            # The summary's "name" field is exposed to callers as "symbol".
            "symbol": rec.get("name"),
            "description": rec.get("description"),
            "organism": rec.get("organism", {}).get("scientificname"),
            "uid": gid,
            "link": f"https://www.ncbi.nlm.nih.gov/gene/{gid}",
        })
    return genes

# -------------------------
# Protein Search
# -------------------------
def _split_fasta_records(fasta_text: str) -> List[str]:
    """Split concatenated FASTA text into one string per ``>`` record."""
    records: List[str] = []
    current: List[str] = []
    for line in fasta_text.splitlines():
        if not line.strip():
            # Blank separator lines between records carry no data.
            continue
        if line.startswith(">") and current:
            records.append("\n".join(current) + "\n")
            current = []
        current.append(line)
    if current:
        records.append("\n".join(current) + "\n")
    return records


def search_proteins(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search NCBI Protein database for protein sequences.

    Looks up matching IDs with ``ncbi_search("protein", ...)``, fetches their
    FASTA text in one ``ncbi_fetch`` call, and attaches to each ID its own
    record rather than the whole multi-record payload.

    Args:
        term: Query string forwarded to the search.
        retmax: Maximum number of proteins to return.

    Returns:
        One dict per ID, in search order, with the keys ``id`` and ``fasta``.
        An empty list is returned when the search finds nothing.
    """
    ids = ncbi_search("protein", term, retmax)
    if not ids:
        return []

    fasta_data = ncbi_fetch("protein", ids, rettype="fasta", retmode="text")
    records = _split_fasta_records(fasta_data)

    # Records are paired with IDs by position.
    # NOTE(review): assumes efetch returns records in the order the IDs were
    # requested -- confirm against the E-utilities documentation.
    if len(records) != len(ids):
        # Positional pairing is impossible (a record is missing or the text
        # did not split cleanly), so fetch each ID on its own instead.
        records = [
            ncbi_fetch("protein", [pid], rettype="fasta", retmode="text")
            for pid in ids
        ]

    return [{"id": pid, "fasta": record} for pid, record in zip(ids, records)]

# -------------------------
# Structure Search
# -------------------------
def fetch_ncbi_structure(term: str, retmax: int = 5) -> List[Dict]:
    """
    Search NCBI Structure database and return structure metadata.

    Looks up matching IDs with ``ncbi_search("structure", ...)`` and then
    requests their summaries from ``esummary.fcgi`` in a single call.

    Args:
        term: Query string forwarded to the search.
        retmax: Maximum number of structures to return.

    Returns:
        One dict per ID, in search order, with the keys ``structure_id``,
        ``title``, ``organism``, ``release_date`` and ``link``. Fields missing
        from the summary come back as ``None``. An empty list is returned
        when the search finds nothing.

    Raises:
        requests.HTTPError: If the summary request returns an error status.
        requests.Timeout: If the summary request exceeds 30 seconds.
    """
    ids = ncbi_search("structure", term, retmax)
    if not ids:
        return []

    params = {
        "db": "structure",
        "id": ",".join(ids),
        "retmode": "json",
    }
    if NCBI_API_KEY:
        params["api_key"] = NCBI_API_KEY

    # requests applies no timeout by default; bound the wait so a stalled
    # connection cannot hang the caller forever.
    r = session.get(f"{NCBI_BASE}/esummary.fcgi", params=params, timeout=30)
    r.raise_for_status()

    records = r.json().get("result", {})
    structures = []
    for sid in ids:
        rec = records.get(sid, {})
        # NOTE(review): the field names read below ("title", "organism",
        # "releasedate") are not verified against real db=structure summary
        # output -- confirm, since absent fields silently become None.
        structures.append({
            "structure_id": sid,
            "title": rec.get("title"),
            "organism": rec.get("organism"),
            "release_date": rec.get("releasedate"),
            "link": f"https://www.ncbi.nlm.nih.gov/structure/{sid}",
        })
    return structures

# -------------------------
# Build Cross-Database Profile
# -------------------------
def ncbi_cross_profile(term: str) -> Dict:
    """
    Given a term, pull literature, genes, proteins, and structures for unified output.

    Runs the four searches one after another (PubMed, Gene, Protein,
    Structure) and bundles their results with the query term and a timestamp.

    Args:
        term: Query string passed unchanged to every search.

    Returns:
        A dict with the keys ``term``, ``timestamp`` (current UTC time as an
        ISO 8601 string without a UTC offset), ``literature`` (up to 5
        papers), ``genes`` (up to 5), ``proteins`` (up to 2) and
        ``structures`` (up to 3).
    """
    # Imported locally because the module only imports ``datetime`` itself.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware "now"
    # and drop the tzinfo so the string keeps its original offset-free format.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    return {
        "term": term,
        "timestamp": timestamp,
        "literature": search_pubmed(term, retmax=5),
        "genes": search_genes(term, retmax=5),
        "proteins": search_proteins(term, retmax=2),
        "structures": fetch_ncbi_structure(term, retmax=3),
    }