# genesis/providers.py
"""
Providers module for GENESIS-AI
Handles:
1. Medical/Biological APIs (PubMed, ChEMBL, BioPortal, UMLS, NCBI)
2. AI Text Generation (Gemini, OpenAI, Claude, DeepSeek)
3. Image Generation (Gemini Vision, OpenAI DALL·E, Hugging Face Diffusion)
4. Text-to-Speech (ElevenLabs + fallback)
5. Graph DB Integration (Neo4j for pathways/funding)
"""
import os
import requests
import logging
from dotenv import load_dotenv
from neo4j import GraphDatabase
# ========================
# SETUP & LOGGING
# ========================
load_dotenv()
logging.basicConfig(level=logging.INFO)
# API KEYS
PUBMED_API_KEY = os.getenv("PUBMED_API_KEY")
CHEMBL_API_KEY = os.getenv("CHEMBL_API_KEY")
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
UMLS_API_KEY = os.getenv("UMLS_API_KEY")
NCBI_API_KEY = os.getenv("NCBI_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY")
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USER = os.getenv("NEO4J_USER")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
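# A minimal .env sketch for reference (the variable names come from the getenv calls above; the
# values are placeholders, and any provider you do not use can simply stay unset):
#
#   PUBMED_API_KEY=...
#   GEMINI_API_KEY=...
#   OPENAI_API_KEY=...
#   ELEVENLABS_API_KEY=...
#   NEO4J_URI=bolt://localhost:7687
#   NEO4J_USER=neo4j
#   NEO4J_PASSWORD=...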
# ========================
# OPTIONAL NEO4J CONNECTION
# ========================
neo4j_driver = None
if NEO4J_URI and NEO4J_USER and NEO4J_PASSWORD:
try:
neo4j_driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
logging.info("[Neo4j] Connected successfully.")
except Exception as e:
logging.error(f"[Neo4j] Connection failed: {e}")
else:
logging.info("[Neo4j] No URI/user/password set — skipping connection.")
# ========================
# SAFE REQUEST WRAPPER
# ========================
def safe_request(url, headers=None, params=None, data=None, method="GET", json_data=None):
    """Thin HTTP wrapper: returns parsed JSON on success, logs and returns None on any failure."""
    try:
        if method == "GET":
            r = requests.get(url, headers=headers, params=params, timeout=20)
        elif method == "POST":
            r = requests.post(url, headers=headers, params=params, data=data, json=json_data, timeout=30)
        else:
            raise ValueError(f"Unsupported HTTP method: {method}")
        r.raise_for_status()
        return r.json()
    except Exception as e:
        logging.error(f"Request failed: {e}")
        return None
# ========================
# MEDICAL & BIOLOGY API CLIENTS
# ========================
def run_pubmed_literature(query, max_results=5):
    """Search PubMed via NCBI E-utilities and return ESummary JSON for the top matches."""
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
params = {
"db": "pubmed",
"term": query,
"retmode": "json",
"retmax": max_results,
"api_key": PUBMED_API_KEY
}
data = safe_request(url, params=params)
if not data:
return []
ids = data.get("esearchresult", {}).get("idlist", [])
results = []
for pmid in ids:
fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
fetch_params = {"db": "pubmed", "id": pmid, "retmode": "json"}
summary = safe_request(fetch_url, params=fetch_params)
if summary:
results.append(summary)
return results
def run_chembl_search(molecule_name):
    """Look up ChEMBL molecules whose synonyms contain the given name."""
    return safe_request(
        "https://www.ebi.ac.uk/chembl/api/data/molecule",
        params={"molecule_synonyms__icontains": molecule_name, "format": "json"}
    )
def run_bioportal_ontology(term):
    """Search BioPortal ontologies for a term."""
    return safe_request(
        "https://data.bioontology.org/search",
        params={"q": term, "apikey": BIOPORTAL_API_KEY}
    )
def run_umls_search(term):
    """Search the UMLS Metathesaurus for a term."""
    return safe_request(
        "https://uts-ws.nlm.nih.gov/rest/search/current",
        params={"string": term, "apiKey": UMLS_API_KEY}
    )
def run_ncbi_gene_lookup(gene_id):
    """Fetch gene metadata from the NCBI API by gene ID."""
    return safe_request(
        f"https://api.ncbi.nlm.nih.gov/gene/{gene_id}",
        headers={"api-key": NCBI_API_KEY}
    )
# ========================
# AI TEXT GENERATION
# ========================
def ai_generate_text(prompt, model="gemini"):
    """Generate text with the requested provider: "gemini", "openai", "claude", or "deepseek".
    Only the selected provider is tried; a notice string is returned if it is unavailable or fails."""
if model == "gemini" and GEMINI_API_KEY:
resp = safe_request(
f"https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key={GEMINI_API_KEY}",
method="POST",
json_data={"contents": [{"parts": [{"text": prompt}]}]}
)
if resp:
return resp.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "")
    if model == "openai" and OPENAI_API_KEY:
        try:
            # Uses the legacy openai<1.0 SDK (openai.ChatCompletion); openai>=1.0 moved this to client.chat.completions.create.
            import openai
            openai.api_key = OPENAI_API_KEY
completion = openai.ChatCompletion.create(
model="gpt-4o",
messages=[{"role": "user", "content": prompt}]
)
return completion.choices[0].message["content"]
except Exception as e:
logging.error(f"OpenAI error: {e}")
if model == "claude" and CLAUDE_API_KEY:
resp = safe_request(
"https://api.anthropic.com/v1/messages",
            headers={"x-api-key": CLAUDE_API_KEY, "anthropic-version": "2023-06-01", "Content-Type": "application/json"},
method="POST",
json_data={"model": "claude-3-opus-20240229", "messages": [{"role": "user", "content": prompt}], "max_tokens": 500}
)
if resp:
return resp.get("content", [{}])[0].get("text", "")
if model == "deepseek" and DEEPSEEK_API_KEY:
resp = safe_request(
"https://api.deepseek.com/v1/chat/completions",
headers={"Authorization": f"Bearer {DEEPSEEK_API_KEY}", "Content-Type": "application/json"},
method="POST",
json_data={"model": "deepseek-chat", "messages": [{"role": "user", "content": prompt}]}
)
if resp:
return resp.get("choices", [{}])[0].get("message", {}).get("content", "")
return "No AI provider available or all failed."
# ========================
# IMAGE GENERATION
# ========================
def ai_generate_image(prompt):
    """Generate an image with provider fallback: Gemini → OpenAI DALL·E → Hugging Face."""
    if GEMINI_API_KEY:
        # NOTE: this endpoint is speculative; gemini-pro-vision analyses images rather than generating
        # them, so this branch may need updating to Google's current image-generation API.
        resp = safe_request(
            f"https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateImage?key={GEMINI_API_KEY}",
method="POST",
json_data={"prompt": prompt}
)
if resp:
return resp
if OPENAI_API_KEY:
        try:
            # Legacy openai<1.0 SDK (openai.Image); openai>=1.0 uses client.images.generate.
            import openai
            openai.api_key = OPENAI_API_KEY
image = openai.Image.create(model="dall-e-3", prompt=prompt, size="1024x1024")
return image.data[0].url
except Exception as e:
logging.error(f"OpenAI Image error: {e}")
    HF_TOKEN = os.getenv("HF_TOKEN")
    if HF_TOKEN:
        try:
            # The Hugging Face inference endpoint returns raw image bytes, not JSON, so call requests directly.
            r = requests.post(
                "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2",
                headers={"Authorization": f"Bearer {HF_TOKEN}"},
                json={"inputs": prompt},
                timeout=60,
            )
            r.raise_for_status()
            return r.content
        except Exception as e:
            logging.error(f"Hugging Face image error: {e}")
    return None
# ========================
# TEXT-TO-SPEECH
# ========================
def run_tts(text, voice="Rachel"):
    """Convert text to speech via ElevenLabs; returns raw audio bytes or None."""
    if ELEVENLABS_API_KEY:
        # ElevenLabs expects a voice ID in the URL path; "Rachel" is a voice name, so substitute its ID if needed.
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice}"
        headers = {"xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json"}
        try:
            payload = {"text": text, "voice_settings": {"stability": 0.5, "similarity_boost": 0.8}}
            resp = requests.post(url, headers=headers, json=payload, timeout=60)
            if resp.status_code == 200:
                return resp.content
            logging.error(f"[ElevenLabs] TTS failed with status {resp.status_code}")
        except Exception as e:
            logging.error(f"[ElevenLabs] TTS request error: {e}")
    return None
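# Usage sketch: run_tts returns raw audio bytes (MP3 by default for ElevenLabs), so a caller would
# typically write them to a file, e.g.
#
#   audio = run_tts("Hello from GENESIS-AI")
#   if audio:
#       with open("narration.mp3", "wb") as f:
#           f.write(audio)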
# ========================
# GRAPH DB QUERIES (Optional)
# ========================
def query_funding_network(keyword):
    """Return companies whose name contains the keyword, with their investors, from Neo4j."""
    if not neo4j_driver:
logging.warning("[Neo4j] Skipping query — no connection.")
return []
with neo4j_driver.session() as session:
result = session.run(
"""
MATCH (c:Company)-[r:RECEIVED_FUNDING_FROM]->(i:Investor)
WHERE toLower(c.name) CONTAINS toLower($keyword)
RETURN c.name as company, collect(i.name) as investors
""",
keyword=keyword
)
return [dict(record) for record in result]
def query_pathway_graph(pathway):
    """Return the genes a named pathway involves, from Neo4j."""
    if not neo4j_driver:
logging.warning("[Neo4j] Skipping query — no connection.")
return []
with neo4j_driver.session() as session:
result = session.run(
"""
MATCH (p:Pathway {name: $pathway})-[r:INVOLVES]->(g:Gene)
RETURN p.name as pathway, collect(g.name) as genes
""",
pathway=pathway
)
return [dict(record) for record in result]
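# Illustrative Cypher for seeding a graph that matches the labels and relationships the two queries
# above assume (hypothetical example data, not created by this module):
#
#   CREATE (:Company {name: "ExampleBio"})-[:RECEIVED_FUNDING_FROM]->(:Investor {name: "Example Fund"});
#   CREATE (:Pathway {name: "glycolysis"})-[:INVOLVES]->(:Gene {name: "HK1"});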
# ========================
# PIPELINE COMPATIBILITY WRAPPERS
# ========================
def run_deepseek_summary(prompt): return ai_generate_text(prompt, model="deepseek")
def run_gemini_summary(prompt): return ai_generate_text(prompt, model="gemini")
def run_openai_summary(prompt): return ai_generate_text(prompt, model="openai")
# The three image wrappers below all delegate to the same fallback chain in ai_generate_image.
def run_gemini_image(prompt): return ai_generate_image(prompt)
def run_openai_image(prompt): return ai_generate_image(prompt)
def run_hf_image(prompt): return ai_generate_image(prompt)
def narrate_text_elevenlabs(text): return run_tts(text)
# ========================
# EXPORTS
# ========================
__all__ = [
"run_pubmed_literature",
"run_chembl_search",
"run_bioportal_ontology",
"run_umls_search",
"run_ncbi_gene_lookup",
"ai_generate_text",
"ai_generate_image",
"run_tts",
"query_funding_network",
"query_pathway_graph",
"run_deepseek_summary",
"run_gemini_summary",
"run_openai_summary",
"run_gemini_image",
"run_openai_image",
"run_hf_image",
"narrate_text_elevenlabs"
]
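# ========================
# MANUAL SMOKE TEST
# ========================
# Minimal sketch for exercising the providers from the command line, assuming the relevant API keys
# are set in the environment; each helper degrades gracefully (empty list, notice string, or None)
# when its provider is not configured.
if __name__ == "__main__":
    papers = run_pubmed_literature("CRISPR base editing", max_results=2)
    print(f"PubMed summaries fetched: {len(papers)}")
    print(ai_generate_text("Summarise CRISPR base editing in one sentence.", model="gemini"))
    print("Funding records:", query_funding_network("bio"))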