Spaces:
Running
Running
File size: 9,644 Bytes
f9cc55d de250ca f9cc55d e995e3b 487bfb7 f9cc55d 72d9500 f9cc55d 72d9500 f9cc55d 72d9500 f9cc55d e995e3b 443e9f1 f9cc55d 97ec060 f9cc55d 443e9f1 f9cc55d 443e9f1 72d9500 f9cc55d 72d9500 f9cc55d 443e9f1 f9cc55d 443e9f1 f9cc55d 443e9f1 f9cc55d 72d9500 f9cc55d 72d9500 f9cc55d 443e9f1 f9cc55d 72d9500 97ec060 f9cc55d 72d9500 443e9f1 f9cc55d 72d9500 f9cc55d 72d9500 f9cc55d 72d9500 f9cc55d de250ca f9cc55d de250ca f9cc55d de250ca f9cc55d 72d9500 f9cc55d 72d9500 f9cc55d 72d9500 f9cc55d 72d9500 f9cc55d 72d9500 f9cc55d de250ca f9cc55d 72d9500 f9cc55d de250ca f9cc55d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 |
"""
Providers module for GENESIS-AI
Handles:
1. Medical/Biological APIs (PubMed, ChEMBL, BioPortal, UMLS, NCBI)
2. AI Text Generation (Gemini, OpenAI, Claude, DeepSeek)
3. Image Generation (Gemini Vision, OpenAI DALL·E, Hugging Face Diffusion)
4. Text-to-Speech (ElevenLabs + fallback)
5. Graph DB Integration (Neo4j for pathways/funding)
"""
import os
import requests
import logging
from dotenv import load_dotenv
from neo4j import GraphDatabase
# ========================
# SETUP & LOGGING
# ========================
# Run python-dotenv's loader first so anything it adds to the environment is
# visible to the lookups below, then set the root logger to INFO.
load_dotenv()
logging.basicConfig(level=logging.INFO)

# Every constant below is read once at import time and is None when the
# corresponding environment variable is not set.

# --- Medical / biological data services ---
PUBMED_API_KEY = os.environ.get("PUBMED_API_KEY")
CHEMBL_API_KEY = os.environ.get("CHEMBL_API_KEY")
BIOPORTAL_API_KEY = os.environ.get("BIOPORTAL_API_KEY")
UMLS_API_KEY = os.environ.get("UMLS_API_KEY")
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")

# --- Generative AI providers ---
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
CLAUDE_API_KEY = os.environ.get("CLAUDE_API_KEY")
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")

# --- Text-to-speech ---
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

# --- Graph database ---
NEO4J_URI = os.environ.get("NEO4J_URI")
NEO4J_USER = os.environ.get("NEO4J_USER")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
# ========================
# OPTIONAL NEO4J CONNECTION
# ========================
# Module-level Neo4j driver handle. It stays None when any of the three
# connection settings is missing or when constructing the driver raises; the
# graph query helpers in this module test it before running a query.
neo4j_driver = None
if NEO4J_URI and NEO4J_USER and NEO4J_PASSWORD:
    try:
        neo4j_driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
        # NOTE(review): this is logged as soon as driver construction returns
        # without raising; no query or connectivity check is made here —
        # confirm whether an explicit verification step is wanted.
        logging.info("[Neo4j] Connected successfully.")
    except Exception as e:
        # Best-effort: a bad URI or bad credentials must not break module import.
        logging.error(f"[Neo4j] Connection failed: {e}")
else:
    # The dash in this message was previously a mis-decoded character.
    logging.info("[Neo4j] No URI/user/password set — skipping connection.")
# ========================
# SAFE REQUEST WRAPPER
# ========================
def safe_request(url, headers=None, params=None, data=None, method="GET", json_data=None):
    """Send a GET or POST request and return the decoded JSON body.

    Args:
        url: Target URL.
        headers: Optional dict of HTTP headers.
        params: Optional dict of query-string parameters (sent for both verbs).
        data: Optional form/raw body, used for POST only.
        method: "GET" or "POST"; matched case-insensitively.
        json_data: Optional object sent as a JSON body, used for POST only.

    Returns:
        The parsed JSON response, or None on any failure (unsupported method,
        network error, non-success HTTP status, or a body that is not JSON).
        Failures are logged and never raised to the caller.
    """
    verb = method.upper() if isinstance(method, str) else method
    if verb not in ("GET", "POST"):
        # Reject unknown verbs up front so the log states the real cause
        # instead of an unbound-variable error further down.
        logging.error(f"Request failed: unsupported HTTP method {method!r}")
        return None
    try:
        if verb == "GET":
            r = requests.get(url, headers=headers, params=params, timeout=20)
        else:
            r = requests.post(
                url,
                headers=headers,
                params=params,
                data=data,
                json=json_data,
                timeout=30,
            )
        r.raise_for_status()
        return r.json()
    except Exception as e:
        # Deliberately broad: this wrapper is the module's "never raise"
        # boundary for outbound HTTP calls.
        logging.error(f"Request failed: {e}")
        return None
# ========================
# MEDICAL & BIOLOGY API CLIENTS
# ========================
def run_pubmed_literature(query, max_results=5):
    """Search PubMed and return one summary document per matching article.

    Runs an E-utilities ``esearch`` for *query*, then one ``esummary`` call
    for each returned PMID.

    Args:
        query: Search term passed as ``term`` to esearch.
        max_results: Value passed as ``retmax`` to esearch.

    Returns:
        A list of parsed esummary JSON responses, in the order the IDs were
        returned. IDs whose summary request fails are skipped. The list is
        empty when the search itself fails or yields no IDs.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
    search = safe_request(
        f"{base_url}/esearch.fcgi",
        params={
            "db": "pubmed",
            "term": query,
            "retmode": "json",
            "retmax": max_results,
            "api_key": PUBMED_API_KEY,
        },
    )
    if not search:
        return []

    # Tolerate a null/missing "esearchresult" or "idlist" instead of crashing.
    pmids = (search.get("esearchresult") or {}).get("idlist") or []

    summary_url = f"{base_url}/esummary.fcgi"  # loop-invariant, built once
    results = []
    for pmid in pmids:
        summary = safe_request(
            summary_url,
            params={
                "db": "pubmed",
                "id": pmid,
                "retmode": "json",
                # Send the same key as the search call so the per-ID requests
                # are made under the same credentials.
                "api_key": PUBMED_API_KEY,
            },
        )
        if summary:
            results.append(summary)
    return results
def run_chembl_search(molecule_name):
    """Query the ChEMBL molecule endpoint, filtering on *molecule_name*.

    The name is sent as the ``molecule_synonyms__icontains`` filter and the
    response is requested in JSON format.

    Returns:
        Whatever ``safe_request`` returns for the call.
    """
    endpoint = "https://www.ebi.ac.uk/chembl/api/data/molecule"
    query = {"molecule_synonyms__icontains": molecule_name, "format": "json"}
    return safe_request(endpoint, params=query)
def run_bioportal_ontology(term):
    """Search BioPortal for *term*, authenticating with ``BIOPORTAL_API_KEY``.

    Returns:
        Whatever ``safe_request`` returns for the call.
    """
    endpoint = "https://data.bioontology.org/search"
    query = {"q": term, "apikey": BIOPORTAL_API_KEY}
    return safe_request(endpoint, params=query)
def run_umls_search(term):
    """Search the UMLS REST service for *term*, using ``UMLS_API_KEY``.

    Returns:
        Whatever ``safe_request`` returns for the call.
    """
    endpoint = "https://uts-ws.nlm.nih.gov/rest/search/current"
    query = {"string": term, "apiKey": UMLS_API_KEY}
    return safe_request(endpoint, params=query)
def run_ncbi_gene_lookup(gene_id):
    """Fetch the NCBI gene resource at ``/gene/<gene_id>``.

    *gene_id* is interpolated into the URL path as-is, and ``NCBI_API_KEY``
    is sent in the ``api-key`` request header.

    Returns:
        Whatever ``safe_request`` returns for the call.
    """
    endpoint = f"https://api.ncbi.nlm.nih.gov/gene/{gene_id}"
    auth_headers = {"api-key": NCBI_API_KEY}
    return safe_request(endpoint, headers=auth_headers)
# ========================
# AI TEXT GENERATION
# ========================
def _first_dict(items):
    """Return the first element of *items* if it is a dict, otherwise ``{}``."""
    if isinstance(items, list) and items and isinstance(items[0], dict):
        return items[0]
    return {}


def _gemini_text(prompt):
    """Ask Gemini for a completion; return the text, or None if unavailable."""
    if not GEMINI_API_KEY:
        return None
    resp = safe_request(
        "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent",
        # Sending the key as a header keeps it out of the request URL, which
        # may otherwise surface in logged error messages.
        headers={"x-goog-api-key": GEMINI_API_KEY, "Content-Type": "application/json"},
        method="POST",
        json_data={"contents": [{"parts": [{"text": prompt}]}]},
    )
    if not isinstance(resp, dict):
        return None
    content = _first_dict(resp.get("candidates")).get("content")
    if not isinstance(content, dict):
        return None
    return _first_dict(content.get("parts")).get("text") or None


def _openai_text(prompt):
    """Ask OpenAI for a chat completion; return the text, or None on failure."""
    if not OPENAI_API_KEY:
        return None
    try:
        # Imported lazily so the module still loads without the openai package.
        import openai
        openai.api_key = OPENAI_API_KEY
        # NOTE(review): ChatCompletion is the module-level interface of older
        # openai SDK releases — confirm the installed version still provides it.
        completion = openai.ChatCompletion.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message["content"] or None
    except Exception as e:
        logging.error(f"OpenAI error: {e}")
        return None


def _claude_text(prompt):
    """Ask Claude for a completion; return the text, or None if unavailable."""
    if not CLAUDE_API_KEY:
        return None
    resp = safe_request(
        "https://api.anthropic.com/v1/messages",
        headers={
            "x-api-key": CLAUDE_API_KEY,
            # The Messages API expects a version header on every request.
            "anthropic-version": "2023-06-01",
            "Content-Type": "application/json",
        },
        method="POST",
        json_data={
            "model": "claude-3-opus-20240229",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 500,
        },
    )
    if not isinstance(resp, dict):
        return None
    return _first_dict(resp.get("content")).get("text") or None


def _deepseek_text(prompt):
    """Ask DeepSeek for a chat completion; return the text, or None if unavailable."""
    if not DEEPSEEK_API_KEY:
        return None
    resp = safe_request(
        "https://api.deepseek.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {DEEPSEEK_API_KEY}", "Content-Type": "application/json"},
        method="POST",
        json_data={"model": "deepseek-chat", "messages": [{"role": "user", "content": prompt}]},
    )
    if not isinstance(resp, dict):
        return None
    message = _first_dict(resp.get("choices")).get("message")
    if not isinstance(message, dict):
        return None
    return message.get("content") or None


def ai_generate_text(prompt, model="gemini", fallback=True):
    """Generate text for *prompt*, preferring *model* and falling back to others.

    Fallback order: Gemini → OpenAI → Claude → DeepSeek. The requested *model*
    is tried first; the remaining providers follow in that order. A provider
    is skipped when its API key is not configured, and counts as failed when
    its request errors or yields no text.

    Args:
        prompt: User prompt sent as a single user message.
        model: Preferred provider: "gemini", "openai", "claude" or "deepseek".
        fallback: When False, only *model* is tried (no other provider is
            contacted).

    Returns:
        The generated text, or the string
        "No AI provider available or all failed." when nothing produced text.
    """
    providers = {
        "gemini": _gemini_text,
        "openai": _openai_text,
        "claude": _claude_text,
        "deepseek": _deepseek_text,
    }
    if fallback:
        # Stable sort: the requested model moves to the front, the rest keep
        # the documented order.
        order = sorted(providers, key=lambda name: name != model)
    else:
        order = [model] if model in providers else []

    for name in order:
        text = providers[name](prompt)
        if text:
            return text
    return "No AI provider available or all failed."
# ========================
# IMAGE GENERATION
# ========================
def ai_generate_image(prompt):
    """Generate an image for *prompt*, trying providers in a fixed order.

    Fallback: Gemini Vision → OpenAI → Hugging Face. Each provider is only
    attempted when its credential is set, and the first truthy result wins.

    NOTE(review): the return type depends on which provider answered — the
    Gemini and Hugging Face branches return the parsed JSON from
    ``safe_request``, the OpenAI branch returns a URL string.

    Returns:
        The first provider result, or None when every provider is skipped
        or fails.
    """
    # --- 1. Gemini ---
    gemini_reply = None
    if GEMINI_API_KEY:
        gemini_reply = safe_request(
            f"https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateImage?key={GEMINI_API_KEY}",
            method="POST",
            json_data={"prompt": prompt},
        )
    if gemini_reply:
        return gemini_reply

    # --- 2. OpenAI ---
    if OPENAI_API_KEY:
        try:
            # Imported lazily so the openai package is only needed on this path.
            import openai
            openai.api_key = OPENAI_API_KEY
            generated = openai.Image.create(model="dall-e-3", prompt=prompt, size="1024x1024")
            return generated.data[0].url
        except Exception as e:
            # Log and fall through to the next provider.
            logging.error(f"OpenAI Image error: {e}")

    # --- 3. Hugging Face (token is read from the environment at call time) ---
    HF_TOKEN = os.getenv("HF_TOKEN")
    if not HF_TOKEN:
        return None
    hf_reply = safe_request(
        "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2",
        headers={"Authorization": f"Bearer {HF_TOKEN}"},
        method="POST",
        json_data={"inputs": prompt},
    )
    return hf_reply if hf_reply else None
# ========================
# TEXT-TO-SPEECH
# ========================
def run_tts(text, voice="Rachel"):
    """Synthesize *text* to speech through the ElevenLabs HTTP API.

    Args:
        text: Text to narrate.
        voice: Value placed in the ``/v1/text-to-speech/<voice>`` URL path.
            NOTE(review): confirm whether the service expects a voice ID
            rather than a display name here.

    Returns:
        The raw response body (bytes) on HTTP 200. None when no ElevenLabs
        key is configured, the request fails, or the status is not 200.
    """
    if not ELEVENLABS_API_KEY:
        return None

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice}"
    headers = {"xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json"}
    payload = {"text": text, "voice_settings": {"stability": 0.5, "similarity_boost": 0.8}}
    try:
        # A timeout stops a stalled connection from blocking the caller forever;
        # 30 s matches the POST timeout used by safe_request.
        resp = requests.post(url, headers=headers, json=payload, timeout=30)
    except requests.RequestException as e:
        # Match the module convention: log the failure and return None.
        logging.error(f"ElevenLabs TTS request failed: {e}")
        return None

    if resp.status_code != 200:
        logging.error(f"ElevenLabs TTS returned HTTP {resp.status_code}")
        return None
    return resp.content
# ========================
# GRAPH DB QUERIES (Optional)
# ========================
def query_funding_network(keyword):
    """Return companies whose name contains *keyword*, with their investors.

    Matches ``(Company)-[:RECEIVED_FUNDING_FROM]->(Investor)`` and compares
    the company name to *keyword* case-insensitively (both sides pass through
    ``toLower``). *keyword* is bound as a query parameter, not interpolated.

    Returns:
        A list of dicts with keys ``company`` and ``investors``; an empty
        list when no Neo4j driver is configured.
    """
    if not neo4j_driver:
        # The dash in this message was previously a mis-decoded character.
        logging.warning("[Neo4j] Skipping query — no connection.")
        return []

    cypher = """
        MATCH (c:Company)-[r:RECEIVED_FUNDING_FROM]->(i:Investor)
        WHERE toLower(c.name) CONTAINS toLower($keyword)
        RETURN c.name as company, collect(i.name) as investors
        """
    with neo4j_driver.session() as session:
        result = session.run(cypher, keyword=keyword)
        # Materialise the rows while the session is still open.
        return [dict(record) for record in result]
def query_pathway_graph(pathway):
    """Return the genes linked to the pathway named exactly *pathway*.

    Matches ``(Pathway {name: $pathway})-[:INVOLVES]->(Gene)``. *pathway* is
    bound as a query parameter, not interpolated.

    Returns:
        A list of dicts with keys ``pathway`` and ``genes``; an empty list
        when no Neo4j driver is configured.
    """
    if not neo4j_driver:
        # The dash in this message was previously a mis-decoded character.
        logging.warning("[Neo4j] Skipping query — no connection.")
        return []

    cypher = """
        MATCH (p:Pathway {name: $pathway})-[r:INVOLVES]->(g:Gene)
        RETURN p.name as pathway, collect(g.name) as genes
        """
    with neo4j_driver.session() as session:
        result = session.run(cypher, pathway=pathway)
        # Materialise the rows while the session is still open.
        return [dict(record) for record in result]
# ========================
# PIPELINE COMPATIBILITY WRAPPERS
# ========================
def run_deepseek_summary(prompt):
    """Pipeline alias: call ``ai_generate_text`` with ``model="deepseek"``."""
    return ai_generate_text(prompt, model="deepseek")


def run_gemini_summary(prompt):
    """Pipeline alias: call ``ai_generate_text`` with ``model="gemini"``."""
    return ai_generate_text(prompt, model="gemini")


def run_openai_summary(prompt):
    """Pipeline alias: call ``ai_generate_text`` with ``model="openai"``."""
    return ai_generate_text(prompt, model="openai")
# NOTE(review): the three image aliases below are identical — none passes a
# provider to ai_generate_image, so each runs the same provider chain
# regardless of its name.
def run_gemini_image(prompt):
    """Pipeline alias for ``ai_generate_image(prompt)``."""
    return ai_generate_image(prompt)


def run_openai_image(prompt):
    """Pipeline alias for ``ai_generate_image(prompt)``."""
    return ai_generate_image(prompt)


def run_hf_image(prompt):
    """Pipeline alias for ``ai_generate_image(prompt)``."""
    return ai_generate_image(prompt)
def narrate_text_elevenlabs(text):
    """Pipeline alias: call ``run_tts(text)`` with its default voice."""
    return run_tts(text)
# ========================
# EXPORTS
# ========================
# Public API of this module, grouped by area (order unchanged).
__all__ = [
    # Medical / biological API clients
    "run_pubmed_literature",
    "run_chembl_search",
    "run_bioportal_ontology",
    "run_umls_search",
    "run_ncbi_gene_lookup",
    # Generic AI entry points
    "ai_generate_text",
    "ai_generate_image",
    "run_tts",
    # Graph database queries
    "query_funding_network",
    "query_pathway_graph",
    # Pipeline compatibility wrappers
    "run_deepseek_summary",
    "run_gemini_summary",
    "run_openai_summary",
    "run_gemini_image",
    "run_openai_image",
    "run_hf_image",
    "narrate_text_elevenlabs",
]
|