# Spaces: Running / Running  (hosting-page status text captured with the file; not part of the module)
"""
GENESIS-AI — Multimodal Synthetic Biology Research Pipeline
Coordinates ontology expansion, literature review, AI summarization, image generation,
funding intelligence, safety/biosecurity checks, and report export.
"""
import os
import re
from datetime import datetime, timezone
from typing import Any, Callable, Dict, List, Optional

# API clients
from genesis.api_clients.pubmed_api import search_pubmed_literature
from genesis.api_clients.bioportal_api import expand_with_bioportal
from genesis.api_clients.umls_api import expand_with_umls
from genesis.api_clients.chembl_api import get_molecule_data
from genesis.structures import fetch_structures_for_terms

# Core logic providers
from genesis.providers import (
    run_deepseek_summary,
    run_gemini_summary,
    run_openai_summary,
    run_gemini_image,
    run_openai_image,
    run_hf_image,
    narrate_text_elevenlabs,
)

# Utility modules
from genesis.utils.pdf_export import export_report_to_pdf
from genesis.utils.graph_tools import write_topic_and_papers

# Visualizations
from genesis.visualization import generate_pathway_graph, generate_funding_network

# Data sources
from genesis.funding import fetch_funding_data
from genesis.trials import fetch_clinical_trials
from genesis.biosecurity import analyze_biosecurity_risks
from genesis.regulation import fetch_regulatory_info
from genesis.safety import analyze_safety_concerns
from genesis.ontology import merge_ontology_terms
# Environment vars
# Both are read once at import time and are None when the variable is unset.
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")  # narration runs only when this is set
NEO4J_URI = os.getenv("NEO4J_URI")  # graph persistence runs only when this is set
SYNBIO_MODE = True  # Bias towards synthetic biology context

# Demo queries
# The dash in two entries had been mis-decoded to a stray Greek beta; it is
# restored here so the strings display correctly.
DEMO_QUERIES = [
    "CRISPR living therapeutics in clinical trials since 2020",
    "AI-designed enzymes for plastic degradation — literature + pathways",
    "Synthetic biology startups in oncology — funding map",
    "Metabolic pathway for artemisinin biosynthesis in yeast",
    "Oncolytic virus engineering — biosecurity risk analysis",
]
# ---------- Helper Functions ---------- | |
# Compiled once at import; the patterns themselves are unchanged.
_DOI_RE = re.compile(r"(10\.\d{4,9}/[-._;()/:A-Z0-9]+)", re.IGNORECASE)
_PMID_RE = re.compile(r"PMID:\s*(\d+)", re.IGNORECASE)
_URL_RE = re.compile(r"(https?://[^\s)]+)", re.IGNORECASE)


def _trim_trailing_punctuation(token: str) -> str:
    """Remove sentence punctuation and unbalanced ')' from the end of *token*.

    The DOI and URL patterns are greedy and accept '.', ',', ';', ':' (and, for
    DOIs, parentheses), so a match taken from running prose often ends with
    the sentence's own punctuation. A closing parenthesis is removed only when
    it has no matching opener inside the token, so DOIs that legitimately
    contain balanced parentheses are preserved.
    """
    while token:
        trimmed = token.rstrip(".,;:")
        if trimmed.endswith(")") and trimmed.count(")") > trimmed.count("("):
            trimmed = trimmed[:-1]
        if trimmed == token:
            break
        token = trimmed
    return token


def extract_citations(text: str) -> List[Dict[str, str]]:
    """Extract DOI, PMID, and URL citations from *text*.

    Args:
        text: Free text to scan. An empty or ``None`` value yields ``[]``.

    Returns:
        A list of dicts with keys ``"type"`` (``"DOI"``, ``"PMID"`` or
        ``"URL"``), ``"id"`` (empty for plain URLs) and ``"url"``. DOIs come
        first, then PMIDs, then URLs, each in order of appearance. Entries
        are de-duplicated by their ``"url"`` value, so a DOI that also appears
        as a ``https://doi.org/...`` link is reported once, as a DOI.
    """
    if not text:
        return []

    citations: List[Dict[str, str]] = []
    seen_urls = set()  # O(1) duplicate check instead of rescanning the list

    def add(kind: str, ident: str, url: str) -> None:
        if url not in seen_urls:
            seen_urls.add(url)
            citations.append({"type": kind, "id": ident, "url": url})

    for match in _DOI_RE.finditer(text):
        doi = _trim_trailing_punctuation(match.group(1))
        # Skip a match whose suffix was nothing but punctuation.
        if doi.partition("/")[2]:
            add("DOI", doi, f"https://doi.org/{doi}")

    for match in _PMID_RE.finditer(text):
        pmid = match.group(1)
        add("PMID", pmid, f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/")

    for match in _URL_RE.finditer(text):
        add("URL", "", _trim_trailing_punctuation(match.group(1)))

    return citations
def inject_synbio_context(query: str, expanded_terms: List[str]) -> str:
    """Build the summarizer prompt: expert persona, then the query and its terms.

    Args:
        query: The user's original question, inserted verbatim.
        expanded_terms: Related terms, rendered as a comma-separated list.

    Returns:
        The persona paragraph, a blank line, a ``Query:`` line and an
        ``Expanded terms:`` line.
    """
    persona = "".join((
        "You are an expert in synthetic biology, biosecurity, and regulatory affairs. ",
        "Provide literature review, molecular insights, market trends, and policy implications. ",
        "Focus on CRISPR, metabolic engineering, living therapeutics, protein design, biosensors, and biosecurity. ",
        "Be concise, factual, and provide citations.",
    ))
    joined_terms = ", ".join(expanded_terms)
    return f"{persona}\n\nQuery: {query}\nExpanded terms: {joined_terms}"
# ---------- Main Pipeline ---------- | |
def _first_successful(providers: List[Callable[[str], Any]], prompt: str, label: str) -> Optional[Any]:
    """Call *providers* in order with *prompt* and stop at the first truthy result.

    A provider that raises is logged and skipped. When no provider returns a
    truthy value, the last value returned by any provider is handed back
    (``None`` if none returned at all). *label* is the noun used in the
    success log line.
    """
    result = None
    for provider in providers:
        try:
            result = provider(prompt)
        except Exception as e:
            print(f"[Pipeline] {provider.__name__} failed: {e}")
            continue
        if result:
            print(f"[Pipeline] {label} generated by {provider.__name__}")
            break
    return result


def multimodal_research(query: str, narration: bool = False, generate_pdf: bool = False) -> Dict[str, Any]:
    """Run the GENESIS-AI research pipeline for *query* and collect every artifact.

    Args:
        query: Research question or topic.
        narration: When true and ``ELEVEN_LABS_API_KEY`` is set, narrate the summary.
        generate_pdf: When true, export a PDF report.

    Returns:
        A dict with keys ``timestamp`` (naive UTC ISO-8601 string), ``query``,
        ``expanded_terms``, ``summary``, ``citations``, ``structures``,
        ``image_url``, ``funding_data``, ``trial_data``, ``regulation_data``,
        ``safety_data``, ``biosecurity_data``, ``pathway_graph``,
        ``funding_graph``, ``audio_url`` and ``pdf_path``. Optional artifacts
        that were skipped or failed are ``None``.

    Raises:
        Summarization, image generation, Neo4j persistence, narration and PDF
        export log their failures and continue. Exceptions from every other
        step (ontology expansion, the PubMed fallback, structure lookup, the
        funding/trials/regulation/safety/biosecurity fetches and graph
        generation) are not caught here and propagate to the caller.
    """
    print(f"[Pipeline] Starting research for query: {query}")

    # 1 — Expand query with ontology
    expanded_terms = merge_ontology_terms(
        query,
        expand_with_umls(query),
        expand_with_bioportal(query),
    )
    print(f"[Pipeline] Expanded terms: {expanded_terms}")

    # 2 — Enrich query with domain-specific context
    enriched_query = inject_synbio_context(query, expanded_terms) if SYNBIO_MODE else query

    # 3 — Summarization (fallback order)
    summary = _first_successful(
        [run_deepseek_summary, run_gemini_summary, run_openai_summary],
        enriched_query,
        "Summary",
    )
    if not summary:
        summary = "No summary generated — please refine your query."

    # 4 — Extract citations, fallback to PubMed if none found
    citations = extract_citations(summary)
    if not citations:
        print("[Pipeline] No citations in summary, querying PubMed...")
        citations = search_pubmed_literature(query)

    # 5 — Fetch related structures (NCBI, ChEMBL)
    structures = fetch_structures_for_terms(expanded_terms)

    # 6 — Image generation with fallback (uses the raw query, not the enriched prompt)
    image_url = _first_successful(
        [run_gemini_image, run_openai_image, run_hf_image],
        query,
        "Image",
    )

    # 7 — Funding, trials, regulations, safety, biosecurity
    # Falsy results are normalized to an empty list.
    funding_data = fetch_funding_data(query) or []
    trial_data = fetch_clinical_trials(query) or []
    regulation_data = fetch_regulatory_info(query) or []
    safety_data = analyze_safety_concerns(query) or []
    biosecurity_data = analyze_biosecurity_risks(query) or []

    # 8 — Graph visualizations
    # NOTE(review): run_pathway_analysis calls generate_pathway_graph with two
    # arguments; here it receives one. Confirm which matches the signature.
    pathway_graph = generate_pathway_graph(query) if expanded_terms else None
    funding_graph = generate_funding_network(query) if funding_data else None

    # 9 — Save to Neo4j if configured
    if NEO4J_URI:
        try:
            write_topic_and_papers(query, citations, expanded_terms)
            print("[Pipeline] Data saved to Neo4j")
        except Exception as e:
            print(f"[Pipeline] Neo4j save failed: {e}")

    # 10 — Narration (optional)
    audio_url = None
    if narration and ELEVEN_LABS_API_KEY:
        try:
            audio_url = narrate_text_elevenlabs(summary)
            print("[Pipeline] Narration generated")
        except Exception as e:
            print(f"[Pipeline] Narration failed: {e}")

    # 11 — PDF export (optional)
    pdf_path = None
    if generate_pdf:
        try:
            pdf_path = export_report_to_pdf(query, summary, citations, structures, funding_data, regulation_data)
            print("[Pipeline] PDF report generated")
        except Exception as e:
            print(f"[Pipeline] PDF generation failed: {e}")

    # datetime.utcnow() is deprecated; dropping tzinfo afterwards keeps the
    # timestamp in the same naive-UTC ISO format it had before.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    return {
        "timestamp": timestamp,
        "query": query,
        "expanded_terms": expanded_terms,
        "summary": summary,
        "citations": citations,
        "structures": structures,
        "image_url": image_url,
        "funding_data": funding_data,
        "trial_data": trial_data,
        "regulation_data": regulation_data,
        "safety_data": safety_data,
        "biosecurity_data": biosecurity_data,
        "pathway_graph": pathway_graph,
        "funding_graph": funding_graph,
        "audio_url": audio_url,
        "pdf_path": pdf_path,
    }
# ---------- Wrappers for app.py ---------- | |
def research_once(topic: str) -> Dict[str, Any]:
    """Run multimodal_research on *topic* with its default options.

    Narration and PDF export keep their defaults (both off); the pipeline's
    result dict is returned unchanged.
    """
    report = multimodal_research(topic)
    return report
def run_literature_review(query: str):
    """Return ``(summary, citations)`` for *query* (literature review tab).

    Runs the full research pipeline with default options and keeps only the
    two fields this tab displays.
    """
    report = multimodal_research(query)
    summary_text = report["summary"]
    citation_list = report["citations"]
    return summary_text, citation_list
def run_molecule_lookup(molecule_name: str):
    """Look up *molecule_name* and try to render an image of it (molecule lookup tab).

    Returns:
        A ``(text, image)`` pair. ``text`` is ``str()`` of a dict holding the
        ChEMBL data (or ``{"error": ...}`` when that lookup raised) and the
        structure results. ``image`` is the first truthy result from the image
        providers; when none succeeds it is the last value any provider
        returned (``None`` if none returned at all).

    Raises:
        Exceptions from ``fetch_structures_for_terms`` are not caught here.
    """
    try:
        chembl_data = get_molecule_data(molecule_name)
    except Exception as e:
        chembl_data = {"error": str(e)}

    structures = fetch_structures_for_terms([molecule_name])

    img_url = None
    for img_fn in [run_gemini_image, run_openai_image, run_hf_image]:
        try:
            img_url = img_fn(molecule_name)
        except Exception as e:
            # Catch Exception rather than everything, so Ctrl-C and SystemExit
            # still propagate; log the failure instead of hiding it.
            print(f"[Molecule lookup] {img_fn.__name__} failed: {e}")
            continue
        if img_url:
            break

    return str({"chembl": chembl_data, "structures": structures}), img_url
def run_pathway_analysis(pathway_name: str):
    """Return ``(caption, graph)`` for *pathway_name* (pathway analysis tab).

    The graph generator receives the name twice: once as its first argument
    and once wrapped in a single-element list.
    """
    name_as_list = [pathway_name]
    graph = generate_pathway_graph(pathway_name, name_as_list)
    caption = f"Pathway analysis for {pathway_name}"
    return caption, graph
def run_funding_analysis(keyword: str):
    """Return the funding data for *keyword* as text (funding analysis tab).

    The result is ``str()`` of whatever ``fetch_funding_data`` returns; it is
    not normalized or reformatted here.
    """
    return str(fetch_funding_data(keyword))
def run_image_analysis(image_path: str):
    """Ask the text summarizers to analyze the image at *image_path* (image analysis tab).

    Providers are tried in the order Gemini, OpenAI, DeepSeek. The first
    truthy response is returned; if none is produced the fixed message
    ``"Image analysis failed."`` is returned.

    NOTE(review): only the path string is embedded in the prompt. Whether any
    summarizer actually opens the file is not visible here — confirm.
    """
    prompt = f"Analyze this microscopy or biological image: {image_path}"
    analysis = None
    for summarizer in [run_gemini_summary, run_openai_summary, run_deepseek_summary]:
        try:
            analysis = summarizer(prompt)
        except Exception as e:
            # Catch Exception rather than everything, so Ctrl-C and SystemExit
            # still propagate; log the failure instead of hiding it.
            print(f"[Image analysis] {summarizer.__name__} failed: {e}")
            continue
        if analysis:
            break
    return analysis or "Image analysis failed."