# genesis/pipeline.py
import os
import re
from datetime import datetime, timezone
from typing import Any, Callable, Dict, Iterable, List

# API client imports
from genesis.api_clients.pubmed_api import search_pubmed_literature
from genesis.api_clients.bioportal_api import expand_with_bioportal
from genesis.api_clients.umls_api import expand_with_umls
from genesis.api_clients.chembl_api import get_molecule_data
from genesis.api_clients.ncbi_api import fetch_ncbi_structure
from genesis.utils.pdf_export import export_report_to_pdf

# Core logic providers
from genesis.providers import (
    run_deepseek_summary,
    run_gemini_summary,
    run_openai_summary,
    run_gemini_image,
    run_openai_image,
    run_hf_image,
    narrate_text_elevenlabs,
)
from genesis.visualization import generate_pathway_graph, generate_funding_network
from genesis.funding import fetch_funding_data
from genesis.trials import fetch_clinical_trials
from genesis.biosecurity import analyze_biosecurity_risks
from genesis.regulation import fetch_regulatory_info
from genesis.safety import analyze_safety_concerns
from genesis.structures import fetch_structures_for_terms
from genesis.ontology import merge_ontology_terms
from genesis.utils.graph_tools import write_topic_and_papers
# Environment vars (each is None when the variable is not set).
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USER = os.getenv("NEO4J_USER")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

SYNBIO_MODE = True  # Bias towards synthetic biology context

# Demo queries for UI preload.
# NOTE(review): the separator in three of these strings had been mis-encoded
# as a Greek beta; it is restored here as an em dash -- confirm against the
# original file if an en dash was intended.
DEMO_QUERIES = [
    "CRISPR living therapeutics in clinical trials since 2020",
    "AI-designed enzymes for plastic degradation — literature + pathways",
    "Synthetic biology startups in oncology — funding map",
    "Metabolic pathway for artemisinin biosynthesis in yeast",
    "Oncolytic virus engineering — biosecurity risk analysis",
]
# Compiled once at import time; all three are matched case-insensitively.
_DOI_RE = re.compile(r"(10\.\d{4,9}/[-._;()/:A-Z0-9]+)", re.IGNORECASE)
_PMID_RE = re.compile(r"PMID:\s*(\d+)", re.IGNORECASE)
_URL_RE = re.compile(r"(https?://[^\s)]+)", re.IGNORECASE)


def _trim_trailing_punctuation(token: str) -> str:
    """Drop sentence punctuation and unbalanced ')' from the end of *token*."""
    while token:
        last = token[-1]
        if last in ".,;:":
            token = token[:-1]
        elif last == ")" and token.count(")") > token.count("("):
            # A closing parenthesis with no partner belongs to the prose
            # around the identifier, not to the identifier itself.
            token = token[:-1]
        else:
            break
    return token


def extract_citations(text: str) -> List[Dict[str, str]]:
    """Extract citations (DOI, PMID, URLs) from text.

    Args:
        text: Free text to scan. Empty or ``None`` yields an empty list.

    Returns:
        A list of dicts with the keys ``"type"`` (``"DOI"``, ``"PMID"`` or
        ``"URL"``), ``"id"`` (empty string for plain URLs) and ``"url"``.
        DOIs come first, then PMIDs, then remaining URLs, each in order of
        appearance. Entries are unique by ``"url"``, so a DOI that is also
        written out as a ``https://doi.org/...`` link is reported once.
    """
    if not text:
        return []

    citations: List[Dict[str, str]] = []
    seen_urls = set()

    def _add(kind: str, identifier: str, url: str) -> None:
        if url in seen_urls:
            return
        seen_urls.add(url)
        citations.append({"type": kind, "id": identifier, "url": url})

    for match in _DOI_RE.finditer(text):
        # The DOI character class allows '.', ';', ':' and ')', so a DOI at
        # the end of a sentence or inside parentheses would otherwise keep
        # that punctuation.
        doi = _trim_trailing_punctuation(match.group(1))
        if doi.endswith("/"):
            # Nothing but punctuation followed the prefix; not a usable DOI.
            continue
        _add("DOI", doi, f"https://doi.org/{doi}")

    for match in _PMID_RE.finditer(text):
        pmid = match.group(1)
        _add("PMID", pmid, f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/")

    for match in _URL_RE.finditer(text):
        url = _trim_trailing_punctuation(match.group(1))
        _add("URL", "", url)

    return citations
def inject_synbio_context(query: str, expanded_terms: List[str]) -> str:
    """Build the summarizer prompt: fixed synthetic-biology context plus query.

    Args:
        query: The user's research question, inserted verbatim.
        expanded_terms: Related terms appended as a comma-separated list.
            ``None`` is treated as an empty list, and non-string items are
            converted with ``str()`` so that joining cannot fail.

    Returns:
        The context paragraph, a blank line, then ``Query: ...`` and
        ``Expanded terms: ...`` on separate lines.
    """
    context = (
        "You are an expert in synthetic biology, biosecurity, and regulatory affairs. "
        "Provide literature review, molecular insights, market trends, and policy implications. "
        "Focus on CRISPR, metabolic engineering, living therapeutics, protein design, biosensors, and biosecurity. "
        "Be concise, factual, and provide citations."
    )
    joined_terms = ", ".join(str(term) for term in (expanded_terms or []))
    return f"{context}\n\nQuery: {query}\nExpanded terms: {joined_terms}"
def _first_successful(providers: Iterable[Callable[[str], Any]], prompt: str, label: str) -> Any:
    """Return the first truthy result from *providers*, trying them in order.

    A provider that raises is logged and skipped. When none yields a truthy
    value, the last value actually returned (or ``None``) is passed back.
    """
    result = None
    for provider in providers:
        try:
            result = provider(prompt)
        except Exception as e:  # best-effort: fall through to the next provider
            print(f"[Pipeline] {provider.__name__} failed: {e}")
            continue
        if result:
            print(f"[Pipeline] {label} generated by {provider.__name__}")
            return result
    return result


def multimodal_research(query: str, narration: bool = False, generate_pdf: bool = False) -> Dict[str, Any]:
    """Main multi-modal synthetic biology pipeline.

    Runs, in order: ontology expansion, prompt enrichment, summarization,
    citation extraction, structure lookup, image generation, the
    funding/trials/regulation/safety/biosecurity fetches, graph generation,
    and the optional Neo4j save, narration and PDF export.

    Summarization, image generation, the Neo4j save, narration and PDF
    export are best-effort: failures are printed and the pipeline continues.
    Exceptions raised by the other calls are not caught here and propagate
    to the caller.

    Args:
        query: The research question.
        narration: When true and ``ELEVEN_LABS_API_KEY`` is set, narrate the
            summary.
        generate_pdf: When true, export a PDF report.

    Returns:
        A dict with the keys ``timestamp``, ``query``, ``expanded_terms``,
        ``summary``, ``citations``, ``structures``, ``image_url``,
        ``funding_data``, ``trial_data``, ``regulation_data``,
        ``safety_data``, ``biosecurity_data``, ``pathway_graph``,
        ``funding_graph``, ``audio_url`` and ``pdf_path``. Optional outputs
        that were skipped or failed are ``None``.
    """
    print(f"[Pipeline] Starting research for query: {query}")

    # 1 - Expand query with ontology
    expanded_terms = merge_ontology_terms(
        query,
        expand_with_umls(query),
        expand_with_bioportal(query),
    )
    print(f"[Pipeline] Expanded terms: {expanded_terms}")

    # 2 - Inject domain-specific context
    enriched_query = inject_synbio_context(query, expanded_terms) if SYNBIO_MODE else query

    # 3 - Summarization with fallback
    summary = _first_successful(
        [run_deepseek_summary, run_gemini_summary, run_openai_summary],
        enriched_query,
        "Summary",
    )
    if not summary:
        summary = "No summary generated — please refine your query."

    # 4 - Citations extraction & PubMed fallback
    citations = extract_citations(summary)
    if not citations:
        print("[Pipeline] No citations in summary, querying PubMed...")
        # Normalise a None result to [] like the other fetches below, so the
        # Neo4j and PDF steps always receive a list.
        citations = search_pubmed_literature(query) or []

    # 5 - Structures (NCBI, ChEMBL)
    structures = fetch_structures_for_terms(expanded_terms)

    # 6 - Image generation with fallback (uses the raw query, not the
    # enriched prompt)
    image_url = _first_successful(
        [run_gemini_image, run_openai_image, run_hf_image],
        query,
        "Image",
    )

    # 7 - Funding, Trials, Regulation, Safety, Biosecurity
    funding_data = fetch_funding_data(query) or []
    trial_data = fetch_clinical_trials(query) or []
    regulation_data = fetch_regulatory_info(query) or []
    safety_data = analyze_safety_concerns(query) or []
    biosecurity_data = analyze_biosecurity_risks(query) or []

    # 8 - Graph visualizations (skipped when there is nothing to draw)
    pathway_graph = generate_pathway_graph(query, expanded_terms) if expanded_terms else None
    funding_graph = generate_funding_network(query, funding_data) if funding_data else None

    # 9 - Save to Neo4j
    if NEO4J_URI:
        try:
            write_topic_and_papers(query, citations, expanded_terms)
            print("[Pipeline] Data saved to Neo4j")
        except Exception as e:
            print(f"[Pipeline] Neo4j save failed: {e}")

    # 10 - Optional narration
    audio_url = None
    if narration and ELEVEN_LABS_API_KEY:
        try:
            audio_url = narrate_text_elevenlabs(summary)
            print("[Pipeline] Narration generated")
        except Exception as e:
            print(f"[Pipeline] Narration failed: {e}")

    # 11 - Optional PDF export
    pdf_path = None
    if generate_pdf:
        try:
            pdf_path = export_report_to_pdf(query, summary, citations, structures, funding_data, regulation_data)
            print("[Pipeline] PDF report generated")
        except Exception as e:
            print(f"[Pipeline] PDF generation failed: {e}")

    # 12 - Build output
    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the ISO string keeps the same offset-free format as before.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    return {
        "timestamp": timestamp,
        "query": query,
        "expanded_terms": expanded_terms,
        "summary": summary,
        "citations": citations,
        "structures": structures,
        "image_url": image_url,
        "funding_data": funding_data,
        "trial_data": trial_data,
        "regulation_data": regulation_data,
        "safety_data": safety_data,
        "biosecurity_data": biosecurity_data,
        "pathway_graph": pathway_graph,
        "funding_graph": funding_graph,
        "audio_url": audio_url,
        "pdf_path": pdf_path,
    }