# genesis/pipeline.py
"""End-to-end research pipeline.

Combines ontology term expansion, LLM summarization and polishing, a PubMed
literature search, molecular-structure lookup, image generation, an optional
graph-database write and optional audio narration into a single call,
``research_once``.
"""

import os
from datetime import datetime, timezone
from typing import Dict, Any, List

from genesis.ontology import expand_terms_with_ontology
from genesis.structures import fetch_structures_for_terms
from genesis.narration import narrate_text
from genesis.graphdb import write_topic_and_papers
from genesis.providers import (
    run_pubmed_literature,
    run_deepseek_summary,
    run_gemini_polish,
    run_image_generation,
)

# Credentials and endpoints are read from the environment once, at import
# time; any variable that is not set yields None.
UMLS_API_KEY = os.getenv("UMLS_API_KEY")
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
# NOTE(review): NCBI_API_KEY / NCBI_EMAIL are not referenced in this module's
# visible code; presumably they are imported from here elsewhere - confirm
# before removing.
NCBI_API_KEY = os.getenv("NCBI_API_KEY")
NCBI_EMAIL = os.getenv("NCBI_EMAIL")
NEO4J_URI = os.getenv("NEO4J_URI")
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")

# When true, research_once wraps the query in the synthetic-biology prompt
# before it is sent to the summarizer.
SYNBIO_MODE = True

# Example queries.
DEMO_QUERIES = [
    "Map all CRISPR-based living therapeutics in clinical trials since 2020",
    "Graph metabolic engineering pathways for bio-based drug production",
    "Synthetic biology startups developing oncolytic viruses — funding + trials",
    "3D bioprinting advances for organ transplantation with regulatory analysis",
    "AI-driven biosensor design for early cancer detection",
]


def synthetic_biology_prompt_inject(query: str, expanded_terms: List[str]) -> str:
    """Build a summarization prompt biased toward synthetic biology.

    Args:
        query: The user's research question.
        expanded_terms: Related terms; joined with ``", "`` in the prompt.

    Returns:
        The fixed expert-context preamble, a blank line, then the query and
        the comma-separated expanded terms.
    """
    context = (
        "You are an expert synthetic biologist and AI researcher. "
        "Focus on CRISPR, metabolic engineering, living therapeutics, protein design, "
        "biosensors, and biosecurity. Integrate literature, molecular structures, market trends, "
        "and policy/regulatory outlook. Produce a structured, citation-rich report."
    )
    return f"{context}\n\nQuery: {query}\nExpanded terms: {', '.join(expanded_terms)}"


def research_once(
    query: str,
    graph_preview: bool = True,
    narration: bool = True,
) -> Dict[str, Any]:
    """Run the full research pipeline for a single query.

    Args:
        query: The research question.
        graph_preview: When true and ``NEO4J_URI`` is set, the topic, its
            citations and expanded terms are written to the graph database.
        narration: When true and ``ELEVEN_LABS_API_KEY`` is set, the polished
            summary is passed to ``narrate_text``.

    Returns:
        A dict with the keys ``timestamp`` (naive UTC, ISO 8601), ``query``,
        ``expanded_terms``, ``summary``, ``citations``, ``structures``,
        ``visual_image_url`` and ``audio_url`` (``None`` when narration is
        skipped).
    """
    expanded_terms = expand_terms_with_ontology(query, UMLS_API_KEY, BIOPORTAL_API_KEY)

    # Only the summarizer sees the enriched prompt; the literature search and
    # image generation below use the raw query.
    if SYNBIO_MODE:
        enriched_query = synthetic_biology_prompt_inject(query, expanded_terms)
    else:
        enriched_query = query

    # Summarize with DeepSeek, then polish with Gemini.
    raw_summary = run_deepseek_summary(enriched_query) or "Summary unavailable."
    # Fall back to the unpolished text if polishing yields nothing, so the
    # summary (and the narration input) is never empty/None.
    polished_summary = run_gemini_polish(raw_summary) or raw_summary

    # Literature from PubMed
    citations = run_pubmed_literature(query, max_results=10)

    # Molecular structures
    structures = fetch_structures_for_terms(expanded_terms)

    # Image generation
    visual_image_url = run_image_generation(query)

    # GraphDB integration (skipped when no Neo4j URI is configured)
    if graph_preview and NEO4J_URI:
        write_topic_and_papers(query, citations, expanded_terms)

    # Narration (skipped when no ElevenLabs key is configured)
    audio_url = None
    if narration and ELEVEN_LABS_API_KEY:
        audio_url = narrate_text(polished_summary)

    # datetime.utcnow() is deprecated since Python 3.12; dropping tzinfo keeps
    # the emitted string in the same naive-UTC format callers already receive.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    return {
        "timestamp": timestamp,
        "query": query,
        "expanded_terms": expanded_terms,
        "summary": polished_summary,
        "citations": citations,
        "structures": structures,
        "visual_image_url": visual_image_url,
        "audio_url": audio_url,
    }