File size: 2,997 Bytes
4d521f6
e2c04b6
fa94666
 
 
 
 
 
 
47975fb
 
fa94666
 
 
7b0bd9a
5d480b1
fa94666
 
 
 
5d480b1
fa94666
5d480b1
fa94666
7b0bd9a
fa94666
 
 
 
 
 
 
7b0bd9a
fa94666
 
 
 
 
 
 
 
 
7b0bd9a
fa94666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b0bd9a
fa94666
 
 
 
 
 
 
 
 
5d480b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# genesis/pipeline.py
import os
from datetime import datetime
from typing import Dict, Any, List

from genesis.ontology import expand_terms_with_ontology
from genesis.structures import fetch_structures_for_terms
from genesis.narration import narrate_text
from genesis.graphdb import write_topic_and_papers
from genesis.providers import (
    run_pubmed_literature,
    run_deepseek_summary,
    run_gemini_polish,
    run_image_generation
)

# Service credentials and endpoints, read from the process environment once
# at import time. Any variable that is not set resolves to None.
UMLS_API_KEY = os.environ.get("UMLS_API_KEY")
BIOPORTAL_API_KEY = os.environ.get("BIOPORTAL_API_KEY")
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")
NCBI_EMAIL = os.environ.get("NCBI_EMAIL")
NEO4J_URI = os.environ.get("NEO4J_URI")
ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY")

# When true, research_once wraps the query in the synthetic-biology context
# prompt before summarization; when false the raw query is used.
SYNBIO_MODE = True

# Sample research queries for demonstrating the pipeline.
DEMO_QUERIES = [
    "Map all CRISPR-based living therapeutics in clinical trials since 2020",
    "Graph metabolic engineering pathways for bio-based drug production",
    "Synthetic biology startups developing oncolytic viruses — funding + trials",
    "3D bioprinting advances for organ transplantation with regulatory analysis",
    "AI-driven biosensor design for early cancer detection",
]

def synthetic_biology_prompt_inject(query: str, expanded_terms: List[str]) -> str:
    """Wrap a query in a fixed synthetic-biology expert context.

    Args:
        query: The user's original research question.
        expanded_terms: Related terms; they are listed comma-separated after
            the query.

    Returns:
        The context paragraph, a blank line, a ``Query:`` line and an
        ``Expanded terms:`` line, joined with newlines.
    """
    persona = "".join((
        "You are an expert synthetic biologist and AI researcher. ",
        "Focus on CRISPR, metabolic engineering, living therapeutics, protein design, ",
        "biosensors, and biosecurity. Integrate literature, molecular structures, market trends, ",
        "and policy/regulatory outlook. Produce a structured, citation-rich report.",
    ))
    term_list = ", ".join(expanded_terms)
    # The empty element produces the blank line between context and query.
    sections = (persona, "", f"Query: {query}", f"Expanded terms: {term_list}")
    return "\n".join(sections)

def research_once(query: str, graph_preview: bool = True, narration: bool = True) -> Dict[str, Any]:
    """Run the research pipeline once for a query and collect the results.

    The steps run in order: ontology term expansion, DeepSeek summary,
    Gemini polish, PubMed citation lookup, structure lookup, image
    generation, then an optional graph write and optional narration.

    Args:
        query: Free-text research question.
        graph_preview: When true and ``NEO4J_URI`` is set, the query, its
            citations and the expanded terms are passed to
            ``write_topic_and_papers``.
        narration: When true and ``ELEVEN_LABS_API_KEY`` is set, the final
            summary is passed to ``narrate_text``; otherwise ``audio_url``
            is ``None``.

    Returns:
        A dict with the keys ``timestamp``, ``query``, ``expanded_terms``,
        ``summary``, ``citations``, ``structures``, ``visual_image_url``
        and ``audio_url``.
    """
    expanded_terms = expand_terms_with_ontology(query, UMLS_API_KEY, BIOPORTAL_API_KEY)
    # With SYNBIO_MODE off, the summarizer sees the bare query.
    enriched_query = synthetic_biology_prompt_inject(query, expanded_terms) if SYNBIO_MODE else query

    # Summarize with DeepSeek; a falsy result becomes a placeholder string.
    raw_summary = run_deepseek_summary(enriched_query) or "Summary unavailable."
    # If the polish step yields nothing, keep the unpolished text so the
    # narration step and the returned summary never receive an empty value.
    polished_summary = run_gemini_polish(raw_summary) or raw_summary

    # Literature from PubMed (uses the original query, not the enriched prompt)
    citations = run_pubmed_literature(query, max_results=10)

    # Molecular structures
    structures = fetch_structures_for_terms(expanded_terms)

    # Image generation
    visual_image_url = run_image_generation(query)

    # GraphDB integration: skipped unless requested and a Neo4j URI is configured
    if graph_preview and NEO4J_URI:
        write_topic_and_papers(query, citations, expanded_terms)

    # Narration: skipped unless requested and an ElevenLabs key is configured
    audio_url = narrate_text(polished_summary) if narration and ELEVEN_LABS_API_KEY else None

    return {
        # Naive UTC time in ISO 8601 form (no offset suffix).
        "timestamp": datetime.utcnow().isoformat(),
        "query": query,
        "expanded_terms": expanded_terms,
        "summary": polished_summary,
        "citations": citations,
        "structures": structures,
        "visual_image_url": visual_image_url,
        "audio_url": audio_url,
    }