Upload 4 files
- genesis/__init__.py +1 -0
- genesis/pipeline.py +106 -0
- genesis/safety.py +40 -0
- genesis/tools.py +111 -0
genesis/__init__.py
ADDED
@@ -0,0 +1 @@
from .pipeline import research_once
genesis/pipeline.py
ADDED
@@ -0,0 +1,106 @@
from __future__ import annotations
import os
from typing import Any, Dict, List

from pydantic import BaseModel

from openai import AsyncOpenAI
from agents import Agent, Runner, RunConfig, WebSearchTool, HostedMCPTool, set_default_openai_client

from .safety import SafetyGuard
from .tools import OntologyTool, PubMedTool, StructureTool, CrossrefTool

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
GENESIS_DISABLE_TRACING = os.getenv("GENESIS_DISABLE_TRACING", "1")
os.environ["OPENAI_AGENTS_DISABLE_TRACING"] = GENESIS_DISABLE_TRACING

client = AsyncOpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL, timeout=600.0)
# Route Agents SDK calls through this client so the custom base URL and key take effect.
set_default_openai_client(client)

DEEP_MODEL_PRIMARY = os.getenv("GENESIS_DEEP_MODEL", "o3-deep-research")
DEEP_MODEL_FAST = os.getenv("GENESIS_DEEP_FAST_MODEL", "o4-mini-deep-research")
INSTRUCTION_MODEL = os.getenv("GENESIS_INSTRUCTION_MODEL", "gpt-4o-mini")
TRIAGE_MODEL = os.getenv("GENESIS_TRIAGE_MODEL", "gpt-4o-mini")
CLARIFY_MODEL = os.getenv("GENESIS_CLARIFY_MODEL", "gpt-4o-mini")
MCP_URL = os.getenv("GENESIS_MCP_URL")

safety_guard = SafetyGuard()


class Clarifications(BaseModel):
    questions: List[str]


CLARIFY_PROMPT = """
Ask at most 3 essential questions to improve a high-level synthetic biology research brief.
Focus only on: organism/system, target (gene/protein/pathway), timeframe, preferred outputs.
Never request operational lab details. Friendly tone.
"""

INSTRUCTION_PROMPT = """
Rewrite the user query into detailed DEEP RESEARCH instructions in English.
OUTPUT ONLY the instructions.
Include dimensions: organism/system, target, scope/timeframe, evaluation axes, required tables.
Format the requested output as a report with headers: Abstract, Background, Findings, Synthesis, Open Questions,
Limitations, Risk & Ethics, References. Prefer primary literature (PubMed/Crossref) and databases (UMLS/BioPortal/RCSB).
Strictly avoid operational wet-lab protocols.
"""

base_tools = [WebSearchTool(), OntologyTool(), PubMedTool(), StructureTool(), CrossrefTool()]
if MCP_URL:
    base_tools.append(HostedMCPTool(tool_config={
        "type": "mcp",
        "server_label": "file_search",
        "server_url": MCP_URL,
        "require_approval": "never",
    }))

research_agent = Agent(
    name="Synthetic Biology Research Agent",
    model=DEEP_MODEL_PRIMARY,
    instructions=("Perform high-level empirical research with citations. Use tools judiciously. "
                  "NEVER produce step-by-step lab instructions or protocols."),
    tools=base_tools,
)

instruction_agent = Agent(
    name="Research Instruction Agent",
    model=INSTRUCTION_MODEL,
    instructions=INSTRUCTION_PROMPT,
    handoffs=[research_agent],
)

clarifying_agent = Agent(
    name="Clarifying Questions Agent",
    model=CLARIFY_MODEL,
    instructions=CLARIFY_PROMPT,
    output_type=Clarifications,
    handoffs=[instruction_agent],
)

triage_agent = Agent(
    name="Triage Agent",
    model=TRIAGE_MODEL,
    instructions=("If the user query lacks essential context, hand off to the Clarifying Questions Agent; "
                  "otherwise hand off to the Research Instruction Agent. Return EXACTLY one function call."),
    handoffs=[clarifying_agent, instruction_agent],
)


async def research_once(query: str, fast: bool = False) -> Dict[str, Any]:
    # Gate dual-use queries down to a citations-only literature review framing.
    dec = safety_guard.gate(query)
    if not dec.allowed:
        query = "SAFE-ONLY REVIEW: " + query + "\nOnly produce high-level literature synthesis with citations."
    # Note: this mutates the module-level agent, so the fast model sticks for subsequent calls.
    if fast and research_agent.model != DEEP_MODEL_FAST:
        research_agent.model = DEEP_MODEL_FAST

    stream = Runner.run_streamed(triage_agent, query, run_config=RunConfig(tracing_disabled=True))
    async for _ in stream.stream_events():
        pass  # drain events; only the final result is used
    final_text = stream.final_output

    # Pull URL citations out of the most recent message output item, if present.
    citations: List[Dict[str, str]] = []
    try:
        for item in reversed(stream.new_items):
            if item.type == "message_output_item":
                for content in getattr(item.raw_item, "content", []):
                    for ann in getattr(content, "annotations", []):
                        if getattr(ann, "type", None) == "url_citation":
                            citations.append({"title": getattr(ann, "title", ""), "url": getattr(ann, "url", "")})
                break
    except Exception:
        pass

    return {"final_output": final_text, "citations": citations}
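For reference, a minimal sketch of driving the pipeline end to end (assuming OPENAI_API_KEY is set and the package is importable as genesis; the query string is illustrative):

import asyncio

from genesis import research_once

async def main() -> None:
    # fast=True swaps the research agent onto the cheaper deep-research model.
    result = await research_once("Recent advances in CRISPR base editing for beta-thalassemia", fast=True)
    print(result["final_output"])
    for c in result["citations"]:
        print(f"- {c['title']}: {c['url']}")

asyncio.run(main())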
genesis/safety.py
ADDED
@@ -0,0 +1,40 @@
from __future__ import annotations
import re
from typing import List

from pydantic import BaseModel

DUAL_USE_PATTERNS = [
    r"step-?by-?step",
    r"protocol",
    r"wet[- ]?lab",
    r"culture\s*conditions",
    r"viral\s*vector",
    r"pathogen",
    r"gain[- ]of[- ]function",
    r"increase\s*virulence",
    r"synthesis\s*of\s*toxin",
    r"biosafety\s*level\s*(2|3|4)",
    r"kill\s*curve",
    r"CFU|colony\s*forming\s*units",
]


class SafetyDecision(BaseModel):
    allowed: bool
    rationale: str
    redactions: List[str] = []


class SafetyGuard:
    """Block operational/dual-use queries; allow high-level literature review only."""

    def gate(self, text: str) -> SafetyDecision:
        # Case-insensitive keyword screen over the raw query text.
        hits = [p for p in DUAL_USE_PATTERNS if re.search(p, text, flags=re.I)]
        if hits:
            return SafetyDecision(
                allowed=False,
                rationale="Operational/dual-use intent detected. Only high-level review permitted.",
                redactions=hits,
            )
        return SafetyDecision(allowed=True, rationale="High-level research intent.")
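A quick illustration of the keyword gate; the matched regex patterns come back in redactions:

from genesis.safety import SafetyGuard

guard = SafetyGuard()

ok = guard.gate("Review of promoter engineering strategies in E. coli")
print(ok.allowed)          # True

blocked = guard.gate("Give a step-by-step protocol using a viral vector")
print(blocked.allowed)     # False
print(blocked.redactions)  # ['step-?by-?step', 'protocol', 'viral\\s*vector']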
genesis/tools.py
ADDED
@@ -0,0 +1,111 @@
from __future__ import annotations
import os
from typing import Any, Dict

import httpx


class ToolBase:
    name: str = "tool"
    description: str = ""

    async def call(self, *args, **kwargs) -> Dict[str, Any]:
        raise NotImplementedError

    async def aclose(self) -> None:
        # Close the underlying HTTP client, if the subclass created one.
        http = getattr(self, "http", None)
        if http is not None:
            await http.aclose()


class OntologyTool(ToolBase):
    name = "ontology_normalize"
    description = "Normalize biomedical terms via BioPortal; returns concept info (no protocols)."

    def __init__(self, timeout: float = 20.0):
        self.http = httpx.AsyncClient(timeout=timeout)
        self.bioportal_key = os.getenv("BIOPORTAL_API_KEY")

    async def call(self, term: str) -> dict:
        out: Dict[str, Any] = {"term": term, "bioportal": None}
        try:
            if self.bioportal_key:
                r = await self.http.get(
                    "https://data.bioontology.org/search",
                    params={"q": term, "pagesize": 5},
                    headers={"Authorization": f"apikey token={self.bioportal_key}"},
                )
                r.raise_for_status()
                out["bioportal"] = r.json()
        except Exception as e:
            out["bioportal_error"] = str(e)
        return out


class PubMedTool(ToolBase):
    name = "pubmed_search"
    description = "Search PubMed via NCBI E-utilities; returns citation metadata."

    def __init__(self, timeout: float = 20.0):
        self.http = httpx.AsyncClient(timeout=timeout)
        self.key = os.getenv("NCBI_API_KEY")
        self.email = os.getenv("NCBI_EMAIL")

    async def call(self, query: str) -> dict:
        base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        # Attach optional NCBI credentials only when configured; httpx does not
        # drop None values and would send them as empty query parameters.
        creds = {k: v for k, v in {"api_key": self.key, "email": self.email}.items() if v}
        try:
            es = await self.http.get(
                base + "esearch.fcgi",
                params={"db": "pubmed", "term": query, "retmode": "json", "retmax": 20, **creds},
            )
            es.raise_for_status()
            ids = es.json().get("esearchresult", {}).get("idlist", [])
            if not ids:
                return {"query": query, "results": []}
            su = await self.http.get(
                base + "esummary.fcgi",
                params={"db": "pubmed", "id": ",".join(ids), "retmode": "json", **creds},
            )
            su.raise_for_status()
            recs = su.json().get("result", {})
            items = []
            for pmid in ids:
                r = recs.get(pmid, {})
                items.append({
                    "pmid": pmid,
                    "title": r.get("title"),
                    "journal": r.get("fulljournalname"),
                    "year": (r.get("pubdate") or "")[:4],
                    "authors": [a.get("name") for a in r.get("authors", [])],
                })
            return {"query": query, "results": items}
        except Exception as e:
            return {"query": query, "error": str(e)}


class StructureTool(ToolBase):
    name = "structure_info"
    description = "Query RCSB structure metadata (no lab steps)."

    def __init__(self, timeout: float = 20.0):
        self.http = httpx.AsyncClient(timeout=timeout)

    async def call(self, pdb_id: str) -> dict:
        out: Dict[str, Any] = {"pdb_id": pdb_id}
        try:
            r = await self.http.get(f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}")
            r.raise_for_status()
            out["rcsb_core"] = r.json()
        except Exception as e:
            out["error"] = str(e)
        return out


class CrossrefTool(ToolBase):
    name = "crossref_search"
    description = "Crossref search for DOIs; titles, years, authors."

    def __init__(self, timeout: float = 20.0):
        self.http = httpx.AsyncClient(timeout=timeout)

    async def call(self, query: str) -> dict:
        try:
            r = await self.http.get("https://api.crossref.org/works", params={"query": query, "rows": 10})
            r.raise_for_status()
            items = r.json().get("message", {}).get("items", [])
            papers = []
            for it in items:
                papers.append({
                    "title": (it.get("title") or [None])[0],
                    "doi": it.get("DOI"),
                    "year": (it.get("issued") or {}).get("date-parts", [[None]])[0][0],
                    "authors": [f"{a.get('given', '')} {a.get('family', '')}".strip() for a in it.get("author", [])],
                })
            return {"query": query, "results": papers}
        except Exception as e:
            return {"query": query, "error": str(e)}
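As written, pipeline.py passes these instances directly into Agent(tools=...), but the Agents SDK generally expects function-tool entries rather than arbitrary objects. A minimal sketch of one way to bridge that, assuming the function_tool decorator exported by the agents package (the module-level _pubmed instance is illustrative):

from agents import function_tool

from genesis.tools import PubMedTool

_pubmed = PubMedTool()

@function_tool
async def pubmed_search(query: str) -> dict:
    """Search PubMed and return citation metadata (no protocols)."""
    return await _pubmed.call(query)

# pubmed_search can then stand in for the raw PubMedTool() entry in base_tools.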