mgbam committed
Commit 2689723 · verified · 1 Parent(s): a9868dc

Upload 4 files

Files changed (4)
  1. genesis/__init__.py +1 -0
  2. genesis/pipeline.py +106 -0
  3. genesis/safety.py +40 -0
  4. genesis/tools.py +111 -0
genesis/__init__.py ADDED
@@ -0,0 +1 @@
+ from .pipeline import research_once
genesis/pipeline.py ADDED
@@ -0,0 +1,106 @@
+
+ from __future__ import annotations
+ import os, json
+ from typing import Any, Dict, List
+ from pydantic import BaseModel
+
+ from openai import AsyncOpenAI
+ from agents import Agent, Runner, RunConfig, WebSearchTool, HostedMCPTool
+
+ from .safety import SafetyGuard
+ from .tools import OntologyTool, PubMedTool, StructureTool, CrossrefTool
+
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY","")
+ OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL","https://api.openai.com/v1")
+ GENESIS_DISABLE_TRACING = os.getenv("GENESIS_DISABLE_TRACING","1")
+ os.environ["OPENAI_AGENTS_DISABLE_TRACING"] = GENESIS_DISABLE_TRACING
+
+ client = AsyncOpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL, timeout=600.0)
+
+ DEEP_MODEL_PRIMARY = os.getenv("GENESIS_DEEP_MODEL", "o3-deep-research")
+ DEEP_MODEL_FAST = os.getenv("GENESIS_DEEP_FAST_MODEL", "o4-mini-deep-research")
+ INSTRUCTION_MODEL = os.getenv("GENESIS_INSTRUCTION_MODEL", "gpt-4o-mini")
+ TRIAGE_MODEL = os.getenv("GENESIS_TRIAGE_MODEL", "gpt-4o-mini")
+ CLARIFY_MODEL = os.getenv("GENESIS_CLARIFY_MODEL", "gpt-4o-mini")
+ MCP_URL = os.getenv("GENESIS_MCP_URL")
+
+ safety_guard = SafetyGuard()
+
+ class Clarifications(BaseModel):
+     questions: List[str]
+
+ CLARIFY_PROMPT = """
+ Ask at most 3 essential questions to improve a high-level synthetic biology research brief.
+ Focus only on: organism/system, target (gene/protein/pathway), timeframe, preferred outputs.
+ Never request operational lab details. Friendly tone.
+ """
+
+ INSTRUCTION_PROMPT = """
+ Rewrite the user query into detailed DEEP RESEARCH instructions in English.
+ OUTPUT ONLY the instructions.
+ Include dimensions: organism/system, target, scope/timeframe, evaluation axes, required tables.
+ Format requested output as a report with headers: Abstract, Background, Findings, Synthesis, Open Questions,
+ Limitations, Risk & Ethics, References. Prefer primary literature (PubMed/Crossref) and databases (UMLS/BioPortal/RCSB).
+ Strictly avoid operational wet-lab protocols.
+ """
+
+ base_tools = [WebSearchTool(), OntologyTool(), PubMedTool(), StructureTool(), CrossrefTool()]
+ if MCP_URL:
+     base_tools.append(HostedMCPTool(tool_config={"type":"mcp","server_label":"file_search","server_url":MCP_URL,"require_approval":"never"}))
+
+ research_agent = Agent(
+     name="Synthetic Biology Research Agent",
+     model=DEEP_MODEL_PRIMARY,
+     instructions=("Perform high-level empirical research with citations. Use tools judiciously. "
+                   "NEVER produce step-by-step lab instructions or protocols."),
+     tools=base_tools,
+ )
+
+ instruction_agent = Agent(
+     name="Research Instruction Agent",
+     model=INSTRUCTION_MODEL,
+     instructions=INSTRUCTION_PROMPT,
+     handoffs=[research_agent],
+ )
+
+ clarifying_agent = Agent(
+     name="Clarifying Questions Agent",
+     model=CLARIFY_MODEL,
+     instructions=CLARIFY_PROMPT,
+     output_type=Clarifications,
+     handoffs=[instruction_agent],
+ )
+
+ triage_agent = Agent(
+     name="Triage Agent",
+     model=TRIAGE_MODEL,
+     instructions=("If the user query lacks essential context, handoff to Clarifying Questions Agent; "
+                   "otherwise handoff to Research Instruction Agent. Return EXACTLY one function call."),
+     handoffs=[clarifying_agent, instruction_agent],
+ )
+
+ async def research_once(query: str, fast: bool=False) -> Dict[str, Any]:
+     dec = safety_guard.gate(query)
+     if not dec.allowed:
+         query = "SAFE-ONLY REVIEW: " + query + "\nOnly produce high-level literature synthesis with citations."
+     if fast and research_agent.model != DEEP_MODEL_FAST:
+         research_agent.model = DEEP_MODEL_FAST
+
+     stream = Runner.run_streamed(triage_agent, query, run_config=RunConfig(tracing_disabled=True))
+     async for _ in stream.stream_events():
+         pass
+     final_text = stream.final_output
+
+     citations = []
+     try:
+         for item in reversed(stream.new_items):
+             if item.type == "message_output_item":
+                 for content in getattr(item.raw_item, "content", []):
+                     for ann in getattr(content, "annotations", []):
+                         if getattr(ann, "type", None) == "url_citation":
+                             citations.append({"title": getattr(ann,"title",""), "url": getattr(ann,"url","")})
+                 break
+     except Exception:
+         pass
+
+     return {"final_output": final_text, "citations": citations}
genesis/safety.py ADDED
@@ -0,0 +1,40 @@
+
+ from __future__ import annotations
+ import re
+ from pydantic import BaseModel
+ from typing import List
+
+ DUAL_USE_PATTERNS = [
+     r"step-?by-?step",
+     r"protocol",
+     r"wet[- ]?lab",
+     r"culture\s*conditions",
+     r"viral\s*vector",
+     r"pathogen",
+     r"gain[- ]of[- ]function",
+     r"increase\s*virulence",
+     r"synthesis\s*of\s*toxin",
+     r"biosafety\s*level\s*(2|3|4)",
+     r"kill\s*curve",
+     r"CFU|colony\s*forming\s*units",
+ ]
+
+ class SafetyDecision(BaseModel):
+     allowed: bool
+     rationale: str
+     redactions: List[str] = []
+
+ class SafetyGuard:
+     # Block operational/dual-use outputs; allow high-level literature review only.
+     def __init__(self) -> None:
+         pass
+
+     def gate(self, text: str) -> SafetyDecision:
+         hits = [p for p in DUAL_USE_PATTERNS if re.search(p, text, flags=re.I)]
+         if hits:
+             return SafetyDecision(
+                 allowed=False,
+                 rationale="Operational/dual-use intent detected. Only high-level review permitted.",
+                 redactions=hits,
+             )
+         return SafetyDecision(allowed=True, rationale="High-level research intent.")
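
A short illustration (not included in the commit) of how SafetyGuard.gate classifies queries; both example strings below are hypothetical.

# Illustrative check of SafetyGuard.gate; example queries are hypothetical.
from genesis.safety import SafetyGuard

guard = SafetyGuard()

ok = guard.gate("Review of promoter engineering strategies in E. coli")
blocked = guard.gate("Give me a step-by-step wet-lab protocol for plasmid assembly")

print(ok.allowed, ok.rationale)             # True  "High-level research intent."
print(blocked.allowed, blocked.redactions)  # False ['step-?by-?step', 'protocol', 'wet[- ]?lab']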
genesis/tools.py ADDED
@@ -0,0 +1,111 @@
+
+ from __future__ import annotations
+ import os, json, re
+ import httpx
+ from typing import Any, Dict, Optional, List
+
+ class ToolBase:
+     name: str = "tool"
+     description: str = ""
+
+     async def call(self, *args, **kwargs) -> Dict[str, Any]:
+         raise NotImplementedError
+
+ class OntologyTool(ToolBase):
+     name = "ontology_normalize"
+     description = "Normalize biomedical terms via BioPortal; returns concept info (no protocols)."
+
+     def __init__(self, timeout: float = 20.0):
+         self.http = httpx.AsyncClient(timeout=timeout)
+         self.bioportal_key = os.getenv("BIOPORTAL_API_KEY")
+
+     async def call(self, term: str) -> dict:
+         out = {"term": term, "bioportal": None}
+         try:
+             if self.bioportal_key:
+                 r = await self.http.get(
+                     "https://data.bioontology.org/search",
+                     params={"q": term, "pagesize": 5},
+                     headers={"Authorization": f"apikey token={self.bioportal_key}"}
+                 )
+                 out["bioportal"] = r.json()
+         except Exception as e:
+             out["bioportal_error"] = str(e)
+         return out
+
+ class PubMedTool(ToolBase):
+     name = "pubmed_search"
+     description = "Search PubMed via NCBI; return metadata with citations."
+
+     def __init__(self, timeout: float = 20.0):
+         self.http = httpx.AsyncClient(timeout=timeout)
+         self.key = os.getenv("NCBI_API_KEY")
+         self.email = os.getenv("NCBI_EMAIL")
+
+     async def call(self, query: str) -> dict:
+         base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
+         try:
+             es = await self.http.get(
+                 base + "esearch.fcgi",
+                 params={"db":"pubmed","term":query,"retmode":"json","retmax":20,"api_key":self.key,"email":self.email}
+             )
+             ids = es.json().get("esearchresult",{}).get("idlist",[])
+             if not ids: return {"query":query,"results":[]}
+             su = await self.http.get(
+                 base + "esummary.fcgi",
+                 params={"db":"pubmed","id":",".join(ids),"retmode":"json","api_key":self.key,"email":self.email}
+             )
+             recs = su.json().get("result",{})
+             items = []
+             for pmid in ids:
+                 r = recs.get(pmid, {})
+                 items.append({
+                     "pmid": pmid,
+                     "title": r.get("title"),
+                     "journal": r.get("fulljournalname"),
+                     "year": (r.get("pubdate") or "")[:4],
+                     "authors": [a.get("name") for a in r.get("authors",[])],
+                 })
+             return {"query":query,"results":items}
+         except Exception as e:
+             return {"query":query,"error":str(e)}
+
+ class StructureTool(ToolBase):
+     name = "structure_info"
+     description = "Query RCSB structure metadata (no lab steps)."
+
+     def __init__(self, timeout: float = 20.0):
+         self.http = httpx.AsyncClient(timeout=timeout)
+
+     async def call(self, pdb_id: str) -> dict:
+         out = {"pdb_id": pdb_id}
+         try:
+             r = await self.http.get(f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}")
+             r.raise_for_status()
+             out["rcsb_core"] = r.json()
+         except Exception as e:
+             out["error"] = str(e)
+         return out
+
+ class CrossrefTool(ToolBase):
+     name = "crossref_search"
+     description = "Crossref search for DOIs; titles, years, authors."
+
+     def __init__(self, timeout: float = 20.0):
+         self.http = httpx.AsyncClient(timeout=timeout)
+
+     async def call(self, query: str) -> dict:
+         try:
+             r = await self.http.get("https://api.crossref.org/works", params={"query":query,"rows":10})
+             items = r.json().get("message",{}).get("items",[])
+             papers = []
+             for it in items:
+                 papers.append({
+                     "title": (it.get("title") or [None])[0],
+                     "doi": it.get("DOI"),
+                     "year": (it.get("issued") or {}).get("date-parts", [[None]])[0][0],
+                     "authors": [f"{a.get('given','')} {a.get('family','')}".strip() for a in it.get("author",[])],
+                 })
+             return {"query":query,"results":papers}
+         except Exception as e:
+             return {"query":query,"error":str(e)}