mgbam committed on
Commit
e1444b4
·
verified ·
1 Parent(s): e22ad8c

Update genesis/pipeline.py

Browse files
Files changed (1) hide show
  1. genesis/pipeline.py +53 -100
genesis/pipeline.py CHANGED
@@ -1,106 +1,59 @@
1
-
2
  from __future__ import annotations
3
  import os, json
 
 
4
  from typing import Any, Dict, List
5
- from pydantic import BaseModel
6
-
7
- from openai import AsyncOpenAI
8
- from agents import Agent, Runner, RunConfig, WebSearchTool, HostedMCPTool
9
-
10
- from .safety import SafetyGuard
11
- from .tools import OntologyTool, PubMedTool, StructureTool, CrossrefTool
12
-
13
# --- OpenAI / agents configuration (every value overridable via environment) ---
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY","")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL","https://api.openai.com/v1")
GENESIS_DISABLE_TRACING = os.getenv("GENESIS_DISABLE_TRACING","1")
# Propagate the tracing switch to the agents SDK before any Agent is constructed.
os.environ["OPENAI_AGENTS_DISABLE_TRACING"] = GENESIS_DISABLE_TRACING

# Long timeout: deep-research runs can stream for many minutes.
client = AsyncOpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL, timeout=600.0)

# Per-stage model selection, each overridable via env.
DEEP_MODEL_PRIMARY = os.getenv("GENESIS_DEEP_MODEL", "o3-deep-research")
DEEP_MODEL_FAST = os.getenv("GENESIS_DEEP_FAST_MODEL", "o4-mini-deep-research")
INSTRUCTION_MODEL = os.getenv("GENESIS_INSTRUCTION_MODEL", "gpt-4o-mini")
TRIAGE_MODEL = os.getenv("GENESIS_TRIAGE_MODEL", "gpt-4o-mini")
CLARIFY_MODEL = os.getenv("GENESIS_CLARIFY_MODEL", "gpt-4o-mini")
# Optional hosted-MCP server URL; when unset, the MCP tool is simply not attached.
MCP_URL = os.getenv("GENESIS_MCP_URL")

# Module-level guard used by research_once() to gate incoming queries.
safety_guard = SafetyGuard()

class Clarifications(BaseModel):
    """Structured output of the clarifying agent: at most 3 follow-up questions."""
    questions: List[str]

CLARIFY_PROMPT = """
Ask at most 3 essential questions to improve a high-level synthetic biology research brief.
Focus only on: organism/system, target (gene/protein/pathway), timeframe, preferred outputs.
Never request operational lab details. Friendly tone.
"""

INSTRUCTION_PROMPT = """
Rewrite the user query into detailed DEEP RESEARCH instructions in English.
OUTPUT ONLY the instructions.
Include dimensions: organism/system, target, scope/timeframe, evaluation axes, required tables.
Format requested output as a report with headers: Abstract, Background, Findings, Synthesis, Open Questions,
Limitations, Risk & Ethics, References. Prefer primary literature (PubMed/Crossref) and databases (UMLS/BioPortal/RCSB).
Strictly avoid operational wet-lab protocols.
"""

# Tools shared by the research agent; the hosted MCP file-search tool is
# appended only when a server URL is configured.
base_tools = [WebSearchTool(), OntologyTool(), PubMedTool(), StructureTool(), CrossrefTool()]
if MCP_URL:
    base_tools.append(HostedMCPTool(tool_config={"type":"mcp","server_label":"file_search","server_url":MCP_URL,"require_approval":"never"}))

# Terminal agent of the handoff chain: performs the actual deep research.
research_agent = Agent(
    name="Synthetic Biology Research Agent",
    model=DEEP_MODEL_PRIMARY,
    instructions=("Perform high-level empirical research with citations. Use tools judiciously. "
                  "NEVER produce step-by-step lab instructions or protocols."),
    tools=base_tools,
)

# Rewrites the user query into detailed research instructions, then hands off
# to the research agent.
instruction_agent = Agent(
    name="Research Instruction Agent",
    model=INSTRUCTION_MODEL,
    instructions=INSTRUCTION_PROMPT,
    handoffs=[research_agent],
)

# Asks up to 3 clarifying questions (typed output) before instruction writing.
clarifying_agent = Agent(
    name="Clarifying Questions Agent",
    model=CLARIFY_MODEL,
    instructions=CLARIFY_PROMPT,
    output_type=Clarifications,
    handoffs=[instruction_agent],
)

# Entry point: routes the query either to clarification or straight to
# instruction writing. Chain: triage -> (clarify ->) instruction -> research.
triage_agent = Agent(
    name="Triage Agent",
    model=TRIAGE_MODEL,
    instructions=("If the user query lacks essential context, handoff to Clarifying Questions Agent; "
                  "otherwise handoff to Research Instruction Agent. Return EXACTLY one function call."),
    handoffs=[clarifying_agent, instruction_agent],
)
81
-
82
async def research_once(query: str, fast: bool=False) -> Dict[str, Any]:
    """Run the triage -> (clarify | instruction) -> research chain once.

    Args:
        query: Free-text research question from the user.
        fast: When True, run the research agent on the fast deep-research model.

    Returns:
        Dict with "final_output" (the report text) and "citations"
        (list of {"title", "url"} dicts harvested from url_citation annotations).
    """
    dec = safety_guard.gate(query)
    if not dec.allowed:
        # Blocked queries are downgraded to a literature-review-only prompt
        # rather than rejected outright.
        query = "SAFE-ONLY REVIEW: " + query + "\nOnly produce high-level literature synthesis with citations."
    # BUGFIX: the previous code switched the shared agent to the fast model on
    # the first fast=True call and never switched back, so every subsequent
    # fast=False call silently kept using the fast model. Select explicitly.
    desired_model = DEEP_MODEL_FAST if fast else DEEP_MODEL_PRIMARY
    if research_agent.model != desired_model:
        research_agent.model = desired_model

    stream = Runner.run_streamed(triage_agent, query, run_config=RunConfig(tracing_disabled=True))
    # Drain the event stream; we only need the final aggregated output.
    async for _ in stream.stream_events():
        pass
    final_text = stream.final_output

    citations: List[Dict[str, str]] = []
    try:
        # Walk items newest-first and harvest url_citation annotations from
        # the last message item only (break after processing it).
        for item in reversed(stream.new_items):
            if item.type == "message_output_item":
                for content in getattr(item.raw_item, "content", []):
                    for ann in getattr(content, "annotations", []):
                        if getattr(ann, "type", None) == "url_citation":
                            citations.append({"title": getattr(ann, "title", ""), "url": getattr(ann, "url", "")})
                break
    except Exception:
        # Citation harvesting is best-effort; never fail the run over it.
        pass

    return {"final_output": final_text, "citations": citations}
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
  import os, json
3
+ import httpx
4
+ import google.generativeai as genai
5
  from typing import Any, Dict, List
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # Optional post-processors for polishing final summaries (NO lab steps)
8
+
9
async def gemini_postprocess(text: str, citations: List[dict]) -> str:
    """Polish a finished high-level synthesis with Gemini (best-effort).

    Args:
        text: The synthesis to polish.
        citations: Citation dicts (currently unused; kept for a uniform
            post-processor signature).

    Returns:
        The polished text, or *text* unchanged when GEMINI_API_KEY is unset
        or the API call fails for any reason.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        return text
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel("gemini-1.5-flash")
        prompt = (
            "Polish the following high-level scientific synthesis for clarity and flow. "
            "Do NOT add wet-lab procedures or operational details. "
            "Maintain citations list context.\n\n" + text
        )
        # BUGFIX: the SDK exposes the async call directly as
        # generate_content_async(); the previous `model.asynchronous.…`
        # attribute does not exist and raised AttributeError on every call.
        resp = await model.generate_content_async(prompt)
        return resp.text or text
    except Exception:
        # Degrade gracefully on any SDK/network failure, mirroring
        # deepseek_postprocess below.
        return text
23
+
24
async def deepseek_postprocess(text: str, citations: List[dict]) -> str:
    """Polish a finished synthesis via an OpenAI-compatible DeepSeek endpoint.

    Args:
        text: The synthesis to polish.
        citations: Citation dicts (currently unused; kept for a uniform
            post-processor signature).

    Returns:
        The polished text, or *text* unchanged when DEEPSEEK_BASE_URL /
        DEEPSEEK_API_KEY are unset or on any request/parse failure.
    """
    base = os.getenv("DEEPSEEK_BASE_URL")
    key = os.getenv("DEEPSEEK_API_KEY")
    if not base or not key:
        return text
    try:
        async with httpx.AsyncClient(timeout=60.0) as http:
            r = await http.post(
                f"{base.rstrip('/')}/v1/chat/completions",
                headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
                json={
                    "model": os.getenv("DEEPSEEK_MODEL", "deepseek-chat"),
                    "messages": [
                        {"role": "system", "content": "You are a scientific editor. Never add lab protocols."},
                        {"role": "user", "content": (
                            "Polish the following high-level synthesis without adding operational details.\n\n" + text
                        )},
                    ],
                    "temperature": 0.3,
                },
            )
            # Surface non-2xx responses as exceptions so an HTML error page is
            # not fed to the JSON parser below.
            r.raise_for_status()
        data = r.json()
        content = data.get("choices", [{}])[0].get("message", {}).get("content")
        # `or text` also covers an explicit `"content": null` in the payload,
        # which dict.get(..., default) would have passed through as None.
        return content or text
    except Exception:
        # Any failure (network, HTTP status, malformed payload) degrades
        # gracefully to the unpolished text.
        return text
52
+
53
async def postprocess_summary(base_text: str, citations: List[dict], engine: str = "none") -> str:
    """Optionally route *base_text* through a polishing engine.

    Engine matching is case-insensitive; anything other than "gemini" or
    "deepseek" (including None or empty) returns the text untouched.
    """
    choice = (engine or "none").lower()
    if choice == "gemini":
        result = await gemini_postprocess(base_text, citations)
    elif choice == "deepseek":
        result = await deepseek_postprocess(base_text, citations)
    else:
        result = base_text
    return result