Spaces:
Sleeping
Sleeping
Update genesis/pipeline.py
Browse files- genesis/pipeline.py +53 -100
genesis/pipeline.py
CHANGED
@@ -1,106 +1,59 @@
|
|
1 |
-
|
2 |
from __future__ import annotations
|
3 |
import os, json
|
|
|
|
|
4 |
from typing import Any, Dict, List
|
5 |
-
from pydantic import BaseModel
|
6 |
-
|
7 |
-
from openai import AsyncOpenAI
|
8 |
-
from agents import Agent, Runner, RunConfig, WebSearchTool, HostedMCPTool
|
9 |
-
|
10 |
-
from .safety import SafetyGuard
|
11 |
-
from .tools import OntologyTool, PubMedTool, StructureTool, CrossrefTool
|
12 |
-
|
13 |
-
# --- Environment-driven configuration -------------------------------------
# All knobs come from env vars so deployments need no code changes.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY","")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL","https://api.openai.com/v1")
# "1" disables agents-SDK tracing by default; mirrored into the env var the
# SDK actually reads.
GENESIS_DISABLE_TRACING = os.getenv("GENESIS_DISABLE_TRACING","1")
os.environ["OPENAI_AGENTS_DISABLE_TRACING"] = GENESIS_DISABLE_TRACING

# Long timeout (10 min) because deep-research model runs are slow.
client = AsyncOpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL, timeout=600.0)

# Model selection per pipeline stage; each overridable independently.
DEEP_MODEL_PRIMARY = os.getenv("GENESIS_DEEP_MODEL", "o3-deep-research")
DEEP_MODEL_FAST = os.getenv("GENESIS_DEEP_FAST_MODEL", "o4-mini-deep-research")
INSTRUCTION_MODEL = os.getenv("GENESIS_INSTRUCTION_MODEL", "gpt-4o-mini")
TRIAGE_MODEL = os.getenv("GENESIS_TRIAGE_MODEL", "gpt-4o-mini")
CLARIFY_MODEL = os.getenv("GENESIS_CLARIFY_MODEL", "gpt-4o-mini")
# Optional hosted-MCP endpoint; None when unset.
MCP_URL = os.getenv("GENESIS_MCP_URL")

# Shared safety gate applied to incoming queries (see .safety.SafetyGuard).
safety_guard = SafetyGuard()
|
28 |
-
|
29 |
-
class Clarifications(BaseModel):
    """Structured output of the clarifying agent: questions for the user."""
    # The "at most 3" limit is enforced by CLARIFY_PROMPT, not validated here.
    questions: List[str]
|
31 |
-
|
32 |
-
CLARIFY_PROMPT = """
|
33 |
-
Ask at most 3 essential questions to improve a high-level synthetic biology research brief.
|
34 |
-
Focus only on: organism/system, target (gene/protein/pathway), timeframe, preferred outputs.
|
35 |
-
Never request operational lab details. Friendly tone.
|
36 |
-
"""
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
""
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
)
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
model=INSTRUCTION_MODEL,
|
62 |
-
instructions=INSTRUCTION_PROMPT,
|
63 |
-
handoffs=[research_agent],
|
64 |
-
)
|
65 |
-
|
66 |
-
# Asks the user up to 3 scoped questions (structured via Clarifications),
# then hands off to the instruction agent to build the research brief.
clarifying_agent = Agent(
    name="Clarifying Questions Agent",
    model=CLARIFY_MODEL,
    instructions=CLARIFY_PROMPT,
    output_type=Clarifications,
    handoffs=[instruction_agent],
)
|
73 |
-
|
74 |
-
# Entry-point agent: routes the raw query either to clarification (when
# essential context is missing) or straight to instruction building.
# "EXACTLY one function call" keeps the handoff deterministic.
triage_agent = Agent(
    name="Triage Agent",
    model=TRIAGE_MODEL,
    instructions=("If the user query lacks essential context, handoff to Clarifying Questions Agent; "
                  "otherwise handoff to Research Instruction Agent. Return EXACTLY one function call."),
    handoffs=[clarifying_agent, instruction_agent],
)
|
81 |
-
|
82 |
-
async def research_once(query: str, fast: bool=False) -> Dict[str, Any]:
|
83 |
-
dec = safety_guard.gate(query)
|
84 |
-
if not dec.allowed:
|
85 |
-
query = "SAFE-ONLY REVIEW: " + query + "\nOnly produce high-level literature synthesis with citations."
|
86 |
-
if fast and research_agent.model != DEEP_MODEL_FAST:
|
87 |
-
research_agent.model = DEEP_MODEL_FAST
|
88 |
-
|
89 |
-
stream = Runner.run_streamed(triage_agent, query, run_config=RunConfig(tracing_disabled=True))
|
90 |
-
async for _ in stream.stream_events():
|
91 |
-
pass
|
92 |
-
final_text = stream.final_output
|
93 |
-
|
94 |
-
citations = []
|
95 |
try:
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
except Exception:
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from __future__ import annotations
|
2 |
import os, json
|
3 |
+
import httpx
|
4 |
+
import google.generativeai as genai
|
5 |
from typing import Any, Dict, List
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
# Optional post-processors for polishing final summaries (NO lab steps)

async def gemini_postprocess(text: str, citations: List[dict]) -> str:
    """Polish a high-level synthesis with Gemini; passthrough when unconfigured.

    Best-effort by design: when GEMINI_API_KEY is absent the input is
    returned unchanged, so the pipeline never hard-depends on Gemini.

    Args:
        text: The synthesis text to polish.
        citations: Citation dicts kept for signature parity with the other
            post-processors; not currently folded into the prompt.

    Returns:
        The polished text, or the original text when no key is configured
        or the model returns an empty response.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        return text
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel("gemini-1.5-flash")
    prompt = (
        "Polish the following high-level scientific synthesis for clarity and flow. "
        "Do NOT add wet-lab procedures or operational details. "
        "Maintain citations list context.\n\n" + text
    )
    # Bug fix: GenerativeModel exposes generate_content_async() directly;
    # there is no `.asynchronous` attribute on the model object.
    resp = await model.generate_content_async(prompt)
    return resp.text or text
|
23 |
+
|
24 |
+
async def deepseek_postprocess(text: str, citations: List[dict]) -> str:
    """Polish a high-level synthesis via a DeepSeek chat endpoint.

    Generic OpenAI-compatible chat-completions call. Best-effort by design:
    returns the input unchanged when the endpoint is not configured or any
    network/HTTP/parse error occurs.

    Args:
        text: The synthesis text to polish.
        citations: Kept for signature parity with gemini_postprocess; not
            currently sent to the model.

    Returns:
        The polished text, or the original text on any failure.
    """
    base = os.getenv("DEEPSEEK_BASE_URL")
    key = os.getenv("DEEPSEEK_API_KEY")
    if not base or not key:
        return text
    try:
        async with httpx.AsyncClient(timeout=60.0) as http:
            r = await http.post(
                f"{base.rstrip('/')}/v1/chat/completions",
                headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
                json={
                    "model": os.getenv("DEEPSEEK_MODEL", "deepseek-chat"),
                    "messages": [
                        {"role": "system", "content": "You are a scientific editor. Never add lab protocols."},
                        {"role": "user", "content": (
                            "Polish the following high-level synthesis without adding operational details.\n\n"
                            + text
                        )},
                    ],
                    "temperature": 0.3,
                },
            )
            # Fix: treat non-2xx responses as failures so an error payload is
            # never parsed and mistaken for a polished summary; the raise is
            # caught below and falls back to the original text.
            r.raise_for_status()
            data = r.json()
        # Empty/missing content also falls back to the original text.
        return data.get("choices", [{}])[0].get("message", {}).get("content", text) or text
    except Exception:
        # Deliberate best-effort: polishing must never break the pipeline.
        return text
|
52 |
+
|
53 |
+
async def postprocess_summary(base_text: str, citations: List[dict], engine: str = "none") -> str:
    """Route the final summary through the selected polishing engine.

    Args:
        base_text: The synthesis text to optionally polish.
        citations: Citation dicts forwarded to the post-processor.
        engine: "gemini", "deepseek" (case-insensitive), or anything else
            (including None/"") for no post-processing.

    Returns:
        The post-processed text, or base_text when no engine is selected.
    """
    selected = (engine or "none").lower()
    if selected not in ("gemini", "deepseek"):
        # Unknown or disabled engine: passthrough.
        return base_text
    if selected == "gemini":
        return await gemini_postprocess(base_text, citations)
    return await deepseek_postprocess(base_text, citations)