mgbam committed on
Commit
a596e6e
·
verified ·
1 Parent(s): e76888b

Create biosecurity.py

Browse files
Files changed (1) hide show
  1. genesis/biosecurity.py +54 -89
genesis/biosecurity.py CHANGED
@@ -1,105 +1,70 @@
1
  # genesis/biosecurity.py
2
  import os
3
  import requests
 
4
  from datetime import datetime
5
- from typing import Dict, Any, List
 
6
 
7
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
8
- BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
9
- UMLS_API_KEY = os.getenv("UMLS_API_KEY")
10
  NCBI_API_KEY = os.getenv("NCBI_API_KEY")
11
  NCBI_EMAIL = os.getenv("NCBI_EMAIL")
12
 
13
def search_pubmed_recent(query: str, max_results: int = 5) -> List[Dict[str, str]]:
    """Look up the newest PubMed entries matching *query*.

    Two NCBI E-utilities requests are made: ``esearch`` (sorted by date) to
    obtain PubMed IDs, then ``esummary`` to resolve those IDs to titles.

    Args:
        query: Search term forwarded to PubMed as-is.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts, one per ID that has a
        summary record. An empty list is returned when nothing matches or when
        any step fails; failures are printed, never raised.
    """
    try:
        search_response = requests.get(
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
            params={
                "db": "pubmed",
                "term": query,
                "retmax": max_results,
                "sort": "date",
                "retmode": "json",
                "api_key": NCBI_API_KEY,
                "email": NCBI_EMAIL,
            },
            timeout=15,
        )
        search_response.raise_for_status()
        pmids = search_response.json().get("esearchresult", {}).get("idlist", [])

        # No hits: skip the second round-trip entirely.
        if not pmids:
            return []

        summary_response = requests.get(
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
            params={
                "db": "pubmed",
                "id": ",".join(pmids),
                "retmode": "json",
                "api_key": NCBI_API_KEY,
                "email": NCBI_EMAIL,
            },
            timeout=15,
        )
        summary_response.raise_for_status()
        records = summary_response.json().get("result", {})

        # Keep the order returned by esearch; drop IDs without a summary.
        return [
            {
                "title": records[pmid].get("title", ""),
                "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            }
            for pmid in pmids
            if pmid in records
        ]
    except Exception as exc:
        # Best-effort lookup: report the problem and fall back to no papers.
        print(f"[PubMed Error] {exc}")
        return []
53
 
54
def ai_biosecurity_assessment(entity: str) -> Dict[str, Any]:
    """Ask the OpenAI chat model for a biosecurity risk write-up of *entity*.

    Args:
        entity: Name of the organism, toxin or technology to assess; it is
            interpolated directly into the prompt.

    Returns:
        ``{"ai_report": <model text>}`` on success. If the request or the
        response handling raises, the error is printed and
        ``{"ai_report": "AI risk analysis unavailable."}`` is returned.

    Raises:
        ImportError: If the ``openai`` package is not installed (the import
            happens before the guarded section).
    """
    import openai

    openai.api_key = OPENAI_API_KEY

    try:
        prompt = f"""
        You are a synthetic biology biosecurity officer.
        Assess the biosecurity risk of the following entity: {entity}.

        Consider:
        - Is it a known dangerous pathogen, toxin, or dual-use technology?
        - Potential misuse (bioterrorism, lab escape)
        - Regulatory oversight and biosafety levels
        - Recent trends in research or weaponization
        - Ethical concerns

        Return:
        - Risk Score (0-100)
        - Category (Low, Medium, High)
        - Reasons
        - Recommended Actions
        """
        chat_messages = [{"role": "user", "content": prompt}]
        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
        # confirm the pinned openai version still provides it.
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            temperature=0.2,
        )
        first_choice = completion.choices[0]
        return {"ai_report": first_choice.message["content"]}
    except Exception as exc:
        # Best-effort: surface the failure on stdout and return a placeholder.
        print(f"[OpenAI Error] {exc}")
        return {"ai_report": "AI risk analysis unavailable."}
85
-
86
def run_biosecurity_scan(entity: str) -> Dict[str, Any]:
    """Produce a biosecurity scan record for *entity*.

    Combines the AI-written assessment, recent PubMed links and a simple
    name-based score into one dict.

    Args:
        entity: Name of the biological entity to scan.

    Returns:
        A dict with the keys ``entity``, ``score``, ``category``,
        ``ai_report``, ``pubmed_links`` and ``timestamp`` (naive UTC,
        ISO 8601).
    """
    # Gather the two external inputs first: AI narrative, then literature.
    assessment = ai_biosecurity_assessment(entity)
    recent_papers = search_pubmed_recent(entity)

    # Placeholder scoring: a fixed high score when the name mentions one of
    # a few listed pathogens, otherwise a fixed low score.
    score = 30
    for pathogen in ["smallpox", "anthrax", "ebola"]:
        if pathogen in entity.lower():
            score = 85
            break

    if score >= 70:
        category = "High"
    elif score >= 40:
        category = "Medium"
    else:
        category = "Low"

    return {
        "entity": entity,
        "score": score,
        "category": category,
        "ai_report": assessment.get("ai_report", ""),
        "pubmed_links": recent_papers,
        "timestamp": datetime.utcnow().isoformat(),
    }
 
 
 
 
 
 
 
 
 
1
  # genesis/biosecurity.py
2
  import os
3
  import requests
4
+ from typing import Dict, Any
5
  from datetime import datetime
6
+
7
+ from .providers import pubmed_fallback_search, run_deepseek_summary
8
 
9
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
 
10
  NCBI_API_KEY = os.getenv("NCBI_API_KEY")
11
  NCBI_EMAIL = os.getenv("NCBI_EMAIL")
12
 
13
# Phrases searched for in the AI report, grouped by the risk tier they signal.
# Matching is case-insensitive, so the casing here is for readability only.
RISK_KEYWORDS = {
    "low": ["harmless", "biosafety level 1", "safe", "non-pathogenic"],
    "medium": ["biosafety level 2", "BSL-2", "infectious", "containment"],
    "high": ["BSL-3", "BSL-4", "pandemic potential", "gain-of-function", "biosecurity concern"],
}

# Points contributed by each matched phrase, per tier.
_RISK_TIER_WEIGHTS = {"low": 10, "medium": 25, "high": 50}


def score_biosecurity_risk(text: str) -> int:
    """Score risk based on keywords found in an AI report.

    Every phrase in ``RISK_KEYWORDS`` that occurs in *text* adds its tier's
    weight (low 10, medium 25, high 50); each phrase counts at most once and
    the total is capped at 100.

    Matching is case-insensitive and anchored on whole words, so "BSL-3"
    matches "bsl-3" but "safe" does not match inside "biosafety" or "unsafe".

    Args:
        text: The report to scan. ``None`` or an empty string scores 0.

    Returns:
        An integer in the range 0-100.
    """
    import re  # local import: the module's import block does not provide re

    if not text:
        return 0

    haystack = text.lower()
    score = 0
    for tier, weight in _RISK_TIER_WEIGHTS.items():
        for phrase in RISK_KEYWORDS[tier]:
            # Lower-case the phrase too; otherwise "BSL-3" could never match
            # the lower-cased report. The look-arounds reject hits embedded
            # in a longer word or number.
            pattern = r"(?<!\w)" + re.escape(phrase.lower()) + r"(?!\w)"
            if re.search(pattern, haystack):
                score += weight
    return min(score, 100)  # cap at 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
def run_biosecurity_scan(entity: str) -> Dict[str, Any]:
    """
    Run AI-powered biosecurity risk scan for a given biological entity.
    Includes AI assessment + PubMed literature.

    Args:
        entity: Name of the biological entity to assess; it is interpolated
            into the prompt and used as the PubMed search term.

    Returns:
        A dict with the keys ``entity``, ``timestamp`` (naive UTC, ISO 8601),
        ``risk_score``, ``report`` and ``citations``. When a step fails an
        ``error`` key holding the exception text is added:

        * AI assessment or scoring failed: ``risk_score`` is 0, ``report`` is
          a fixed error message and ``citations`` is empty.
        * Only the PubMed lookup failed: the computed ``risk_score`` and
          ``report`` are kept and ``citations`` is empty.

        No exception is propagated to the caller.
    """
    from datetime import timezone  # local import: only `datetime` is imported at module level

    def _utc_timestamp() -> str:
        # Same naive-UTC ISO string that datetime.utcnow().isoformat() gave,
        # without calling the deprecated utcnow().
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    try:
        # AI assessment
        prompt = f"""
        You are a synthetic biology & biosecurity expert.
        Assess the potential biosecurity risks of the following entity: {entity}.
        Classify its biosafety level, potential misuse, regulatory concerns, and safe handling guidelines.
        Respond with detailed analysis.
        """
        ai_report = run_deepseek_summary(prompt)

        # Risk score
        score = score_biosecurity_risk(ai_report)
    except Exception as e:
        return {
            "entity": entity,
            "timestamp": _utc_timestamp(),
            "error": str(e),
            "risk_score": 0,
            "report": "Error running biosecurity scan.",
            "citations": []
        }

    try:
        # PubMed citations
        citations = pubmed_fallback_search(entity, NCBI_API_KEY, NCBI_EMAIL)
    except Exception as e:
        # The literature lookup is supplementary: keep the assessment that was
        # already produced instead of discarding it with the citations.
        return {
            "entity": entity,
            "timestamp": _utc_timestamp(),
            "error": str(e),
            "risk_score": score,
            "report": ai_report,
            "citations": []
        }

    return {
        "entity": entity,
        "timestamp": _utc_timestamp(),
        "risk_score": score,
        "report": ai_report,
        "citations": citations
    }