mgbam committed on
Commit
9d88d95
·
verified ·
1 Parent(s): 311fafd

Update genesis/graphdb.py

Browse files
Files changed (1) hide show
  1. genesis/graphdb.py +97 -72
genesis/graphdb.py CHANGED
@@ -1,81 +1,106 @@
1
  # genesis/graphdb.py
2
- from __future__ import annotations
3
-
4
  import os
5
- from typing import Dict, List
6
-
7
  from neo4j import GraphDatabase
 
8
 
9
  NEO4J_URI = os.getenv("NEO4J_URI")
10
  NEO4J_USER = os.getenv("NEO4J_USER")
11
  NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
def _require_config():
    """Fail fast unless all three Neo4j connection settings are present.

    Raises:
        RuntimeError: if any of NEO4J_URI, NEO4J_USER or NEO4J_PASSWORD is
            unset or empty.
    """
    # Guard clause: nothing to do when every setting is truthy.
    if all((NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)):
        return
    raise RuntimeError(
        "Neo4j not configured. Set NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD in Space Secrets."
    )
19
-
20
-
21
async def write_topic_and_papers(topic: str, citations: List[Dict[str, str]]) -> Dict[str, int]:
    """
    Create/merge a Topic node and Paper nodes, and connect them with MENTIONS edges.

    Schema:
      (t:Topic {id})-[:MENTIONS]->(p:Paper {url}); p.title is set on creation
      and only filled in on match when the existing title is null.

    Args:
        topic: Topic text. Falls back to "Topic" when empty; truncated to
            200 characters for the node id and 500 for its title.
        citations: Dicts read through their "title" and "url" keys. Entries
            that carry neither a title nor a URL are skipped.

    Returns:
        {"nodes": <int>, "rels": <int>} -- the number of node / relationship
        MERGE statements issued (not the number of entities newly created).

    Raises:
        RuntimeError: if the Neo4j settings are missing (via _require_config).

    NOTE(review): the body uses the synchronous driver inside a coroutine, so
    each query blocks the caller's event loop -- confirm this is acceptable.
    """
    _require_config()

    # Compute the fallback/truncated topic values once instead of per query.
    topic_text = topic or "Topic"
    topic_id = topic_text[:200]

    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

    nodes = 0
    rels = 0
    try:
        with driver.session() as sess:
            # Topic
            sess.run(
                "MERGE (t:Topic {id:$id}) SET t.title=$title",
                id=topic_id,
                title=topic_text[:500],
            )
            nodes += 1

            # Papers + edges
            for c in citations or []:
                raw_title = c.get("title") or ""
                url = (c.get("url") or "")[:1000]
                # Test the *raw* title: once the "citation" placeholder is
                # applied the title is never empty and this skip could not fire.
                if not url and not raw_title:
                    continue
                title = (raw_title or "citation")[:500]

                # NOTE: Paper nodes are merged on url, so every titled citation
                # without a URL lands on the single Paper {url: ""} node.
                sess.run(
                    """
                    MERGE (p:Paper {url:$url})
                      ON CREATE SET p.title=$title
                      ON MATCH  SET p.title = coalesce(p.title, $title)
                    WITH p
                    MATCH (t:Topic {id:$topic})
                    MERGE (t)-[:MENTIONS]->(p)
                    """,
                    url=url,
                    title=title,
                    topic=topic_id,
                )
                nodes += 1
                rels += 1

        return {"nodes": nodes, "rels": rels}
    finally:
        # Always release the driver, even when a query raises.
        driver.close()
69
-
70
-
71
- # Optional helper for debugging connectivity in a notebook/Space shell
72
def verify_connection() -> str:
    """Run a trivial query against Neo4j to confirm connectivity.

    Returns:
        The string "Neo4j connection OK." when ``RETURN 1 AS ok`` yields 1.

    Raises:
        RuntimeError: if the Neo4j settings are missing (via _require_config).
        AssertionError: if the probe query returns no record or an
            unexpected value.
    """
    _require_config()
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    try:
        with driver.session() as sess:
            res = sess.run("RETURN 1 AS ok").single()
            # Explicit raise instead of `assert`: assert statements are removed
            # under `python -O`, which would turn this check into a no-op.
            # AssertionError is kept so existing callers see the same type.
            if not res or res["ok"] != 1:
                raise AssertionError("Neo4j connectivity probe returned an unexpected result.")
        return "Neo4j connection OK."
    finally:
        driver.close()
 
1
  # genesis/graphdb.py
 
 
2
  import os
 
 
3
  from neo4j import GraphDatabase
4
+ from typing import List, Dict, Optional
5
 
6
  NEO4J_URI = os.getenv("NEO4J_URI")
7
  NEO4J_USER = os.getenv("NEO4J_USER")
8
  NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
9
 
10
class GraphDB:
    """Small wrapper around a Neo4j driver for a graph keyed on ``name``.

    Every node written through this class is merged on its ``name`` property.
    Labels and relationship types are spliced into the Cypher text because
    Cypher cannot take them as query parameters, so they are validated as
    plain ASCII identifiers before use. Property values always travel as
    query parameters.

    Instances can be used as context managers; leaving the ``with`` block
    closes the underlying driver.
    """

    def __init__(self):
        """Open a driver from NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD.

        Raises:
            ValueError: if any of the three settings is unset or empty.
        """
        if not all([NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD]):
            raise ValueError("Neo4j credentials are missing from environment variables.")
        self.driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    # ---------- Basic Helpers ----------
    @staticmethod
    def _check_identifier(value: str, kind: str) -> str:
        """Return *value* if it is safe to splice into Cypher, else raise.

        Only ASCII letters, digits and underscores (not starting with a
        digit) are accepted, which rules out whitespace, backticks, braces
        and every other character that could alter the query structure.

        Raises:
            ValueError: if *value* is not such an identifier.
        """
        if not isinstance(value, str) or not value.isascii() or not value.isidentifier():
            raise ValueError(f"Invalid Cypher {kind}: {value!r}")
        return value

    def run_query(self, query: str, params: Optional[Dict] = None) -> List[Dict]:
        """Run *query* in a fresh session and return each record as a dict."""
        with self.driver.session() as session:
            result = session.run(query, params or {})
            # Materialize inside the session; the result is consumed lazily.
            return [record.data() for record in result]

    def create_node(self, label: str, properties: Dict):
        """Merge a node of *label* on its ``name`` and apply the other properties.

        Args:
            label: Node label; must be a plain ASCII identifier.
            properties: Property map. Entries whose value is None are dropped.
                Must contain a non-None ``"name"``.

        Returns:
            The records produced by ``RETURN n``.

        Raises:
            ValueError: if *label* is not a valid identifier or ``name`` is
                missing/None (MERGE cannot match on a null property).
        """
        label = self._check_identifier(label, "label")
        props = {k: v for k, v in properties.items() if v is not None}
        name = props.get("name")
        if name is None:
            raise ValueError("create_node requires a non-null 'name' property.")
        query = f"MERGE (n:{label} {{name: $name}}) SET n += $props RETURN n"
        return self.run_query(query, {"name": name, "props": props})

    def create_relationship(self, start_label: str, start_name: str,
                            rel_type: str, end_label: str, end_name: str):
        """Merge a ``rel_type`` edge between two existing, name-matched nodes.

        Both endpoints are looked up with MATCH, so when either node does not
        exist no relationship is written and an empty list is returned.

        Raises:
            ValueError: if a label or the relationship type is not a valid
                identifier.
        """
        start_label = self._check_identifier(start_label, "label")
        end_label = self._check_identifier(end_label, "label")
        rel_type = self._check_identifier(rel_type, "relationship type")
        query = f"""
        MATCH (a:{start_label} {{name: $start_name}})
        MATCH (b:{end_label} {{name: $end_name}})
        MERGE (a)-[r:{rel_type}]->(b)
        RETURN r
        """
        return self.run_query(query, {
            "start_name": start_name,
            "end_name": end_name,
        })

    # ---------- Domain-Specific Functions ----------
    def add_molecule(self, name: str, description: Optional[str] = None):
        """Merge a ``Molecule`` node."""
        return self.create_node("Molecule", {"name": name, "description": description})

    def add_pathway(self, name: str, description: Optional[str] = None):
        """Merge a ``Pathway`` node."""
        return self.create_node("Pathway", {"name": name, "description": description})

    def add_company(self, name: str, country: Optional[str] = None,
                    funding: Optional[float] = None):
        """Merge a ``Company`` node."""
        return self.create_node("Company", {"name": name, "country": country, "funding": funding})

    def add_funder(self, name: str, description: Optional[str] = None):
        """Merge a ``Funder`` node (the endpoint ``link_company_funder`` matches)."""
        return self.create_node("Funder", {"name": name, "description": description})

    def add_trial(self, trial_id: str, title: Optional[str] = None,
                  status: Optional[str] = None):
        """Merge a ``Trial`` node; *trial_id* is stored as its ``name``."""
        return self.create_node("Trial", {"name": trial_id, "title": title, "status": status})

    def add_regulation(self, region: str, title: str, summary: Optional[str] = None):
        """Merge a ``Regulation`` node; *title* is stored as its ``name``."""
        return self.create_node("Regulation", {"name": title, "region": region, "summary": summary})

    def add_biosecurity_alert(self, title: str, severity: str, details: Optional[str] = None):
        """Merge a ``BiosecurityAlert`` node; *title* is stored as its ``name``."""
        return self.create_node("BiosecurityAlert", {"name": title, "severity": severity, "details": details})

    # ---------- Relationships ----------
    def link_molecule_pathway(self, molecule: str, pathway: str):
        """Merge (Molecule)-[:INVOLVED_IN]->(Pathway)."""
        return self.create_relationship("Molecule", molecule, "INVOLVED_IN", "Pathway", pathway)

    def link_company_molecule(self, company: str, molecule: str):
        """Merge (Company)-[:DEVELOPS]->(Molecule)."""
        return self.create_relationship("Company", company, "DEVELOPS", "Molecule", molecule)

    def link_trial_molecule(self, trial_id: str, molecule: str):
        """Merge (Trial)-[:TESTS]->(Molecule)."""
        return self.create_relationship("Trial", trial_id, "TESTS", "Molecule", molecule)

    def link_company_funder(self, company: str, funder: str):
        """Merge (Company)-[:FUNDED_BY]->(Funder)."""
        return self.create_relationship("Company", company, "FUNDED_BY", "Funder", funder)

    def link_regulation_pathway(self, regulation: str, pathway: str):
        """Merge (Regulation)-[:REGULATES]->(Pathway)."""
        return self.create_relationship("Regulation", regulation, "REGULATES", "Pathway", pathway)

    def link_biosecurity_molecule(self, alert: str, molecule: str):
        """Merge (BiosecurityAlert)-[:ASSOCIATED_WITH]->(Molecule)."""
        return self.create_relationship("BiosecurityAlert", alert, "ASSOCIATED_WITH", "Molecule", molecule)

    # ---------- Queries ----------
    def get_global_funding_network(self) -> List[Dict]:
        """Return company/funder pairs with country and amount, largest amount first."""
        query = """
        MATCH (c:Company)-[r:FUNDED_BY]->(f:Funder)
        RETURN c.name AS company, f.name AS funder, c.country AS country, c.funding AS amount
        ORDER BY amount DESC
        """
        return self.run_query(query)

    def get_pathway_network(self, pathway_name: str) -> List[Dict]:
        """Return molecules involved in *pathway_name* with any developing company.

        ``company`` is None for molecules that have no DEVELOPS edge.
        """
        query = """
        MATCH (m:Molecule)-[:INVOLVED_IN]->(p:Pathway {name: $pathway})
        OPTIONAL MATCH (m)<-[:DEVELOPS]-(c:Company)
        RETURN m.name AS molecule, c.name AS company
        """
        return self.run_query(query, {"pathway": pathway_name})

    def get_all_entities(self) -> List[Dict]:
        """Return the distinct (labels, name) pairs of every node in the graph."""
        query = """
        MATCH (n) RETURN DISTINCT labels(n) AS labels, n.name AS name
        """
        return self.run_query(query)
103
+
104
 
105
+ # Singleton instance for app-wide use
106
+ graphdb = GraphDB()