Spaces:
Sleeping
Sleeping
Update genesis/graphdb.py
Browse files- genesis/graphdb.py +97 -72
genesis/graphdb.py
CHANGED
@@ -1,81 +1,106 @@
|
|
1 |
# genesis/graphdb.py
|
2 |
-
from __future__ import annotations
|
3 |
-
|
4 |
import os
|
5 |
-
from typing import Dict, List
|
6 |
-
|
7 |
from neo4j import GraphDatabase
|
|
|
8 |
|
9 |
# Neo4j connection settings, read once from the process environment at import
# time. Each value is None when the corresponding variable is not set.
NEO4J_URI = os.environ.get("NEO4J_URI")
NEO4J_USER = os.environ.get("NEO4J_USER")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
raise RuntimeError(
|
17 |
-
"Neo4j not configured. Set NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD in Space Secrets."
|
18 |
-
)
|
19 |
-
|
20 |
-
|
21 |
-
async def write_topic_and_papers(topic: str, citations: List[Dict[str, str]]) -> Dict[str, int]:
    """
    Create/merge a Topic node and Paper nodes, and connect them with MENTIONS edges.

    Schema:
      (t:Topic {id})-[:MENTIONS]->(p:Paper {url}); p.title is set when the
      Paper is created and only filled in on match if it was missing.

    Args:
        topic: Topic text. Falls back to "Topic" when empty/None. The first
            200 characters are used as the node id, the first 500 as its title.
        citations: Dicts with optional "title" and "url" keys. None is treated
            as an empty list. Citations with neither a title nor a url are
            skipped.

    Returns: {"nodes": <int>, "rels": <int>} -- the number of node and
        relationship MERGE operations issued (one Topic plus one per processed
        citation), not the number of distinct graph elements created.

    Raises:
        RuntimeError: propagated from _require_config() when Neo4j is not
            configured.
    """
    _require_config()

    # Compute the topic key once so the Topic MERGE and every Paper query are
    # guaranteed to use the same id.
    topic_text = topic or "Topic"
    topic_id = topic_text[:200]

    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

    nodes = 0
    rels = 0
    try:
        with driver.session() as sess:
            # Topic
            sess.run(
                "MERGE (t:Topic {id:$id}) SET t.title=$title",
                id=topic_id,
                title=topic_text[:500],
            )
            nodes += 1

            # Papers + edges
            for c in citations or []:
                raw_title = c.get("title") or ""
                url = (c.get("url") or "")[:1000]
                # Test the raw values: applying the "citation" fallback first
                # would make this guard unreachable and merge empty citations
                # into a single Paper {url: ""} node.
                if not url and not raw_title:
                    continue
                title = (raw_title or "citation")[:500]

                sess.run(
                    """
                    MERGE (p:Paper {url:$url})
                      ON CREATE SET p.title=$title
                      ON MATCH SET p.title = coalesce(p.title, $title)
                    WITH p
                    MATCH (t:Topic {id:$topic})
                    MERGE (t)-[:MENTIONS]->(p)
                    """,
                    url=url,
                    title=title,
                    topic=topic_id,
                )
                nodes += 1
                rels += 1

        return {"nodes": nodes, "rels": rels}
    finally:
        # Always release the driver, including when a query fails.
        driver.close()
|
69 |
-
|
70 |
-
|
71 |
-
# Optional helper for debugging connectivity in a notebook/Space shell
|
72 |
-
def verify_connection() -> str:
    """
    Run a trivial query against Neo4j to confirm the configured connection works.

    Returns:
        The string "Neo4j connection OK." when `RETURN 1 AS ok` yields 1.

    Raises:
        RuntimeError: when the query returns no record or an unexpected value
            (and, via _require_config(), when Neo4j is not configured).
    """
    _require_config()
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    try:
        with driver.session() as sess:
            res = sess.run("RETURN 1 AS ok").single()
            # Explicit check instead of `assert`: assertions are removed under
            # `python -O`, which would make this helper report success
            # without verifying anything.
            if not res or res["ok"] != 1:
                raise RuntimeError("Neo4j connectivity check returned an unexpected result.")
        return "Neo4j connection OK."
    finally:
        driver.close()
|
|
|
1 |
# genesis/graphdb.py
|
|
|
|
|
2 |
import os
|
|
|
|
|
3 |
from neo4j import GraphDatabase
|
4 |
+
from typing import List, Dict, Optional
|
5 |
|
6 |
# Neo4j connection settings, read once from the process environment at import
# time. Each value is None when the corresponding variable is not set.
NEO4J_URI = os.environ.get("NEO4J_URI")
NEO4J_USER = os.environ.get("NEO4J_USER")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
|
9 |
|
10 |
+
class GraphDB:
    """Convenience wrapper around a Neo4j driver.

    Provides generic MERGE helpers for nodes and relationships keyed by a
    ``name`` property, domain-specific shortcuts built on those helpers, and a
    few read queries. Can be used as a context manager so the driver is closed
    on exit.
    """

    def __init__(self):
        """Open a driver from NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD.

        Raises:
            ValueError: if any of the three settings is missing or empty.
        """
        if not all([NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD]):
            raise ValueError("Neo4j credentials are missing from environment variables.")
        self.driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Returns None, so exceptions raised inside the `with` body propagate.
        self.close()

    # ---------- Basic Helpers ----------
    @staticmethod
    def _check_identifier(value: str, kind: str) -> str:
        """Return *value* if it is safe to splice into Cypher as a label or type.

        Labels and relationship types cannot be passed as query parameters, so
        they are interpolated into the query text; restricting them to plain
        ASCII identifiers keeps arbitrary Cypher from being injected.

        Raises:
            ValueError: if *value* is not an ASCII identifier string.
        """
        if not (isinstance(value, str) and value.isascii() and value.isidentifier()):
            raise ValueError(f"Invalid Cypher {kind}: {value!r}")
        return value

    def run_query(self, query: str, params: Optional[Dict] = None) -> List[Dict]:
        """Run *query* in a fresh session and return each record as a dict."""
        with self.driver.session() as session:
            result = session.run(query, params or {})
            # Materialize inside the session; the result is tied to it.
            return [record.data() for record in result]

    def create_node(self, label: str, properties: Dict):
        """MERGE a node of *label* keyed by ``properties["name"]`` and set its properties.

        Properties whose value is None are dropped before being written.

        Raises:
            ValueError: if *label* is not a valid identifier, or if no
                non-None "name" is supplied (MERGE cannot match on a null
                property value).
        """
        label = self._check_identifier(label, "label")
        props = {k: v for k, v in properties.items() if v is not None}
        if "name" not in props:
            raise ValueError(f"A non-null 'name' is required to merge a {label} node.")
        query = f"MERGE (n:{label} {{name: $name}}) SET n += $props RETURN n"
        return self.run_query(query, {"name": props["name"], "props": props})

    def create_relationship(self, start_label: str, start_name: str,
                            rel_type: str, end_label: str, end_name: str):
        """MERGE a *rel_type* relationship between two existing nodes matched by name.

        Both endpoints are found with MATCH, so nothing is created (and an
        empty list is returned) when either node does not exist.

        Raises:
            ValueError: if a label or the relationship type is not a valid
                identifier.
        """
        start_label = self._check_identifier(start_label, "label")
        end_label = self._check_identifier(end_label, "label")
        rel_type = self._check_identifier(rel_type, "relationship type")
        query = f"""
        MATCH (a:{start_label} {{name: $start_name}})
        MATCH (b:{end_label} {{name: $end_name}})
        MERGE (a)-[r:{rel_type}]->(b)
        RETURN r
        """
        return self.run_query(query, {
            "start_name": start_name,
            "end_name": end_name
        })

    # ---------- Domain-Specific Functions ----------
    def add_molecule(self, name: str, description: Optional[str] = None):
        """MERGE a Molecule node."""
        return self.create_node("Molecule", {"name": name, "description": description})

    def add_pathway(self, name: str, description: Optional[str] = None):
        """MERGE a Pathway node."""
        return self.create_node("Pathway", {"name": name, "description": description})

    def add_company(self, name: str, country: Optional[str] = None, funding: Optional[float] = None):
        """MERGE a Company node."""
        return self.create_node("Company", {"name": name, "country": country, "funding": funding})

    def add_funder(self, name: str):
        """MERGE a Funder node.

        link_company_funder() only MATCHes Funder nodes, so one must exist
        before that link can be created.
        """
        return self.create_node("Funder", {"name": name})

    def add_trial(self, trial_id: str, title: Optional[str] = None, status: Optional[str] = None):
        """MERGE a Trial node; *trial_id* is stored as its ``name``."""
        return self.create_node("Trial", {"name": trial_id, "title": title, "status": status})

    def add_regulation(self, region: str, title: str, summary: Optional[str] = None):
        """MERGE a Regulation node; *title* is stored as its ``name``."""
        return self.create_node("Regulation", {"name": title, "region": region, "summary": summary})

    def add_biosecurity_alert(self, title: str, severity: str, details: Optional[str] = None):
        """MERGE a BiosecurityAlert node; *title* is stored as its ``name``."""
        return self.create_node("BiosecurityAlert", {"name": title, "severity": severity, "details": details})

    # ---------- Relationships ----------
    def link_molecule_pathway(self, molecule: str, pathway: str):
        """(Molecule)-[:INVOLVED_IN]->(Pathway)"""
        return self.create_relationship("Molecule", molecule, "INVOLVED_IN", "Pathway", pathway)

    def link_company_molecule(self, company: str, molecule: str):
        """(Company)-[:DEVELOPS]->(Molecule)"""
        return self.create_relationship("Company", company, "DEVELOPS", "Molecule", molecule)

    def link_trial_molecule(self, trial_id: str, molecule: str):
        """(Trial)-[:TESTS]->(Molecule)"""
        return self.create_relationship("Trial", trial_id, "TESTS", "Molecule", molecule)

    def link_company_funder(self, company: str, funder: str):
        """(Company)-[:FUNDED_BY]->(Funder)"""
        return self.create_relationship("Company", company, "FUNDED_BY", "Funder", funder)

    def link_regulation_pathway(self, regulation: str, pathway: str):
        """(Regulation)-[:REGULATES]->(Pathway)"""
        return self.create_relationship("Regulation", regulation, "REGULATES", "Pathway", pathway)

    def link_biosecurity_molecule(self, alert: str, molecule: str):
        """(BiosecurityAlert)-[:ASSOCIATED_WITH]->(Molecule)"""
        return self.create_relationship("BiosecurityAlert", alert, "ASSOCIATED_WITH", "Molecule", molecule)

    # ---------- Queries ----------
    def get_global_funding_network(self) -> List[Dict]:
        """Return company/funder pairs with the company's country and funding, ordered by amount descending."""
        query = """
        MATCH (c:Company)-[r:FUNDED_BY]->(f:Funder)
        RETURN c.name AS company, f.name AS funder, c.country AS country, c.funding AS amount
        ORDER BY amount DESC
        """
        return self.run_query(query)

    def get_pathway_network(self, pathway_name: str) -> List[Dict]:
        """Return molecules involved in *pathway_name*, each with a developing company if any."""
        query = """
        MATCH (m:Molecule)-[:INVOLVED_IN]->(p:Pathway {name: $pathway})
        OPTIONAL MATCH (m)<-[:DEVELOPS]-(c:Company)
        RETURN m.name AS molecule, c.name AS company
        """
        return self.run_query(query, {"pathway": pathway_name})

    def get_all_entities(self) -> List[Dict]:
        """Return the distinct (labels, name) pairs of every node in the graph."""
        query = """
        MATCH (n) RETURN DISTINCT labels(n) AS labels, n.name AS name
        """
        return self.run_query(query)
|
103 |
+
|
104 |
|
105 |
+
# Singleton instance for app-wide use.
# NOTE: constructed at import time, so importing this module raises ValueError
# (from GraphDB.__init__) when the Neo4j environment variables are not set.
graphdb = GraphDB()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|