Spaces:
Running
Running
Update multimodal_module.py
Browse files- multimodal_module.py +748 -1245
multimodal_module.py
CHANGED
@@ -1,1303 +1,806 @@
|
|
1 |
-
#
|
2 |
"""
|
3 |
-
Multimodal SuperAgent
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
- Close-to-Human Brain
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
-
|
13 |
-
-
|
14 |
-
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
-
|
20 |
-
-
|
21 |
-
-
|
22 |
-
-
|
23 |
"""
|
24 |
|
25 |
from __future__ import annotations
|
26 |
-
import os, sys,
|
27 |
from dataclasses import dataclass, field, asdict
|
28 |
-
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
import asyncio
|
30 |
|
31 |
-
#
|
|
|
|
|
|
|
32 |
try:
|
33 |
-
import
|
|
|
|
|
|
|
|
|
34 |
except Exception:
|
35 |
-
|
|
|
36 |
try:
|
37 |
-
import
|
38 |
except Exception:
|
39 |
-
|
40 |
-
|
41 |
-
# Logging
|
42 |
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
|
43 |
-
logger = logging.getLogger("SuperAgent")
|
44 |
-
|
45 |
-
# ---------------------------
|
46 |
-
# CONFIG — edit BEFORE running
|
47 |
-
# ---------------------------
|
48 |
-
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
49 |
-
TMP_DIR = os.getenv("SUPERAGENT_TMP", os.path.join(BASE_DIR, "tmp"))
|
50 |
-
CACHE_DIR = os.getenv("SUPERAGENT_CACHE", os.path.join(BASE_DIR, "model_cache"))
|
51 |
-
os.makedirs(TMP_DIR, exist_ok=True)
|
52 |
-
os.makedirs(CACHE_DIR, exist_ok=True)
|
53 |
-
|
54 |
-
# Central configuration: change model paths here or provide config.yaml and set env SUPERAGENT_CONFIG
|
55 |
-
MODEL_CONFIG = {
|
56 |
-
"device": "auto", # "auto" | "cpu" | "cuda"
|
57 |
-
"llm": { # prefer local HF model id or path; optionally set 'backend' to 'llamacpp'
|
58 |
-
"model_path": None, # e.g., "/path/to/Mistral-7B-Instruct"
|
59 |
-
"backend": "transformers", # "transformers" | "llamacpp"
|
60 |
-
"quantize": None, # e.g., "bitsandbytes" or GGUF for llamacpp
|
61 |
-
},
|
62 |
-
"embedder": "sentence-transformers/all-MiniLM-L6-v2",
|
63 |
-
"faiss_index_dir": os.path.join(CACHE_DIR, "faiss_index"),
|
64 |
-
"sdxl_base": None, # e.g., "/path/to/sdxl-base"
|
65 |
-
"sdxl_refiner": None, # optional
|
66 |
-
"sdxl_inpaint": None, # optional
|
67 |
-
"blip_caption": None, # e.g., "Salesforce/blip-image-captioning-base" (optional)
|
68 |
-
"piper_binary": "/usr/local/bin/piper", # optional
|
69 |
-
"piper_voice": None, # optional
|
70 |
-
"allow_web_search": False, # opt-in
|
71 |
-
"safety_blocklist": ["terror", "explosive", "harm"],
|
72 |
-
"knowledge_graph_path": os.path.join(CACHE_DIR, "kg.json"),
|
73 |
-
"memory_persist": True,
|
74 |
-
"memory_file": os.path.join(CACHE_DIR, "longterm_memory.json"),
|
75 |
-
"embed_dim": 384, # MiniLM-L6-v2
|
76 |
-
"auto_learn": True,
|
77 |
-
"vfs_file": os.path.join(CACHE_DIR, "versioned_facts.json"),
|
78 |
-
"web_cache_file": os.path.join(CACHE_DIR, "webcache.json"),
|
79 |
-
"chb_min_confidence": 0.90,
|
80 |
-
"max_memory_items": 10000,
|
81 |
-
"kg_max_nodes": 500,
|
82 |
-
"short_memory_turns": 50,
|
83 |
-
}
|
84 |
-
|
85 |
-
# ---------------------------
|
86 |
-
# Helpers
|
87 |
-
# ---------------------------
|
88 |
-
def uid(prefix="asset"):
|
89 |
-
return f"{prefix}_{uuid.uuid4().hex[:8]}"
|
90 |
-
|
91 |
-
def tmp_path(suffix=""):
|
92 |
-
p = os.path.join(TMP_DIR, f"{uuid.uuid4().hex}{suffix}")
|
93 |
-
os.makedirs(os.path.dirname(p), exist_ok=True)
|
94 |
-
return p
|
95 |
-
|
96 |
-
def cleanup(*paths):
|
97 |
-
for p in paths:
|
98 |
-
try:
|
99 |
-
if not p: continue
|
100 |
-
if os.path.isfile(p): os.remove(p)
|
101 |
-
elif os.path.isdir(p): shutil.rmtree(p)
|
102 |
-
except Exception as e:
|
103 |
-
logger.debug("cleanup failed %s: %s", p, e)
|
104 |
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
# Provenance & Watermark
|
110 |
-
# ---------------------------
|
111 |
-
@dataclass
|
112 |
-
class Provenance:
|
113 |
-
model: str
|
114 |
-
version: str
|
115 |
-
synthetic: bool = True
|
116 |
-
consent: Optional[str] = None
|
117 |
-
created_at: float = field(default_factory=now_ts)
|
118 |
-
extra: Dict[str, Any] = field(default_factory=dict)
|
119 |
-
|
120 |
-
class ProvenanceManager:
|
121 |
-
def attach(self, filepath: str, meta: Union[Provenance, dict]):
|
122 |
-
try:
|
123 |
-
meta_out = asdict(meta) if isinstance(meta, Provenance) else meta
|
124 |
-
with open(filepath + ".prov.json", "w", encoding="utf-8") as f:
|
125 |
-
json.dump(meta_out, f, indent=2, ensure_ascii=False)
|
126 |
-
except Exception as e:
|
127 |
-
logger.warning("provenance attach failed: %s", e)
|
128 |
-
|
129 |
-
def watermark_audio(self, wav_bytes: bytes) -> bytes:
|
130 |
-
# Non-destructive placeholder; does not alter content.
|
131 |
-
return wav_bytes
|
132 |
-
|
133 |
-
def watermark_image(self, pil_img):
|
134 |
-
# Return as-is. Replace with robust watermark if desired.
|
135 |
-
return pil_img
|
136 |
-
|
137 |
-
# ---------------------------
|
138 |
-
# Safety & Consent
|
139 |
-
# ---------------------------
|
140 |
-
@dataclass
|
141 |
-
class ConsentRecord:
|
142 |
-
user_id: str
|
143 |
-
attestation: str
|
144 |
-
ts: float = field(default_factory=now_ts)
|
145 |
-
|
146 |
-
class SafetyManager:
|
147 |
-
def __init__(self, blocklist=None):
|
148 |
-
self.blocklist = blocklist or MODEL_CONFIG["safety_blocklist"]
|
149 |
-
self.consent_log: List[ConsentRecord] = []
|
150 |
-
|
151 |
-
def record_consent(self, user_id: str, attestation: str) -> str:
|
152 |
-
token = hashlib.sha256(f"{user_id}-{attestation}-{time.time()}".encode()).hexdigest()
|
153 |
-
self.consent_log.append(ConsentRecord(user_id=user_id, attestation=attestation))
|
154 |
-
return token
|
155 |
-
|
156 |
-
def is_allowed(self, text: str) -> bool:
|
157 |
-
t = (text or "").lower()
|
158 |
-
return not any(b in t for b in self.blocklist)
|
159 |
-
|
160 |
-
def check_public_figure(self, embedding) -> bool:
|
161 |
-
# Placeholder: implement real similarity against protected embeddings if available
|
162 |
-
return False
|
163 |
-
|
164 |
-
# ---------------------------
|
165 |
-
# Short-term memory (conversation buffer)
|
166 |
-
# ---------------------------
|
167 |
-
class ShortTermMemory:
|
168 |
-
def __init__(self, max_turns=None):
|
169 |
-
self.max_turns = max_turns or MODEL_CONFIG["short_memory_turns"]
|
170 |
-
self.store: Dict[str, List[Dict[str,Any]]] = {}
|
171 |
-
|
172 |
-
def push(self, user_id: str, role: str, content: Any):
|
173 |
-
k = str(user_id)
|
174 |
-
arr = self.store.setdefault(k, [])
|
175 |
-
arr.append({"ts":now_ts(), "role":role, "content":content})
|
176 |
-
if len(arr) > self.max_turns:
|
177 |
-
self.store[k] = arr[-self.max_turns:]
|
178 |
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
181 |
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
self.index_dir = index_dir or MODEL_CONFIG["faiss_index_dir"]
|
188 |
-
os.makedirs(self.index_dir, exist_ok=True)
|
189 |
-
self.memfile = MODEL_CONFIG.get("memory_file")
|
190 |
-
self.embed_model_name = embed_model_name or MODEL_CONFIG["embedder"]
|
191 |
-
self.data: List[Dict[str,Any]] = []
|
192 |
-
self.index = None
|
193 |
-
self.embedder = None
|
194 |
-
self.dim = MODEL_CONFIG.get("embed_dim", 384)
|
195 |
-
self._load()
|
196 |
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
else:
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
try:
|
223 |
-
|
224 |
-
faiss.write_index(self.index, os.path.join(self.index_dir, "index.faiss"))
|
225 |
except Exception:
|
226 |
pass
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
def
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
def export_all(self):
|
288 |
-
return {"count": len(self.data), "items": self.data}
|
289 |
-
|
290 |
-
def import_bulk(self, items: List[Dict[str,Any]]):
|
291 |
-
self.data = items or []
|
292 |
-
emb = self._get_embedder()
|
293 |
-
if emb is not None and self.data:
|
294 |
-
vecs = emb.encode([m["text"] for m in self.data])
|
295 |
-
import numpy as _np
|
296 |
-
self._ensure_index(vecs.shape[1])
|
297 |
-
if self.index is not None:
|
298 |
-
self.index.reset()
|
299 |
-
self.index.add(_np.asarray(vecs, dtype="float32"))
|
300 |
-
self._save()
|
301 |
-
|
302 |
-
# ---------------------------
|
303 |
-
# Knowledge Graph (simple JSON triples)
|
304 |
-
# ---------------------------
|
305 |
-
class KnowledgeGraph:
|
306 |
-
def __init__(self, path=None):
|
307 |
-
self.path = path or MODEL_CONFIG["knowledge_graph_path"]
|
308 |
-
self.graph = {}
|
309 |
-
self._load()
|
310 |
-
|
311 |
-
def _load(self):
|
312 |
-
if os.path.exists(self.path):
|
313 |
-
try:
|
314 |
-
with open(self.path,"r",encoding="utf-8") as f:
|
315 |
-
self.graph = json.load(f)
|
316 |
-
except Exception:
|
317 |
-
self.graph = {}
|
318 |
-
|
319 |
-
def add_fact(self, subj: str, pred: str, obj: str):
|
320 |
-
# cap nodes to avoid unbounded growth
|
321 |
-
if len(self.graph) > MODEL_CONFIG["kg_max_nodes"]:
|
322 |
-
# basic prune: drop oldest 10% of keys by insertion order
|
323 |
-
keys = list(self.graph.keys())
|
324 |
-
for k in keys[:max(1, len(keys)//10)]:
|
325 |
-
self.graph.pop(k, None)
|
326 |
-
k = f"{subj}::{pred}"
|
327 |
-
self.graph.setdefault(k, []).append(obj)
|
328 |
-
self._save()
|
329 |
-
|
330 |
-
def query(self, subj: str, pred: str):
|
331 |
-
return self.graph.get(f"{subj}::{pred}", [])
|
332 |
-
|
333 |
-
def _save(self):
|
334 |
try:
|
335 |
-
|
336 |
-
|
|
|
|
|
337 |
except Exception as e:
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
Fact schema:
|
347 |
-
{
|
348 |
-
"id": str, "claim": str, "value": str, "scope": str|None,
|
349 |
-
"first_seen": ts, "verified_at": ts, "confidence": float,
|
350 |
-
"sources": [{"type": "web|memory|kg|vision|audio|manual", "ref": str, "title": str|None, "time": ts}],
|
351 |
-
"supersedes": str|None, "valid_from": ts|None, "valid_to": ts|None
|
352 |
-
}
|
353 |
-
"""
|
354 |
-
def __init__(self, path=None):
|
355 |
-
self.path = path or MODEL_CONFIG["vfs_file"]
|
356 |
-
self.facts: List[Dict[str,Any]] = []
|
357 |
-
self._load()
|
358 |
-
|
359 |
-
def _load(self):
|
360 |
-
if os.path.exists(self.path):
|
361 |
try:
|
362 |
-
|
363 |
-
self.facts = json.load(f)
|
364 |
except Exception:
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
new_id = uid("fact")
|
378 |
-
rec = {
|
379 |
-
"id": new_id, "claim": claim, "value": value, "scope": scope,
|
380 |
-
"first_seen": now_ts(), "verified_at": now_ts(), "confidence": float(confidence),
|
381 |
-
"sources": sources or [], "supersedes": supersedes,
|
382 |
-
"valid_from": valid_from, "valid_to": valid_to
|
383 |
-
}
|
384 |
-
self.facts.append(rec)
|
385 |
-
self._save()
|
386 |
-
return rec
|
387 |
-
|
388 |
-
def find(self, claim: str) -> List[Dict[str,Any]]:
|
389 |
-
c = claim.strip().lower()
|
390 |
-
return [f for f in self.facts if f.get("claim","").strip().lower()==c]
|
391 |
-
|
392 |
-
def latest(self, claim: str) -> Optional[Dict[str,Any]]:
|
393 |
-
items = self.find(claim)
|
394 |
-
if not items: return None
|
395 |
-
# return the most recent verified_at
|
396 |
-
return sorted(items, key=lambda x: x.get("verified_at", 0), reverse=True)[0]
|
397 |
-
|
398 |
-
def all(self) -> List[Dict[str,Any]]:
|
399 |
-
return self.facts
|
400 |
-
|
401 |
-
# ---------------------------
|
402 |
-
# Web search (HTML scrape + 24h cache)
|
403 |
-
# ---------------------------
|
404 |
-
class WebSearch:
|
405 |
-
def __init__(self, enabled: bool=False, cache_file: Optional[str]=None):
|
406 |
-
self.enabled = bool(enabled)
|
407 |
-
self.cache_file = cache_file or MODEL_CONFIG["web_cache_file"]
|
408 |
-
self.cache = {}
|
409 |
-
self._load_cache()
|
410 |
-
|
411 |
-
def _load_cache(self):
|
412 |
-
if os.path.exists(self.cache_file):
|
413 |
try:
|
414 |
-
|
415 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
416 |
except Exception:
|
417 |
-
|
418 |
-
|
419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
420 |
try:
|
421 |
-
|
422 |
-
json.dump(self.cache, f, indent=2, ensure_ascii=False)
|
423 |
except Exception:
|
424 |
pass
|
425 |
|
426 |
-
def
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
433 |
try:
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
r = requests.get(url, headers=headers, timeout=8)
|
439 |
-
soup = BeautifulSoup(r.text, "html.parser")
|
440 |
-
results=[]
|
441 |
-
# capture title/link/snippet (≤100 chars)
|
442 |
-
containers = soup.select(".result")[:max_results]
|
443 |
-
for c in containers:
|
444 |
-
a = c.select_one(".result__a")
|
445 |
-
s = c.select_one(".result__snippet")
|
446 |
-
title = a.get_text(strip=True) if a else ""
|
447 |
-
link = a.get("href") if a else ""
|
448 |
-
snippet = (s.get_text(" ", strip=True) if s else "")[:100]
|
449 |
-
if title and link:
|
450 |
-
results.append({"title": title, "link": link, "snippet": snippet})
|
451 |
-
self.cache[key] = {"ts": now_ts(), "hits": results}
|
452 |
-
self._save_cache()
|
453 |
-
return results
|
454 |
except Exception as e:
|
455 |
-
|
456 |
-
return
|
457 |
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
470 |
|
471 |
-
def
|
472 |
-
|
473 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
try:
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
self._loaded = True
|
485 |
except Exception as e:
|
486 |
-
|
487 |
-
self._loaded = False
|
488 |
-
elif self.backend == "llamacpp":
|
489 |
-
try:
|
490 |
-
import llama_cpp
|
491 |
-
if not self.model_path:
|
492 |
-
raise ValueError("MODEL_CONFIG['llm']['model_path'] not set")
|
493 |
-
self.model = llama_cpp.Llama(model_path=self.model_path)
|
494 |
-
self._loaded = True
|
495 |
-
except Exception as e:
|
496 |
-
logger.error("llamacpp load failed: %s", e)
|
497 |
-
self._loaded = False
|
498 |
else:
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
if self.backend == "transformers":
|
509 |
-
try:
|
510 |
-
inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
|
511 |
-
out = self.model.generate(
|
512 |
-
**inputs, max_new_tokens=max_tokens, do_sample=True, temperature=temperature,
|
513 |
-
pad_token_id=self.tokenizer.eos_token_id
|
514 |
-
)
|
515 |
-
return self.tokenizer.decode(out[0], skip_special_tokens=True)
|
516 |
-
except Exception as e:
|
517 |
-
logger.error("LLM generation failed: %s", e)
|
518 |
-
return "[llm-error]"
|
519 |
-
elif self.backend == "llamacpp":
|
520 |
-
try:
|
521 |
-
result = self.model.create(prompt=prompt, max_tokens=max_tokens, temperature=temperature)
|
522 |
-
if isinstance(result, dict) and "choices" in result:
|
523 |
-
return result["choices"][0]["text"]
|
524 |
-
return str(result)
|
525 |
-
except Exception as e:
|
526 |
-
logger.error("llamacpp generation failed: %s", e)
|
527 |
-
return "[llm-error]"
|
528 |
else:
|
529 |
-
return "
|
530 |
-
|
531 |
-
#
|
532 |
-
#
|
533 |
-
#
|
534 |
-
class
|
535 |
-
def __init__(self,
|
536 |
-
self.
|
537 |
-
self.
|
538 |
-
self.
|
539 |
-
self.
|
540 |
-
self.
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
#
|
620 |
-
|
621 |
-
|
622 |
-
def __init__(self):
|
623 |
-
pass
|
624 |
-
|
625 |
-
def analyze(self, video_path: str, max_frames:int=6):
|
626 |
-
try:
|
627 |
-
import imageio
|
628 |
-
from PIL import Image
|
629 |
-
reader = imageio.get_reader(video_path)
|
630 |
-
total = reader.count_frames()
|
631 |
-
step = max(1, total // max_frames) if total else 1
|
632 |
-
frames=[]
|
633 |
-
for i in range(0,total,step):
|
634 |
try:
|
635 |
-
|
636 |
-
p = tmp_path(f"_frame{i}.jpg")
|
637 |
-
Image.fromarray(arr).save(p)
|
638 |
-
frames.append(p)
|
639 |
-
if len(frames) >= max_frames: break
|
640 |
except Exception:
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
|
645 |
-
|
646 |
-
|
647 |
-
""
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
658 |
try:
|
659 |
-
|
660 |
-
for frame in reader:
|
661 |
-
# duplicate frames "factor" times
|
662 |
-
for _ in range(factor):
|
663 |
-
writer.append_data(frame)
|
664 |
-
writer.close()
|
665 |
-
return {"status":"ok", "output": out_path, "fps": new_fps}
|
666 |
except Exception as e:
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
|
671 |
-
#
|
672 |
-
|
673 |
-
#
|
674 |
-
|
675 |
-
|
676 |
-
self.prov = provenance
|
677 |
-
self.safety = safety
|
678 |
-
self.piper_bin = piper_bin
|
679 |
-
self.piper_voice = piper_voice
|
680 |
-
self.profiles: Dict[str,Dict[str,Any]] = {}
|
681 |
-
|
682 |
-
def extract_embedding(self, wav_path: str) -> Optional[List[float]]:
|
683 |
try:
|
684 |
-
|
685 |
-
|
686 |
-
h = hashlib.sha256(b).digest()
|
687 |
-
return [float(x)/255.0 for x in h[:192]]
|
688 |
except Exception:
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
token = self.safety.record_consent(user_id, consent_text)
|
693 |
-
emb = self.extract_embedding(wav_path)
|
694 |
-
if block_public and self.safety.check_public_figure(emb):
|
695 |
-
return {"status":"rejected","reason":"protected_speaker"}
|
696 |
-
vid = f"voice_{hashlib.sha1((user_id+str(time.time())).encode()).hexdigest()[:10]}"
|
697 |
-
self.profiles[vid] = {"user_id": user_id, "embedding": emb, "consent": token}
|
698 |
-
return {"status":"ok", "voice_id": vid}
|
699 |
-
|
700 |
-
def synthesize(self, voice_id:Optional[str], text:str, emotion:Optional[str]=None, rate:float=1.0, fmt:str="wav"):
|
701 |
-
if not self.safety.is_allowed(text):
|
702 |
-
return None
|
703 |
-
out = tmp_path(f".{fmt}")
|
704 |
-
# Prefer Piper if configured (local CLI)
|
705 |
-
if self.piper_bin and self.piper_voice and os.path.exists(self.piper_bin) and os.path.exists(self.piper_voice):
|
706 |
-
try:
|
707 |
-
p = subprocess.Popen([self.piper_bin, "-m", self.piper_voice, "-f", out],
|
708 |
-
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
709 |
-
p.communicate(input=text, timeout=60)
|
710 |
-
if os.path.exists(out):
|
711 |
-
with open(out,"rb") as f:
|
712 |
-
b = f.read()
|
713 |
-
b2 = self.prov.watermark_audio(b)
|
714 |
-
with open(out,"wb") as f: f.write(b2)
|
715 |
-
self.prov.attach(out, Provenance(model="piper", version="1.0",
|
716 |
-
consent=self.profiles.get(voice_id, {}).get("consent")))
|
717 |
-
return out
|
718 |
-
except Exception as e:
|
719 |
-
logger.debug("piper fail: %s", e)
|
720 |
-
# Fallback pyttsx3
|
721 |
-
try:
|
722 |
-
import pyttsx3
|
723 |
-
eng = pyttsx3.init()
|
724 |
-
try: eng.setProperty("rate", int(200*rate))
|
725 |
-
except Exception: pass
|
726 |
-
eng.save_to_file(text, out); eng.runAndWait()
|
727 |
-
with open(out,"rb") as f:
|
728 |
-
b = f.read()
|
729 |
-
b2 = self.prov.watermark_audio(b)
|
730 |
-
with open(out,"wb") as f: f.write(b2)
|
731 |
-
self.prov.attach(out, Provenance(model="pyttsx3", version="1.0"))
|
732 |
-
return out
|
733 |
-
except Exception as e:
|
734 |
-
logger.debug("pyttsx3 fail: %s", e)
|
735 |
-
# fallback: silent wav (keeps pipeline functional)
|
736 |
try:
|
737 |
-
|
738 |
-
|
739 |
-
|
740 |
-
|
741 |
-
for _ in range(fr*dur): wf.writeframes(struct.pack('<h',0))
|
742 |
-
self.prov.attach(out, Provenance(model="silence", version="0.1"))
|
743 |
-
return out
|
744 |
except Exception:
|
745 |
-
|
746 |
-
|
747 |
-
#
|
748 |
-
|
749 |
-
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
-
if
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
-
|
765 |
-
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
return None
|
778 |
-
|
779 |
-
# ---------------------------
|
780 |
-
# Code Sandbox (lightweight)
|
781 |
-
# ---------------------------
|
782 |
-
class CodeSandbox:
|
783 |
-
def __init__(self):
|
784 |
-
pass
|
785 |
-
def run(self, code:str, timeout: int=5) -> Dict[str,Any]:
|
786 |
-
# VERY light sandbox; for real isolation use subprocess + rlimits in your env.
|
787 |
-
try:
|
788 |
-
import io, contextlib
|
789 |
-
buf = io.StringIO()
|
790 |
-
ns = {}
|
791 |
-
with contextlib.redirect_stdout(buf):
|
792 |
-
exec(code, {"__builtins__": {"print": print, "range": range, "len": len}}, ns)
|
793 |
-
out = buf.getvalue()
|
794 |
-
return {"stdout": out, "keys": list(ns.keys())}
|
795 |
-
except Exception as e:
|
796 |
-
return {"error": str(e)}
|
797 |
-
|
798 |
-
# ---------------------------
|
799 |
-
# Agents
|
800 |
-
# ---------------------------
|
801 |
-
class Agent:
|
802 |
-
def __init__(self, name:str, model: LocalLLM, tools:Dict[str,Any]=None):
|
803 |
-
self.name = name
|
804 |
-
self.model = model
|
805 |
-
self.tools = tools or {}
|
806 |
-
|
807 |
-
def act(self, prompt:str, max_tokens=256, temperature=0.7):
|
808 |
-
try:
|
809 |
-
if not self.model.ready():
|
810 |
-
self.model.load()
|
811 |
-
return self.model.generate(prompt, max_tokens=max_tokens, temperature=temperature)
|
812 |
-
except Exception as e:
|
813 |
-
logger.error("Agent %s failed: %s", self.name, e)
|
814 |
-
return f"[{self.name}-error] {e}"
|
815 |
-
|
816 |
-
class AgentHub:
|
817 |
-
def __init__(self, llm: LocalLLM, img_engine: ImageEngine, vid_engine: VideoEngine):
|
818 |
-
self.research = Agent("ResearchAgent", llm, {"web": None})
|
819 |
-
self.coder = Agent("CoderAgent", llm, {"exec": CodeSandbox()})
|
820 |
-
self.designer = Agent("DesignerAgent", llm, {"image": img_engine})
|
821 |
-
self.vid = Agent("VideoAgent", llm, {"video": vid_engine})
|
822 |
-
|
823 |
-
def coordinate(self, user_request:str):
|
824 |
-
if any(k in user_request.lower() for k in ["code","implement","script"]):
|
825 |
-
return self.coder.act(f"Write code for: {user_request}", max_tokens=512)
|
826 |
-
if any(k in user_request.lower() for k in ["design","image","generate"]):
|
827 |
-
return self.designer.act(f"Create an image plan: {user_request}", max_tokens=256)
|
828 |
-
return self.research.act(f"Research and summarize: {user_request}", max_tokens=512)
|
829 |
-
|
830 |
-
# ---------------------------
|
831 |
-
# Perception (lightweight multimodal glue)
|
832 |
-
# ---------------------------
|
833 |
-
class Perception:
|
834 |
-
EMOJI_MAP = {
|
835 |
-
"😂":"joy","😅":"relief","😭":"sad","😡":"anger","😍":"affection","😎":"confident",
|
836 |
-
"🤔":"thinking","😴":"tired","🙄":"annoyed","😇":"polite","😁":"happy","🤣":"joy"
|
837 |
-
}
|
838 |
-
def text_emotion_tags(self, text:str) -> List[str]:
|
839 |
-
tags=set()
|
840 |
-
for ch in text:
|
841 |
-
if ch in self.EMOJI_MAP: tags.add(self.EMOJI_MAP[ch])
|
842 |
-
if re.search(r"\b(sad|upset|angry|frustrated)\b", text.lower()): tags.add("negative")
|
843 |
-
if re.search(r"\b(happy|great|awesome|love)\b", text.lower()): tags.add("positive")
|
844 |
-
return sorted(tags)
|
845 |
-
|
846 |
-
# ---------------------------
|
847 |
-
# Confidence Scorer
|
848 |
-
# ---------------------------
|
849 |
-
class ConfidenceScorer:
|
850 |
-
def score(self, *,
|
851 |
-
source_reliability: float,
|
852 |
-
recency: float,
|
853 |
-
agreement: float,
|
854 |
-
self_consistency: float,
|
855 |
-
retrieval_strength: float,
|
856 |
-
contradictions: float) -> float:
|
857 |
-
# conservative weighting; each in [0,1]
|
858 |
-
w1,w2,w3,w4,w5,w6 = 0.22,0.18,0.18,0.18,0.14,0.10
|
859 |
-
s = (w1*source_reliability +
|
860 |
-
w2*recency +
|
861 |
-
w3*agreement +
|
862 |
-
w4*self_consistency +
|
863 |
-
w5*retrieval_strength -
|
864 |
-
w6*contradictions)
|
865 |
-
return max(0.0, min(1.0, s))
|
866 |
-
|
867 |
-
# ---------------------------
|
868 |
-
# Close-to-Human Brain (CHB)
|
869 |
-
# ---------------------------
|
870 |
-
class CloseToHumanBrain:
|
871 |
-
def __init__(self, llm: LocalLLM, longmem: LongTermMemory, kg: KnowledgeGraph,
|
872 |
-
vfs: VersionedFactStore, web: WebSearch, img: ImageEngine):
|
873 |
-
self.llm = llm
|
874 |
-
self.long = longmem
|
875 |
-
self.kg = kg
|
876 |
-
self.vfs = vfs
|
877 |
-
self.web = web
|
878 |
-
self.img = img
|
879 |
-
self.math = MathEngine()
|
880 |
-
self.perc = Perception()
|
881 |
-
self.scorer = ConfidenceScorer()
|
882 |
-
|
883 |
-
# ---- Retrieval
|
884 |
-
def retrieve(self, user_text: str, k:int=5) -> Tuple[str, List[Dict[str,str]]]:
|
885 |
-
mem_hits = self.long.search(user_text, top_k=min(8,k))
|
886 |
-
mem_ctx = "\n".join([h["text"] for h in mem_hits]) if mem_hits else ""
|
887 |
-
web_hits = self.web.search(user_text, max_results=3) if self.web.enabled else []
|
888 |
-
return mem_ctx, web_hits
|
889 |
-
|
890 |
-
# ---- Drafting
|
891 |
-
def multi_draft(self, prompt_base: str, drafts:int=3, max_tokens:int=384) -> List[str]:
|
892 |
-
outs=[]
|
893 |
-
temps=[0.4, 0.7, 1.0][:max(1,drafts)]
|
894 |
-
if not self.llm.ready():
|
895 |
-
self.llm.load()
|
896 |
-
for t in temps:
|
897 |
-
out = self.llm.generate(prompt_base, max_tokens=max_tokens, temperature=t)
|
898 |
-
outs.append(out)
|
899 |
-
return outs
|
900 |
-
|
901 |
-
# ---- Verification helpers
|
902 |
-
def _estimate_reliability(self, sources: List[Dict[str,Any]]) -> float:
|
903 |
-
if not sources: return 0.4
|
904 |
-
rel=0.0
|
905 |
-
for s in sources:
|
906 |
-
t = s.get("type","")
|
907 |
-
if t=="memory": rel += 0.6
|
908 |
-
elif t=="kg": rel += 0.7
|
909 |
-
elif t=="web": rel += 0.65
|
910 |
-
elif t in ("vision","audio","video"): rel += 0.55
|
911 |
-
else: rel += 0.5
|
912 |
-
return min(1.0, rel / max(1,len(sources)))
|
913 |
-
|
914 |
-
def _recency(self, sources: List[Dict[str,Any]]) -> float:
|
915 |
-
if not sources: return 0.3
|
916 |
-
ages=[]
|
917 |
-
now=now_ts()
|
918 |
-
for s in sources:
|
919 |
-
ts = s.get("time") or now
|
920 |
-
ages.append(max(0.0, now - ts))
|
921 |
-
avg = sum(ages)/len(ages)
|
922 |
-
# map age (in seconds) to [0,1] with simple decay (~1 day half-life)
|
923 |
-
day = 86400.0
|
924 |
-
return max(0.0, min(1.0, 1.0/(1.0 + (avg/day))))
|
925 |
-
|
926 |
-
def _agreement(self, claims: List[str]) -> float:
|
927 |
-
# crude token overlap agreement
|
928 |
-
if not claims: return 0.0
|
929 |
-
base=set(re.findall(r"\w+", claims[0].lower()))
|
930 |
-
agree=1
|
931 |
-
for c in claims[1:]:
|
932 |
-
toks=set(re.findall(r"\w+", c.lower()))
|
933 |
-
if len(base & toks) > 0: agree += 1
|
934 |
-
return agree/len(claims)
|
935 |
-
|
936 |
-
def _self_consistency(self, drafts: List[str]) -> float:
|
937 |
-
# measure average pairwise Jaccard of word sets
|
938 |
-
if not drafts: return 0.0
|
939 |
-
sets=[set(re.findall(r"\w+", d.lower())) for d in drafts]
|
940 |
-
if len(sets)==1: return 1.0
|
941 |
-
pair_scores=[]
|
942 |
-
for i in range(len(sets)):
|
943 |
-
for j in range(i+1,len(sets)):
|
944 |
-
a,b=sets[i],sets[j]
|
945 |
-
inter=len(a & b); union=len(a | b) or 1
|
946 |
-
pair_scores.append(inter/union)
|
947 |
-
return sum(pair_scores)/len(pair_scores)
|
948 |
-
|
949 |
-
def _retrieval_strength(self, mem_ctx: str) -> float:
|
950 |
-
if not mem_ctx: return 0.4
|
951 |
-
# simple function of context length
|
952 |
-
L = len(mem_ctx.split())
|
953 |
-
return max(0.4, min(1.0, math.log10(1+L)/2))
|
954 |
-
|
955 |
-
# ---- Verify & Synthesize
|
956 |
-
def verify_and_respond(self, user_id:str, user_text: str,
|
957 |
-
preferred_lang: Optional[str]=None) -> Dict[str,Any]:
|
958 |
-
# Perception
|
959 |
-
emotion_tags = self.perc.text_emotion_tags(user_text)
|
960 |
-
|
961 |
-
# Retrieve evidence
|
962 |
-
mem_ctx, web_hits = self.retrieve(user_text, k=6)
|
963 |
-
web_ctx = "\n".join([f"{h['title']} ({h['link']}) — {h.get('snippet','')}" for h in web_hits]) if web_hits else ""
|
964 |
-
citations = [{"type":"web","ref": h["link"], "title": h["title"], "time": now_ts()} for h in web_hits]
|
965 |
-
|
966 |
-
# Construct base prompt
|
967 |
-
prompt = (
|
968 |
-
"Persona: helpful, precise assistant.\n"
|
969 |
-
"Use given memory and web snippets as *evidence*.\n"
|
970 |
-
"If a claim is uncertain, narrow it or state limits.\n\n"
|
971 |
-
f"Memory Evidence:\n{mem_ctx}\n\nWeb Evidence:\n{web_ctx}\n\n"
|
972 |
-
f"User: {user_text}\nAssistant:"
|
973 |
-
)
|
974 |
-
|
975 |
-
# Multi-draft
|
976 |
-
drafts = self.multi_draft(prompt, drafts=3, max_tokens=512)
|
977 |
-
|
978 |
-
# Self-consistency + basic math check (if any plain expression present)
|
979 |
-
math_value = None
|
980 |
-
m = re.search(r"(?:calculate|solve)\s*([0-9\.\+\-\*\/\(\)\s^sqrtpiE]+)", user_text, re.I)
|
981 |
-
if m:
|
982 |
-
math_value = self.math.safe_eval(m.group(1))
|
983 |
-
|
984 |
-
# Build sources list (memory + web)
|
985 |
-
sources = citations[:]
|
986 |
-
if mem_ctx:
|
987 |
-
sources.append({"type":"memory","ref":"longterm_memory","title":"long-term memory","time": now_ts()})
|
988 |
-
|
989 |
-
# Score
|
990 |
-
source_rel = self._estimate_reliability(sources)
|
991 |
-
recency = self._recency(sources)
|
992 |
-
agree = self._agreement(drafts)
|
993 |
-
self_cons = self._self_consistency(drafts)
|
994 |
-
retr = self._retrieval_strength(mem_ctx)
|
995 |
-
contradictions = 0.0
|
996 |
-
|
997 |
-
# Compare with VFS (detect contradictions for simple exact match claims)
|
998 |
-
# We extract simple "X is Y" patterns from drafts; very lightweight.
|
999 |
-
simple_claims=[]
|
1000 |
-
for d in drafts:
|
1001 |
-
for sent in re.split(r"[.\n]", d):
|
1002 |
-
m2 = re.search(r"^([\w\s\-]{3,})\s+is\s+([\w\s\-\%\.]{2,})$", sent.strip(), re.I)
|
1003 |
-
if m2:
|
1004 |
-
c = f"{m2.group(1).strip()} is"
|
1005 |
-
v = m2.group(2).strip()
|
1006 |
-
simple_claims.append((c,v))
|
1007 |
-
# check against latest
|
1008 |
-
for c,v in simple_claims:
|
1009 |
-
latest = self.vfs.latest(c)
|
1010 |
-
if latest and latest.get("value") and latest["value"].strip().lower()!=v.lower():
|
1011 |
-
contradictions += 0.5 # penalize disagreement with stored fact
|
1012 |
-
|
1013 |
-
conf = self.scorer.score(
|
1014 |
-
source_reliability=source_rel,
|
1015 |
-
recency=recency,
|
1016 |
-
agreement=agree,
|
1017 |
-
self_consistency=self_cons,
|
1018 |
-
retrieval_strength=retr,
|
1019 |
-
contradictions=contradictions
|
1020 |
-
)
|
1021 |
-
|
1022 |
-
# Choose the most concise draft
|
1023 |
-
best = min(drafts, key=lambda s: len(s) if s else 1e9)
|
1024 |
-
|
1025 |
-
# If math was requested & computed, splice it in with highest certainty
|
1026 |
-
if math_value and "[math-error]" not in math_value and "sympy missing" not in math_value:
|
1027 |
-
best = f"{best}\n\nMath check: {math_value}"
|
1028 |
-
|
1029 |
-
# If confidence < threshold, trim to certain subset
|
1030 |
-
min_conf = MODEL_CONFIG["chb_min_confidence"]
|
1031 |
-
if conf < min_conf:
|
1032 |
-
# Provide narrowed/certain answer: we extract sentences with highest overlap across drafts.
|
1033 |
-
sent_scores=[]
|
1034 |
-
sents = [s.strip() for s in re.split(r"(?<=[\.\!\?])\s+", best) if s.strip()]
|
1035 |
-
for s in sents:
|
1036 |
-
count=sum(1 for d in drafts if s.lower() in d.lower())
|
1037 |
-
sent_scores.append((count, s))
|
1038 |
-
sent_scores.sort(reverse=True)
|
1039 |
-
certain = " ".join([s for cnt,s in sent_scores if cnt>=2]) # present in >=2 drafts
|
1040 |
-
if not certain:
|
1041 |
-
certain = "I'm not fully confident. Here's what is most certain from the evidence I have."
|
1042 |
-
best = certain
|
1043 |
-
|
1044 |
-
# Record any simple claims to VFS as new knowledge (with provenance)
|
1045 |
-
for c,v in simple_claims[:3]:
|
1046 |
-
self.vfs.add_or_update(claim=c, value=v, sources=sources, confidence=float(conf))
|
1047 |
-
|
1048 |
-
# Build final message with optional citations
|
1049 |
-
if citations:
|
1050 |
-
cites = "\n".join([f"- {c['title']} — {c['ref']}" for c in citations])
|
1051 |
-
best_out = f"{best}\n\nConfidence: {conf:.2f}\nSources:\n{cites}"
|
1052 |
-
else:
|
1053 |
-
best_out = f"{best}\n\nConfidence: {conf:.2f}"
|
1054 |
-
|
1055 |
-
return {"reply": best_out, "confidence": conf, "citations": citations}
|
1056 |
-
|
1057 |
-
# ---------------------------
|
1058 |
-
# Orchestrator (ties everything with CHB)
|
1059 |
-
# ---------------------------
|
1060 |
-
class SuperAgent:
|
1061 |
-
def __init__(self, config:dict):
|
1062 |
-
self.cfg = config
|
1063 |
-
self.prov = ProvenanceManager()
|
1064 |
-
self.safety = SafetyManager(blocklist=config.get("safety_blocklist"))
|
1065 |
-
self.short = ShortTermMemory()
|
1066 |
-
self.long = LongTermMemory(index_dir=config.get("faiss_index_dir"), embed_model_name=config.get("embedder"))
|
1067 |
-
self.kg = KnowledgeGraph(config.get("knowledge_graph_path"))
|
1068 |
-
self.vfs = VersionedFactStore(config.get("vfs_file"))
|
1069 |
-
self.web = WebSearch(enabled=bool(config.get("allow_web_search")), cache_file=config.get("web_cache_file"))
|
1070 |
-
# LLM
|
1071 |
-
self.llm = LocalLLM(model_path=config.get("llm",{}).get("model_path"),
|
1072 |
-
backend=config.get("llm",{}).get("backend","transformers"),
|
1073 |
-
device=config.get("device","auto"),
|
1074 |
-
quantize=config.get("llm",{}).get("quantize",None))
|
1075 |
-
# image & video engines
|
1076 |
-
self.image = ImageEngine(base=config.get("sdxl_base"), refiner=config.get("sdxl_refiner"),
|
1077 |
-
inpaint=config.get("sdxl_inpaint"), blip=config.get("blip_caption"),
|
1078 |
-
device=config.get("device","auto"))
|
1079 |
-
self.video = VideoEngine()
|
1080 |
-
self.voice = VoiceEngine(self.prov, self.safety, piper_bin=config.get("piper_binary"), piper_voice=config.get("piper_voice"))
|
1081 |
-
# agents
|
1082 |
-
self.agents = AgentHub(self.llm, self.image, self.video)
|
1083 |
-
# Close-to-Human Brain
|
1084 |
-
self.chb = CloseToHumanBrain(self.llm, self.long, self.kg, self.vfs, self.web, self.image)
|
1085 |
-
|
1086 |
-
def detect_intent(self, text:str) -> str:
|
1087 |
-
t = (text or "").lower().strip()
|
1088 |
-
if t.startswith("/img ") or t.startswith("/image "): return "image"
|
1089 |
-
if t.startswith("/inpaint "): return "inpaint"
|
1090 |
-
if t.startswith("/tts "): return "tts"
|
1091 |
-
if t.startswith("/video "): return "video"
|
1092 |
-
if t.startswith("/vidinterp "): return "vidinterp"
|
1093 |
-
if t.startswith("/kg "): return "kg"
|
1094 |
-
if t.startswith("/agent "): return "agent"
|
1095 |
-
if any(k in t for k in ["solve", "calculate", "integrate", "differentiate"]): return "math"
|
1096 |
-
return "chat"
|
1097 |
-
|
1098 |
-
def handle(self, user_id:str, text:str, preferred_lang:Optional[str]=None) -> Dict[str,Any]:
|
1099 |
-
if not self.safety.is_allowed(text):
|
1100 |
-
return {"status":"blocked","reason":"policy"}
|
1101 |
-
self.short.push(user_id, "user", text)
|
1102 |
|
1103 |
-
|
1104 |
-
|
1105 |
-
|
|
|
1106 |
|
|
|
|
|
1107 |
try:
|
1108 |
-
|
1109 |
-
|
1110 |
-
|
1111 |
-
|
1112 |
-
|
1113 |
-
|
1114 |
-
|
1115 |
-
|
1116 |
-
|
1117 |
-
|
1118 |
-
|
1119 |
-
|
1120 |
-
|
1121 |
-
|
1122 |
-
|
1123 |
-
|
1124 |
-
|
1125 |
-
|
1126 |
-
|
1127 |
-
|
1128 |
-
|
1129 |
-
|
1130 |
-
|
1131 |
-
|
1132 |
-
|
1133 |
-
|
1134 |
-
|
1135 |
-
elif intent == "kg":
|
1136 |
-
reply = "Use /v1/kg endpoints (not exposed in this single file demo) or extend as needed."
|
1137 |
-
|
1138 |
-
elif intent == "agent":
|
1139 |
-
task = text.split(" ",1)[1] if " " in text else ""
|
1140 |
-
out = self.agents.coordinate(task)
|
1141 |
-
# CHB is the final boss: pass agent output through CHB to verify/format
|
1142 |
-
verified = self.chb.verify_and_respond(user_id, f"{task}\n\nAgentDraft:\n{out}", preferred_lang)
|
1143 |
-
reply = verified["reply"]
|
1144 |
-
payload.update({"confidence": verified["confidence"]})
|
1145 |
-
|
1146 |
-
else:
|
1147 |
-
# Default conversational path goes through CHB (final arbiter)
|
1148 |
-
verified = self.chb.verify_and_respond(user_id, text, preferred_lang)
|
1149 |
-
reply = verified["reply"]
|
1150 |
-
payload.update({"confidence": verified["confidence"]})
|
1151 |
-
|
1152 |
-
except Exception as e:
|
1153 |
-
logger.error("Handle failed: %s", e)
|
1154 |
-
reply = f"[error] {e}"
|
1155 |
-
|
1156 |
-
# Learning: store turn + (optionally) extracted facts handled inside CHB
|
1157 |
-
if MODEL_CONFIG.get("auto_learn"):
|
1158 |
-
try:
|
1159 |
-
self.long.add(user_id, text, kind="turn")
|
1160 |
-
except Exception as e:
|
1161 |
-
logger.debug("long-term add failed: %s", e)
|
1162 |
-
|
1163 |
-
self.short.push(user_id, "assistant", reply)
|
1164 |
-
return {"status":"ok", "reply": reply, **payload}
|
1165 |
-
|
1166 |
-
# ---------------------------
|
1167 |
-
# FastAPI + Gradio integration
|
1168 |
-
# ---------------------------
|
1169 |
-
try:
|
1170 |
-
from fastapi import FastAPI, UploadFile, File, Form
|
1171 |
-
from fastapi.responses import FileResponse, JSONResponse
|
1172 |
-
from pydantic import BaseModel
|
1173 |
-
_FASTAPI = True
|
1174 |
-
except Exception:
|
1175 |
-
_FASTAPI = False
|
1176 |
-
|
1177 |
-
try:
|
1178 |
-
import gradio as gr
|
1179 |
-
_GRADIO = True
|
1180 |
-
except Exception:
|
1181 |
-
_GRADIO = False
|
1182 |
-
|
1183 |
-
app = FastAPI(title="Multimodal SuperAgent") if _FASTAPI else None
|
1184 |
-
_AGENT_SINGLETON: Optional[SuperAgent] = None
|
1185 |
|
1186 |
-
|
1187 |
-
|
1188 |
-
|
1189 |
-
# merge env config file if present
|
1190 |
-
cfg_path = os.environ.get("SUPERAGENT_CONFIG")
|
1191 |
-
cfg = MODEL_CONFIG.copy()
|
1192 |
-
if cfg_path and os.path.exists(cfg_path):
|
1193 |
try:
|
1194 |
-
|
1195 |
-
with open(cfg_path,"r",encoding="utf-8") as f:
|
1196 |
-
y = yaml.safe_load(f) or {}
|
1197 |
-
cfg.update(y)
|
1198 |
except Exception:
|
1199 |
-
|
1200 |
-
|
1201 |
-
|
1202 |
-
|
1203 |
-
|
1204 |
-
|
1205 |
-
|
1206 |
-
|
1207 |
-
|
1208 |
-
|
1209 |
-
|
1210 |
-
|
1211 |
-
|
1212 |
-
|
1213 |
-
|
1214 |
-
|
1215 |
-
|
1216 |
-
|
1217 |
-
|
1218 |
-
|
1219 |
-
|
1220 |
-
|
1221 |
-
|
1222 |
-
|
1223 |
-
|
1224 |
-
|
1225 |
-
|
1226 |
-
|
1227 |
-
|
1228 |
-
|
1229 |
-
|
1230 |
-
|
1231 |
-
|
1232 |
-
|
1233 |
-
|
1234 |
-
|
1235 |
-
|
1236 |
-
|
1237 |
-
|
1238 |
-
|
1239 |
-
|
1240 |
-
|
1241 |
-
|
1242 |
-
|
1243 |
-
|
1244 |
-
tmpf = tmp_path("_vid")
|
1245 |
-
with open(tmpf,"wb") as f: f.write(await file.read())
|
1246 |
-
return get_agent().video.analyze(tmpf)
|
1247 |
-
|
1248 |
-
@app.post("/v1/video/interpolate")
|
1249 |
-
async def api_vid_interp(factor: int = Form(2), file: UploadFile = File(...)):
|
1250 |
-
tmpf = tmp_path("_vid")
|
1251 |
-
with open(tmpf,"wb") as f: f.write(await file.read())
|
1252 |
-
return get_agent().video.interpolate_fps(tmpf, factor=max(2, int(factor)))
|
1253 |
-
|
1254 |
-
@app.post("/v1/memory/export")
|
1255 |
-
async def mem_export():
|
1256 |
-
return get_agent().long.export_all()
|
1257 |
-
|
1258 |
-
@app.post("/v1/memory/import")
|
1259 |
-
async def mem_import(items: List[Dict[str,Any]]):
|
1260 |
-
get_agent().long.import_bulk(items)
|
1261 |
-
return {"status":"ok","count":len(items)}
|
1262 |
-
|
1263 |
-
@app.post("/v1/web/toggle")
|
1264 |
-
async def web_toggle(enabled: bool = Form(...)):
|
1265 |
-
get_agent().web.enabled = bool(enabled); return {"enabled":get_agent().web.enabled}
|
1266 |
-
|
1267 |
-
# ---------------------------
|
1268 |
-
# Optional Gradio demo (runs when module executed)
|
1269 |
-
# ---------------------------
|
1270 |
-
def launch_gradio():
|
1271 |
-
if not _GRADIO:
|
1272 |
-
logger.warning("Gradio not installed")
|
1273 |
-
return
|
1274 |
-
agent = get_agent()
|
1275 |
-
with gr.Blocks(title="Multimodal SuperAgent") as demo:
|
1276 |
-
gr.Markdown("# Multimodal SuperAgent v6.0 (CHB)")
|
1277 |
-
with gr.Row():
|
1278 |
-
user_id = gr.Textbox(value="user1", label="User ID")
|
1279 |
-
prompt = gr.Textbox(label="Prompt")
|
1280 |
-
btn = gr.Button("Send")
|
1281 |
-
out = gr.Markdown(label="Reply")
|
1282 |
-
def send(u,p):
|
1283 |
-
res = agent.handle(u,p)
|
1284 |
-
return res.get("reply","")
|
1285 |
-
btn.click(send, [user_id,prompt], out)
|
1286 |
-
demo.launch(server_name="0.0.0.0", server_port=7860)
|
1287 |
|
1288 |
-
# ---------------------------
|
1289 |
-
# CLI
|
1290 |
-
# ---------------------------
|
1291 |
if __name__ == "__main__":
|
1292 |
import argparse
|
1293 |
ap = argparse.ArgumentParser()
|
1294 |
ap.add_argument("--demo", action="store_true")
|
1295 |
-
ap.add_argument("--
|
|
|
1296 |
args = ap.parse_args()
|
1297 |
if args.demo:
|
1298 |
-
|
1299 |
-
|
1300 |
-
|
1301 |
-
|
1302 |
-
|
1303 |
-
|
|
|
1 |
+
# multimodular_modul version 7.0.py
|
2 |
"""
|
3 |
+
Multimodular Module — Multimodal SuperAgent v7.0
|
4 |
+
Upgrade from v6.0 -> v7.0 (in-place upgrade)
|
5 |
+
|
6 |
+
Features added:
|
7 |
+
- CHB (Close-to-Human Brain) is universal "middle-man" for all inputs/outputs.
|
8 |
+
- User-driven retrieval plan flow (AI generates queries; client fetches; submits results).
|
9 |
+
- Opportunistic Creative Skill Vault (media & text benchmarks).
|
10 |
+
- Versioned Fact Store (VFS) enhancements: freshness, controversy scoring, provenance.
|
11 |
+
- Real-time Global Brain (facts, skills, media) sync via WebSocket + optional HuggingFace dataset push.
|
12 |
+
- Automatic local backups (JSON + SQLite), downloadable via API endpoint.
|
13 |
+
- Self-upgrading modules: safe, signed modules can be auto-integrated into runtime.
|
14 |
+
* Auto-exec only when cryptographic signature verification + sandbox available (wasmtime or subprocess sandboxes).
|
15 |
+
* If signing or sandbox not present, modules are stored but not auto-executed.
|
16 |
+
- Universal FastAPI endpoints and CLI demo preserved from v6 with additions.
|
17 |
+
|
18 |
+
Security & Safety:
|
19 |
+
- No CAPTCHA/TLS evasion. Respect robots.txt & user-driven retrieval model.
|
20 |
+
- Self-upgrade requires signature verification (env GLOBAL_SYNC_SIGNING_PUBKEY).
|
21 |
+
- Default: local-only sync. Enable cloud via env variables (HUGGINGFACE_TOKEN, GLOBAL_SYNC_REPO).
|
22 |
+
- Media sync allowed; personal/private data must be filtered before upload.
|
23 |
"""
|
24 |
|
25 |
from __future__ import annotations
|
26 |
+
import os, sys, json, time, uuid, shutil, tempfile, hashlib, base64, logging
|
27 |
from dataclasses import dataclass, field, asdict
|
28 |
+
from typing import Any, Dict, List, Optional, Tuple
|
29 |
+
from pathlib import Path
|
30 |
+
import threading
|
31 |
+
import sqlite3
|
32 |
+
import zipfile
|
33 |
+
import hmac
|
34 |
+
import hashlib
|
35 |
import asyncio
|
36 |
|
37 |
+
# --------------------------
|
38 |
+
# Optional deps (feature unlocks)
|
39 |
+
# --------------------------
|
40 |
+
# pip install fastapi uvicorn pydantic requests websockets python-multipart cryptography wasmtime
|
41 |
try:
|
42 |
+
import fastapi
|
43 |
+
from fastapi import FastAPI, UploadFile, File, Form
|
44 |
+
from fastapi.responses import FileResponse, JSONResponse
|
45 |
+
from pydantic import BaseModel
|
46 |
+
FASTAPI_AVAILABLE = True
|
47 |
except Exception:
|
48 |
+
FASTAPI_AVAILABLE = False
|
49 |
+
|
50 |
try:
|
51 |
+
import requests
|
52 |
except Exception:
|
53 |
+
requests = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
+
try:
|
56 |
+
import websockets
|
57 |
+
except Exception:
|
58 |
+
websockets = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
+
try:
|
61 |
+
from cryptography.hazmat.primitives import serialization, hashes
|
62 |
+
from cryptography.hazmat.primitives.asymmetric import padding
|
63 |
+
CRYPTO_AVAILABLE = True
|
64 |
+
except Exception:
|
65 |
+
CRYPTO_AVAILABLE = False
|
66 |
|
67 |
+
try:
|
68 |
+
import wasmtime
|
69 |
+
WASM_AVAILABLE = True
|
70 |
+
except Exception:
|
71 |
+
WASM_AVAILABLE = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
+
# --------------------------
|
74 |
+
# Logging
|
75 |
+
# --------------------------
|
76 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
|
77 |
+
log = logging.getLogger("Multimodular_v7")
|
78 |
+
|
79 |
+
# --------------------------
|
80 |
+
# Config & Paths (edit env or constants)
|
81 |
+
# --------------------------
|
82 |
+
BASE_DIR = Path(os.getenv("MM_BASE_DIR", Path(__file__).parent.resolve()))
|
83 |
+
DATA_DIR = Path(os.getenv("MM_DATA_DIR", BASE_DIR / "mm_data"))
|
84 |
+
BACKUP_DIR = Path(os.getenv("MM_BACKUP_DIR", DATA_DIR / "backups"))
|
85 |
+
TMP_DIR = Path(os.getenv("MM_TMP_DIR", BASE_DIR / "tmp"))
|
86 |
+
CACHE_DIR = Path(os.getenv("MM_CACHE_DIR", BASE_DIR / "cache"))
|
87 |
+
for d in (DATA_DIR, BACKUP_DIR, TMP_DIR, CACHE_DIR):
|
88 |
+
d.mkdir(parents=True, exist_ok=True)
|
89 |
+
|
90 |
+
# Global sync config
|
91 |
+
GLOBAL_SYNC_ENABLED = os.getenv("MM_GLOBAL_SYNC_ENABLED", "false").lower() in ("1","true","yes")
|
92 |
+
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN", None)
|
93 |
+
GLOBAL_SYNC_REPO = os.getenv("GLOBAL_SYNC_REPO", None) # e.g., "username/mm_global_brain"
|
94 |
+
GLOBAL_SYNC_SIGNING_PUBKEY = os.getenv("GLOBAL_SYNC_SIGNING_PUBKEY", None) # PEM public key for verifying modules
|
95 |
+
REALTIME_WS_PORT = int(os.getenv("MM_WS_PORT", "8765"))
|
96 |
+
|
97 |
+
# Auto-upgrade strictness: require signature & sandbox for auto-exec
|
98 |
+
AUTO_UPGRADE_REQUIRE_SIGN = True
|
99 |
+
AUTO_UPGRADE_REQUIRE_SANDBOX = True
|
100 |
+
|
101 |
+
# Backups
|
102 |
+
BACKUP_RETENTION = int(os.getenv("MM_BACKUP_RETENTION", "30")) # keep last N backups
|
103 |
+
|
104 |
+
# CHB confidence threshold
|
105 |
+
CHB_MIN_CONFIDENCE = float(os.getenv("CHB_MIN_CONFIDENCE", "0.85"))
|
106 |
+
|
107 |
+
# --------------------------
|
108 |
+
# Utilities
|
109 |
+
# --------------------------
|
110 |
+
def uid(prefix="id"):
    """Generate a short unique identifier of the form '<prefix>_<10 hex chars>'."""
    token = uuid.uuid4().hex[:10]
    return "{}_{}".format(prefix, token)
|
112 |
+
|
113 |
+
def now_iso():
    """Current UTC time as an ISO-8601 string with a trailing 'Z'."""
    utc_now = time.gmtime()
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)
|
115 |
+
|
116 |
+
def sha256_b64(data: bytes) -> str:
    """Return the URL-safe base64 encoding of the SHA-256 digest of *data*."""
    digest = hashlib.sha256(data).digest()
    return base64.urlsafe_b64encode(digest).decode()
|
118 |
+
|
119 |
+
def write_json(path: Path, data: Any):
    """Atomically serialize *data* as pretty-printed UTF-8 JSON to *path*.

    Writes to a sibling temp file first, then replaces the target so readers
    never observe a partially written file.
    """
    # Append ".tmp" rather than replacing the suffix: with_suffix(".tmp")
    # would make "a.json" and "a.yaml" collide on the same "a.tmp" file.
    tmp = path.with_suffix(path.suffix + ".tmp")
    with open(tmp, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    tmp.replace(path)
|
124 |
+
|
125 |
+
def read_json(path: Path, default=None):
    """Load JSON from *path*; return *default* if the file is absent or unreadable."""
    if not path.exists():
        return default
    try:
        with open(path, "r", encoding="utf-8") as fh:
            return json.load(fh)
    except Exception:
        # Best-effort read: corrupt or mid-write files fall back to default.
        return default
|
133 |
+
|
134 |
+
# --------------------------
|
135 |
+
# Database: local SQLite wrapper + JSON mirror
|
136 |
+
# --------------------------
|
137 |
+
class LocalDB:
    """Thread-safe SQLite store for facts, skills, modules and backup records.

    A single connection is shared across threads (the WebSocket handlers in
    GlobalSync call into this class), so sqlite's same-thread check is
    disabled and every access is serialized through ``self.lock``.
    """
    def __init__(self, path: Path):
        self.path = path
        # The lock must exist before any DB work happens.
        self.lock = threading.Lock()
        # check_same_thread=False: the connection is used from worker threads;
        # self.lock provides the required serialization.
        self.conn = sqlite3.connect(str(self.path), check_same_thread=False)
        self._init()

    def _init(self):
        """Create all tables if they do not exist (idempotent)."""
        cur = self.conn.cursor()
        cur.execute("""CREATE TABLE IF NOT EXISTS facts (
            id TEXT PRIMARY KEY,
            claim TEXT,
            value TEXT,
            confidence REAL,
            sources TEXT,
            first_seen TEXT,
            last_seen TEXT,
            controversy REAL,
            staleness REAL
        )""")
        cur.execute("""CREATE TABLE IF NOT EXISTS skills (
            id TEXT PRIMARY KEY,
            tag TEXT,
            kind TEXT,
            meta TEXT,
            score REAL,
            added_at TEXT,
            synced INTEGER DEFAULT 0
        )""")
        cur.execute("""CREATE TABLE IF NOT EXISTS modules (
            id TEXT PRIMARY KEY,
            name TEXT,
            code TEXT,
            meta TEXT,
            verified INTEGER DEFAULT 0,
            autointegrated INTEGER DEFAULT 0,
            added_at TEXT
        )""")
        cur.execute("""CREATE TABLE IF NOT EXISTS backups (
            id TEXT PRIMARY KEY,
            path TEXT,
            created_at TEXT
        )""")
        self.conn.commit()

    def upsert_fact(self, claim, value, confidence, sources, controversy=0.0, staleness=0.0):
        """Insert or update a fact keyed by a hash of its claim; return the fact id."""
        fid = sha256_b64(claim.encode())[:32]
        now = now_iso()
        with self.lock:
            cur = self.conn.cursor()
            cur.execute("SELECT id FROM facts WHERE id=?", (fid,))
            if cur.fetchone():
                cur.execute(
                    "UPDATE facts SET value=?, confidence=?, sources=?, last_seen=?, controversy=?, staleness=? WHERE id=?",
                    (value, float(confidence), json.dumps(sources), now, float(controversy), float(staleness), fid))
            else:
                cur.execute(
                    "INSERT INTO facts (id,claim,value,confidence,sources,first_seen,last_seen,controversy,staleness) "
                    "VALUES (?,?,?,?,?,?,?,?,?)",
                    (fid, claim, value, float(confidence), json.dumps(sources), now, now, float(controversy), float(staleness)))
            self.conn.commit()
        return fid

    def add_skill(self, tag, kind, meta, score):
        """Store one creative-skill benchmark; return its generated id."""
        sid = uid("skill")
        now = now_iso()
        with self.lock:
            self.conn.execute(
                "INSERT INTO skills (id,tag,kind,meta,score,added_at) VALUES (?,?,?,?,?,?)",
                (sid, tag, kind, json.dumps(meta), float(score), now))
            self.conn.commit()
        return sid

    def add_module(self, name, code, meta, verified=0, autointegrated=0):
        """Store a learned module's source plus metadata; return its generated id."""
        mid = uid("mod")
        now = now_iso()
        with self.lock:
            self.conn.execute(
                "INSERT INTO modules (id,name,code,meta,verified,autointegrated,added_at) VALUES (?,?,?,?,?,?,?)",
                (mid, name, code, json.dumps(meta), int(verified), int(autointegrated), now))
            self.conn.commit()
        return mid

    def _rows_as_dicts(self, query):
        """Run *query* under the lock and return all rows as column->value dicts."""
        with self.lock:
            cur = self.conn.cursor()
            cur.execute(query)
            rows = cur.fetchall()
            cols = [c[0] for c in cur.description]
        return [dict(zip(cols, r)) for r in rows]

    def list_facts(self):
        """Return every fact row as a dict."""
        return self._rows_as_dicts("SELECT * FROM facts")

    def list_skills(self):
        """Return every skill row as a dict."""
        return self._rows_as_dicts("SELECT * FROM skills")

    def list_modules(self):
        """Return every module row as a dict."""
        return self._rows_as_dicts("SELECT * FROM modules")

    def mark_module_verified(self, module_id, verified=1):
        """Flag a module's signature as verified (or clear the flag)."""
        with self.lock:
            self.conn.execute("UPDATE modules SET verified=? WHERE id=?", (int(verified), module_id))
            self.conn.commit()

    def mark_module_autointegrated(self, module_id, val=1):
        """Flag a module as auto-integrated into the runtime."""
        with self.lock:
            self.conn.execute("UPDATE modules SET autointegrated=? WHERE id=?", (int(val), module_id))
            self.conn.commit()

    def add_backup(self, path):
        """Record a backup file path, pruning old entries beyond BACKUP_RETENTION."""
        bid = uid("bak")
        now = now_iso()
        with self.lock:
            self.conn.execute("INSERT INTO backups (id,path,created_at) VALUES (?,?,?)", (bid, str(path), now))
            self.conn.commit()
            # Prune while still holding the lock (the lock is non-reentrant,
            # so _prune_backups must not re-acquire it).
            self._prune_backups()
        return bid

    def _prune_backups(self):
        """Delete backup rows (and files, best-effort) beyond the retention limit.

        Caller must hold ``self.lock``.
        """
        cur = self.conn.cursor()
        cur.execute("SELECT id,path,created_at FROM backups ORDER BY created_at DESC")
        rows = cur.fetchall()
        if len(rows) <= BACKUP_RETENTION:
            return
        for pid, p, _ in rows[BACKUP_RETENTION:]:
            try:
                if os.path.exists(p):
                    os.remove(p)
            except Exception:
                pass  # best-effort file cleanup; the row is removed regardless
            self.conn.execute("DELETE FROM backups WHERE id=?", (pid,))
        self.conn.commit()
|
251 |
+
|
252 |
+
# --------------------------
|
253 |
+
# VFS and Creative Skill Vault (JSON + SQLite)
|
254 |
+
# --------------------------
|
255 |
+
class VFS:
    """Versioned Fact Store facade over LocalDB with simple freshness scoring."""
    def __init__(self, db: LocalDB):
        self.db = db

    def store_fact(self, claim: str, value: str, sources: List[Dict[str, Any]], confidence: float, controversy: float = 0.0):
        """Persist a fact with provenance; return the fact id.

        Staleness in [0, 1] grows with the age (in years, capped at 1) of the
        newest-dated source.
        """
        import calendar  # local: only needed for UTC timestamp parsing
        staleness = 0.0
        for s in (sources or []):
            dt = s.get("date")
            if not dt:
                continue
            try:
                # Source dates are ISO-8601 UTC (see now_iso, which emits a
                # trailing "Z"); use timegm rather than mktime so the computed
                # age is not skewed by the local timezone offset.
                t = calendar.timegm(time.strptime(dt[:19], "%Y-%m-%dT%H:%M:%S"))
                age_days = max(0, (time.time() - t) / 86400.0)
                staleness = max(staleness, min(1.0, age_days / 365.0))
            except Exception:
                continue  # unparseable dates simply don't affect staleness
        fid = self.db.upsert_fact(claim, value, confidence, sources, controversy, staleness)
        return fid

    def query(self, q: str):
        """Naive case-insensitive substring search over stored claims and values."""
        qlow = q.lower()
        return [r for r in self.db.list_facts()
                if qlow in (r.get("claim") or "").lower() or qlow in (r.get("value") or "").lower()]
|
278 |
+
|
279 |
+
class CreativeSkillVault:
    """Stores opportunistic creative benchmarks (media/text skills) in LocalDB."""
    def __init__(self, db: LocalDB):
        self.db = db

    def add_benchmark(self, tag: str, kind: str, meta: Dict[str, Any], score: float):
        """Record one benchmark entry; return its generated skill id."""
        return self.db.add_skill(tag, kind, meta, score)

    def top_by_tag(self, tag: str, k: int = 5):
        """Return up to *k* skills carrying *tag*, highest score first."""
        matching = [s for s in self.db.list_skills() if s.get("tag") == tag]
        return sorted(matching, key=lambda s: s.get("score", 0), reverse=True)[:k]
|
290 |
+
|
291 |
+
# --------------------------
|
292 |
+
# Global Sync: Hugging Face push & WebSocket real-time (simple)
|
293 |
+
# --------------------------
|
294 |
+
class GlobalSync:
    """Real-time Global Brain sync: WebSocket broadcast plus optional HF push."""
    def __init__(self, db: LocalDB, hf_token: Optional[str] = None, repo: Optional[str] = None):
        self.db = db
        self.hf_token = hf_token
        self.repo = repo
        self.ws_clients = set()      # currently connected websocket clients
        self.ws_server_task = None   # server handle once started
        self.loop = None             # event loop owned by the WS thread
        self.lock = threading.Lock()

    def push_to_hf(self, package: Dict[str, Any]) -> Tuple[bool, str]:
        """Push a facts/skills package to a Hugging Face dataset (placeholder).

        Returns (ok, message). A production implementation should use the
        huggingface_hub library; this stub only validates configuration.
        """
        if not (self.hf_token and self.repo and requests):
            return False, "huggingface not configured or requests missing"
        try:
            return False, "HF push requires huggingface_hub implementation; configure HF client"
        except Exception as e:
            return False, str(e)

    async def ws_broadcast(self, message: Dict[str, Any]):
        """JSON-encode *message* and send it to every client, dropping dead ones."""
        if websockets is None:
            return
        data = json.dumps(message)
        # Iterate a snapshot so the set can be mutated while sending.
        for ws in list(self.ws_clients):
            try:
                await ws.send(data)
            except Exception:
                # Client went away; discard() is race-safe (no KeyError).
                self.ws_clients.discard(ws)

    def start_ws_server(self, host="0.0.0.0", port=REALTIME_WS_PORT):
        """Blocking: run a WebSocket server that accepts 'submit_skill' messages.

        Use run_ws_in_thread() to start it without blocking the caller.
        """
        if websockets is None:
            log.warning("websockets library missing; realtime sync disabled")
            return

        # 'path' is optional: websockets >= 11 invokes the handler with a
        # single argument, older releases pass (websocket, path).
        async def handler(websocket, path=None):
            log.info("WS client connected")
            self.ws_clients.add(websocket)
            try:
                async for msg in websocket:
                    try:
                        data = json.loads(msg)
                        typ = data.get("type")
                        if typ == "submit_skill":
                            payload = data.get("payload")
                            # Minimal processing: store locally, then broadcast.
                            tag = payload.get("tag", "global")
                            kind = payload.get("kind", "image")
                            meta = payload.get("meta", {})
                            score = float(payload.get("score", 0.5))
                            self.db.add_skill(tag, kind, meta, score)
                            await self.ws_broadcast({"type": "skill_added", "tag": tag, "kind": kind, "meta": meta, "score": score})
                    except Exception:
                        pass  # ignore malformed messages
            except Exception:
                pass
            finally:
                self.ws_clients.discard(websocket)
                log.info("WS client disconnected")

        log.info("Starting WebSocket server on %s:%d", host, port)
        # Dedicated loop so the server can live inside a daemon thread.
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        start_server = websockets.serve(handler, host, port)
        self.ws_server_task = self.loop.run_until_complete(start_server)
        try:
            self.loop.run_forever()
        except Exception:
            pass

    def run_ws_in_thread(self, host="0.0.0.0", port=REALTIME_WS_PORT):
        """Start the WS server in a daemon thread; return the Thread object."""
        t = threading.Thread(target=self.start_ws_server, args=(host, port), daemon=True)
        t.start()
        return t
|
374 |
+
|
375 |
+
# --------------------------
|
376 |
+
# ModuleManager: Verify & Sandbox auto-integration of learned modules
|
377 |
+
# --------------------------
|
378 |
+
class ModuleManager:
    """Verifies, sandbox-tests and (conditionally) integrates learned modules.

    Module code is never exec'd in-process: integration only flags the stored
    module so a separate loader process can pick it up.
    """
    def __init__(self, db: LocalDB, signing_pubkey_pem: Optional[str] = None):
        self.db = db
        self.signing_pubkey_pem = signing_pubkey_pem
        self.sandbox_available = WASM_AVAILABLE  # prefer a WASM sandbox when present
        self.lock = threading.Lock()

    def verify_signature(self, code: bytes, signature_b64: str) -> bool:
        """Verify a PKCS1v15/SHA-256 signature of *code* against the configured key."""
        if not (CRYPTO_AVAILABLE and self.signing_pubkey_pem):
            log.warning("Crypto or public key not available, cannot verify signature")
            return False
        try:
            pub = serialization.load_pem_public_key(self.signing_pubkey_pem.encode())
            sig = base64.b64decode(signature_b64)
            pub.verify(sig, code, padding.PKCS1v15(), hashes.SHA256())
            return True
        except Exception as e:
            log.warning("signature verification failed: %s", e)
            return False

    def sandbox_run_wasm(self, wasm_bytes: bytes, func_name: str = "run", inputs: Optional[dict] = None, timeout: int = 5) -> Tuple[bool, str]:
        """Instantiate a WASM module and call its exported *func_name*.

        Conservative by design: only the agreed-upon 'run' export is invoked;
        arbitrary exports are never called.
        """
        if not WASM_AVAILABLE:
            return False, "wasm runtime not available"
        try:
            engine = wasmtime.Engine()
            module = wasmtime.Module(engine, wasm_bytes)
            store = wasmtime.Store(engine)
            instance = wasmtime.Instance(store, module, [])
            # NOTE(review): attribute-style exports access matches older
            # wasmtime-py releases; newer ones use instance.exports(store) —
            # confirm against the installed wasmtime version.
            if hasattr(instance.exports, func_name):
                fn = getattr(instance.exports, func_name)
                try:
                    res = fn()
                    return True, f"wasm-run-res:{res}"
                except Exception as e:
                    return False, f"wasm-run-exc:{e}"
            return False, "wasm module lacks 'run' export"
        except Exception as e:
            return False, f"wasm-failed:{e}"

    def sandbox_run_subprocess(self, code_str: str, timeout: int = 5) -> Tuple[bool, str]:
        """Run python source in a child process with a wall-clock timeout.

        NOTE: timeout-only isolation — NOT secure against malicious code; use
        OS-level sandboxing in production.
        """
        import subprocess
        tmp = Path(TMP_DIR) / f"module_{uid()}.py"
        tmp.write_text(code_str, encoding="utf-8")
        try:
            p = subprocess.run([sys.executable, str(tmp)],
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               timeout=timeout, check=False)
            out = p.stdout.decode()[:4000]
            err = p.stderr.decode()[:2000]
            return True, out + ("\nERR:\n" + err if err else "")
        except subprocess.TimeoutExpired:
            return False, "timeout"
        except Exception as e:
            return False, f"exec-error:{e}"
        finally:
            try:
                tmp.unlink()
            except Exception:
                pass

    def integrate_module(self, name: str, code: str, signature_b64: Optional[str] = None, autointegrate: bool = True) -> Dict[str, Any]:
        """Store a module, verify its signature, sandbox-test it, and flag it.

        Returns a dict with "ok", "module_id" and either "reason" or
        "sandbox_result".
        """
        meta = {"name": name, "signature": bool(signature_b64), "autointegrate": bool(autointegrate)}
        mid = self.db.add_module(name, code, meta, verified=0, autointegrated=0)
        if AUTO_UPGRADE_REQUIRE_SIGN:
            if not signature_b64 or not self.verify_signature(code.encode(), signature_b64):
                return {"ok": False, "reason": "signature_missing_or_invalid", "module_id": mid}
        ran_ok, run_info = False, None
        if self.sandbox_available and AUTO_UPGRADE_REQUIRE_SANDBOX:
            # Detect base64-encoded WASM (magic "\0asm") vs plain python source.
            # The decode is guarded separately: ordinary python source is not
            # valid base64 and would otherwise raise here, wrongly reporting
            # "sandbox-error" instead of being subprocess-tested.
            wasm_raw = None
            try:
                decoded = base64.b64decode(code, validate=True)
                if decoded[:4] == b"\x00asm":
                    wasm_raw = decoded
            except Exception:
                wasm_raw = None
            try:
                if wasm_raw is not None:
                    ran_ok, run_info = self.sandbox_run_wasm(wasm_raw)
                else:
                    ran_ok, run_info = self.sandbox_run_subprocess(code)
            except Exception as e:
                ran_ok, run_info = False, f"sandbox-error:{e}"
        else:
            # Security: without a sandbox the module is stored but never auto-run.
            ran_ok, run_info = False, "sandbox-not-available"
        if ran_ok and autointegrate:
            self.db.mark_module_verified(mid, verified=1)
            # Never exec arbitrary Python into this process; only flag the
            # module so a separate loader process can pick it up.
            self.db.mark_module_autointegrated(mid, val=1)
            return {"ok": True, "module_id": mid, "sandbox_result": run_info}
        return {"ok": False, "module_id": mid, "sandbox_result": run_info}
|
477 |
+
|
478 |
+
# --------------------------
|
479 |
+
# CHB — Universal middleman (upgrades v6 behaviour)
|
480 |
+
# --------------------------
|
481 |
+
class CHB:
    """Close-to-Human Brain — universal middleman between user requests,
    client-side retrieval results and the local knowledge stores.

    Responsibilities:
      - perceive():  normalize raw multimodal input into a (kind, payload) pair
      - plan():      build a retrieval plan the client executes on its device
      - verify():    score plan results (reliability / controversy / citations)
      - opportunistic_learning(): harvest media results into the skill vault
      - handle_plan_results():    verify + persist a headline fact + learn
    """

    def __init__(self, db: LocalDB, vfs: VFS, csv: CreativeSkillVault, module_mgr: ModuleManager, global_sync: GlobalSync):
        self.db = db
        self.vfs = vfs
        self.csv = csv
        self.module_mgr = module_mgr
        self.global_sync = global_sync
        # lightweight internal state: minimum confidence for accepting answers
        self.min_conf = CHB_MIN_CONFIDENCE

    def perceive(self, incoming: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize an incoming payload into a coarse modality kind.

        Later checks win: a payload carrying both an image and plan_results
        is classified as "plan_results".
        """
        kind = "text"
        if incoming.get("image") or incoming.get("image_path"):
            kind = "image"
        if incoming.get("audio") or incoming.get("audio_path"):
            kind = "audio"
        if incoming.get("video") or incoming.get("video_path"):
            kind = "video"
        if incoming.get("plan_results"):
            kind = "plan_results"
        return {"kind": kind, "payload": incoming}

    def plan(self, text: str) -> Dict[str, Any]:
        """Produce a user-driven retrieval plan (web + optional media queries)."""
        queries = [{"q": text, "type": "web", "max_results": 5}]
        low = text.lower()
        # image/video heuristics: add media queries when keywords suggest them
        if any(k in low for k in ["image", "design", "render", "photo", "logo", "illustration", "concept"]):
            queries.append({"q": text + " high quality concept art", "type": "image", "max_results": 8})
        if any(k in low for k in ["video", "footage", "tour", "demo", "walkthrough"]):
            queries.append({"q": text + " video", "type": "video", "max_results": 4})
        return {
            "id": uid("plan"),
            "queries": queries,
            "created_at": now_iso(),
            "instructions": "Run these queries locally on user's device and return structured results (web/images/videos/audio).",
        }

    def verify(self, plan_results: Optional[Dict[str, Any]], local_tool_outputs: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
        """Compute reliability, controversy and citations for plan results.

        Reliability grows with the number of distinct source domains; a crude
        keyword heuristic flags controversy between the top two snippets.
        Local tool outputs (image/math) each add a confidence bonus.
        """
        citations = []
        reliability = 0.4  # baseline when no retrieval results are available
        controversy = 0.0
        if plan_results:
            web = plan_results.get("web") or []
            domains = set()
            for r in web:
                u = r.get("url") or r.get("link") or ""
                # FIX: the old `u.split("/")[2] if "/" in u else u` raised
                # IndexError for scheme-less URLs like "example.org/page"
                # (a single slash yields only two parts).
                parts = u.split("/")
                domains.add(parts[2] if len(parts) > 2 else u)
                citations.append({"title": r.get("title"), "url": u, "date": r.get("date")})
            reliability = min(1.0, 0.2 + 0.1 * len(domains))
            # controversy: naive heuristic — top two snippets contradict
            if len(web) >= 2:
                s0 = web[0].get("snippet", "").lower()
                s1 = web[1].get("snippet", "").lower()
                if any(w in s1 for w in ["not", "contradict", "dispute"]) or any(w in s0 for w in ["not", "contradict", "dispute"]):
                    controversy = 0.5
        # successful local tool runs raise overall confidence
        tool_bonus = 0.0
        for t in (local_tool_outputs or []):
            if t.get("tool") == "image" and t.get("ok"):
                tool_bonus += 0.2
            # FIX: guard against an explicit None "result" value (old code
            # called .lower() on the dict-get default only when key missing)
            if t.get("tool") == "math" and not (t.get("result") or "").lower().startswith("math error"):
                tool_bonus += 0.2
        confidence = min(1.0, reliability * 0.6 + tool_bonus)
        return {"confidence": confidence, "reliability": reliability, "controversy": controversy, "citations": citations}

    def _harvest_media(self, items: List[Dict[str, Any]], media_type: str) -> None:
        """Store each retrieved media item as a skill benchmark in the vault."""
        for item in items:
            path = item.get("path") or item.get("url")
            # naive quality score; clients may supply their own
            score = float(item.get("quality_score", 0.6))
            tag = item.get("tags", ["web"])[0] if item.get("tags") else "web"
            # images may carry an "alt" description in addition to a caption
            desc = item.get("caption") or (item.get("alt") if media_type == "image" else None) or ""
            meta = {"source": path, "desc": desc, "origin": "user_client"}
            self.csv.add_benchmark(tag, media_type, meta, score)

    def opportunistic_learning(self, plan_results: Dict[str, Any]):
        """Harvest images/videos/audios from plan results into the skill vault
        and broadcast a best-effort skill-update notification."""
        images = plan_results.get("images", []) or []
        videos = plan_results.get("videos", []) or []
        audios = plan_results.get("audios", []) or []
        self._harvest_media(images, "image")
        self._harvest_media(videos, "video")
        self._harvest_media(audios, "audio")
        # push to global sync immediately (best-effort websocket broadcast)
        if GLOBAL_SYNC_ENABLED and self.global_sync:
            payload = {"type": "skill_update", "time": now_iso(), "added": len(images) + len(videos) + len(audios)}
            loop = asyncio.new_event_loop()
            try:
                loop.run_until_complete(self.global_sync.ws_broadcast(payload))
            except Exception:
                pass
            finally:
                # FIX: the loop was previously never closed, leaking an event
                # loop (and its selector file descriptor) on every call.
                loop.close()

    def handle_plan_results(self, plan_id: str, plan_results: Dict[str, Any], local_tool_outputs: Optional[List[Dict[str, Any]]] = None):
        """Verify results, store the top web title as a fact in the VFS, and
        run opportunistic learning. Returns the verification summary."""
        v = self.verify(plan_results, local_tool_outputs)
        web = plan_results.get("web", []) or []
        if web:
            top = web[0]
            claim = top.get("title", "").strip()
            value = top.get("snippet", "").strip()
            sources = [{"url": top.get("url") or top.get("link"), "title": top.get("title"), "date": top.get("date")}]
            self.vfs.store_fact(claim, value, sources, float(v.get("confidence", 0.4)), controversy=v.get("controversy", 0.0))
        # opportunistic learning from any media in the results
        self.opportunistic_learning(plan_results)
        return v
|
588 |
+
|
589 |
+
# --------------------------
|
590 |
+
# Auto-backup & Export
|
591 |
+
# --------------------------
|
592 |
+
class BackupManager:
    """Creates timestamped zip backups (SQLite DB file + JSON mirrors of
    facts/skills/modules) and resolves the most recent backup on disk."""

    def __init__(self, db: LocalDB, data_dir: Path, backup_dir: Path):
        self.db = db
        self.data_dir = data_dir
        self.backup_dir = backup_dir
        self.lock = threading.Lock()

    def create_backup(self) -> str:
        """Write a new backup archive, register it in the DB, return its path."""
        with self.lock:
            stamp = now_iso().replace(":", "-")
            archive = self.backup_dir / f"mm_backup_{stamp}.zip"
            with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as zf:
                # include the raw SQLite DB file when present (best-effort)
                try:
                    db_file = Path(self.db.path)
                    if db_file.exists():
                        zf.write(str(db_file), arcname=db_file.name)
                except Exception:
                    pass
                # export JSON mirrors via the DB list helpers (best-effort):
                # gather everything first so a failing query skips all writes
                try:
                    exports = {
                        "facts.json": self.db.list_facts(),
                        "skills.json": self.db.list_skills(),
                        "modules.json": self.db.list_modules(),
                    }
                    for arcname, rows in exports.items():
                        zf.writestr(arcname, json.dumps(rows, ensure_ascii=False, indent=2))
                except Exception:
                    pass
            # record the backup in the DB (id not needed by callers)
            self.db.add_backup(str(archive))
            return str(archive)

    def download_backup_path(self) -> Optional[str]:
        """Return the path of the most recent backup, or None if none exist."""
        cur = self.db.conn.cursor()
        cur.execute("SELECT id,path,created_at FROM backups ORDER BY created_at DESC LIMIT 1")
        row = cur.fetchone()
        return row[1] if row else None
|
630 |
+
|
631 |
+
# --------------------------
|
632 |
+
# Main Orchestrator (upgrades v6.SuperAgent)
|
633 |
+
# --------------------------
|
634 |
+
class SuperAgentV7:
    """Main orchestrator: wires the local DB, fact store (VFS), creative
    skill vault, module manager, global sync, CHB middleman and backup
    manager together, and exposes the public agent API."""

    def __init__(self):
        self.db_path = DATA_DIR / "multimodular_v7.db"
        self.db = LocalDB(self.db_path)
        self.vfs = VFS(self.db)
        self.csv = CreativeSkillVault(self.db)
        self.global_sync = GlobalSync(self.db, hf_token=HUGGINGFACE_TOKEN, repo=GLOBAL_SYNC_REPO)
        # module manager uses GLOBAL_SYNC_SIGNING_PUBKEY if present
        self.module_mgr = ModuleManager(self.db, signing_pubkey_pem=GLOBAL_SYNC_SIGNING_PUBKEY)
        self.chb = CHB(self.db, self.vfs, self.csv, self.module_mgr, self.global_sync)
        self.backup_mgr = BackupManager(self.db, DATA_DIR, BACKUP_DIR)
        # start realtime ws server in background for incoming updates
        if websockets is not None:
            try:
                self.global_sync.run_ws_in_thread()
            except Exception as e:
                log.warning("ws server thread failed: %s", e)

    # --- user-driven search plan creation (CHB.plan)
    def plan_search(self, text: str) -> Dict[str, Any]:
        return self.chb.plan(text)

    def _broadcast_update(self, payload: Dict[str, Any]) -> None:
        """Best-effort websocket broadcast that works from sync or async callers."""
        try:
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # no loop running in this thread: asyncio.run is safe
                asyncio.run(self.global_sync.ws_broadcast(payload))
            else:
                # FIX: asyncio.run() raises RuntimeError when invoked from a
                # running loop (e.g. inside an async FastAPI handler), so the
                # old code silently dropped every broadcast; schedule a task
                # on the running loop instead.
                loop.create_task(self.global_sync.ws_broadcast(payload))
        except Exception:
            # deliberately best-effort: sync failures must not break requests
            pass

    # --- client submits results
    def submit_plan_results(self, plan_id: str, results: Dict[str, Any]) -> Dict[str, Any]:
        """Accept client retrieval results: verify & learn via CHB, then
        snapshot a backup and notify the global sync layer (best-effort)."""
        v = self.chb.handle_plan_results(plan_id, results)
        # create backup after a major knowledge update
        try:
            bp = self.backup_mgr.create_backup()
            log.info("backup created: %s", bp)
        except Exception:
            log.exception("backup failed")
        # push to global sync (best-effort; HF-hub push is a future extension)
        if GLOBAL_SYNC_ENABLED:
            payload = {"type": "fact_skill_update", "ts": now_iso(), "payload": {"summary": "update", "plan_id": plan_id}}
            self._broadcast_update(payload)
        return {"ok": True, "verify": v}

    # --- expose facts/skills
    def search_facts(self, q: str):
        return self.vfs.query(q)

    def top_skills(self, tag: str, k: int = 5):
        return self.csv.top_by_tag(tag, k)

    def add_module(self, name: str, code: str, signature_b64: Optional[str] = None, autointegrate: bool = True):
        return self.module_mgr.integrate_module(name, code, signature_b64, autointegrate)

    def download_latest_backup(self) -> Optional[str]:
        return self.backup_mgr.download_backup_path()

    # direct chat route that funnels through CHB
    def chat(self, text: str, plan_results: Optional[Dict[str, Any]] = None):
        """If the client already ran a retrieval plan, verify/learn from its
        results; otherwise return a fresh plan for the client to execute."""
        if plan_results:
            v = self.chb.handle_plan_results(uid("plan"), plan_results)
            return {"status": "ok", "verify": v}
        # no client retrieval performed yet: hand back a plan and ask the
        # client to run it, then submit via submit_plan_results
        plan = self.chb.plan(text)
        return {"status": "ok", "plan": plan, "hint": "Run the plan on client and submit results via submit_plan_results"}
|
694 |
+
|
695 |
+
# --------------------------
|
696 |
+
# FastAPI endpoints (added/extended)
|
697 |
+
# --------------------------
|
698 |
+
# HTTP surface: register FastAPI endpoints only when FastAPI imported
# successfully; otherwise fall back to a bare module-level AGENT so the
# library API (and --demo) still works without the web stack.
if FASTAPI_AVAILABLE:
    app = FastAPI(title="Multimodular SuperAgent v7.0")
    # single process-wide agent instance shared by all endpoints
    AGENT = SuperAgentV7()

    class PlanIn(BaseModel):
        # free-text user request to turn into a retrieval plan
        text: str

    @app.post("/v1/plan_search")
    async def api_plan_search(inp: PlanIn):
        """Build a client-executable retrieval plan from free text."""
        plan = AGENT.plan_search(inp.text)
        return {"ok": True, "plan": plan}

    @app.post("/v1/submit_results")
    async def api_submit_results(plan_id: str = Form(...), results: str = Form(...)):
        """Accept client retrieval results (JSON string form field)."""
        try:
            payload = json.loads(results)
        except Exception:
            # malformed JSON from the client -> 400, not a server error
            return JSONResponse({"ok": False, "error": "invalid_json"}, status_code=400)
        out = AGENT.submit_plan_results(plan_id, payload)
        return out

    @app.post("/v1/facts/search")
    async def api_facts_search(q: str = Form(...)):
        """Query the verified fact store (VFS)."""
        res = AGENT.search_facts(q)
        return {"ok": True, "results": res}

    @app.post("/v1/skills/top")
    async def api_skills_top(tag: str = Form(...), k: int = Form(5)):
        """Return the top-k skill benchmarks for a tag."""
        res = AGENT.top_skills(tag, k)
        return {"ok": True, "results": res}

    @app.post("/v1/module/add")
    async def api_module_add(name: str = Form(...), code: str = Form(...), signature_b64: Optional[str] = Form(None)):
        """Submit a module (python source or base64 WASM) for sandboxed
        verification and optional auto-integration."""
        out = AGENT.add_module(name, code, signature_b64)
        return out

    @app.get("/v1/backup/download")
    async def api_backup_download():
        """Stream the most recent backup zip, or 404 when none exists."""
        p = AGENT.download_latest_backup()
        if not p or not os.path.exists(p):
            return JSONResponse({"ok": False, "error": "no_backup"}, status_code=404)
        return FileResponse(p, media_type="application/zip", filename=os.path.basename(p))

    @app.post("/v1/chat")
    async def api_chat(text: str = Form(...), plan_results: Optional[str] = Form(None)):
        """Chat entry point; optionally carries client retrieval results
        (JSON string) alongside the user text."""
        if plan_results:
            try:
                pr = json.loads(plan_results)
            except Exception:
                return JSONResponse({"ok": False, "error": "invalid_plan_results"}, status_code=400)
            out = AGENT.chat(text, pr)
        else:
            out = AGENT.chat(text, None)
        return out
else:
    # FastAPI unavailable: no HTTP app, but the agent itself still runs
    app = None
    AGENT = SuperAgentV7()
|
755 |
+
|
756 |
+
# --------------------------
|
757 |
+
# CLI demo & execution
|
758 |
+
# --------------------------
|
759 |
+
def demo_run():
    """End-to-end demo: plan -> simulated client retrieval -> submit -> inspect."""
    print("Multimodular SuperAgent v7.0 - Demo")
    agent = AGENT
    # 1) ask the agent for a retrieval plan
    search_plan = agent.plan_search("futuristic electric motorcycle neon blue lights battery tech 2025")
    print("Plan:", search_plan)
    # 2) simulate what a client would send back after running the plan
    simulated_results = {
        "web": [
            {
                "title": "Solid-state battery prototype reaches 500 Wh/kg",
                "url": "https://example.org/article",
                "snippet": "Researchers at X report 500 Wh/kg...",
                "date": now_iso(),
            }
        ],
        "images": [
            {
                "path": str(BASE_DIR / "demo_motorcycle.png"),
                "quality_score": 0.92,
                "caption": "Futuristic motorcycle concept",
                "tags": ["motorcycle", "futuristic"],
            }
        ],
        "videos": [],
        "audios": [],
    }
    # make sure the demo image exists on disk (best-effort, Pillow optional)
    try:
        from PIL import Image, ImageDraw
        canvas = Image.new("RGB", (640, 480), (20, 20, 30))
        ImageDraw.Draw(canvas).text((20, 20), "Demo motorcycle", fill=(200, 200, 255))
        canvas.save(BASE_DIR / "demo_motorcycle.png")
    except Exception:
        pass
    # 3) submit the simulated results and show what was learned
    submit_out = agent.submit_plan_results(search_plan["id"], simulated_results)
    print("Submit results ->", submit_out)
    # show stored facts
    learned_facts = agent.search_facts("solid-state")
    print("Facts:", learned_facts)
    # show the latest backup path
    latest_backup = agent.download_latest_backup()
    print("Backup path:", latest_backup)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
792 |
|
|
|
|
|
|
|
793 |
if __name__ == "__main__":
    import argparse

    # CLI: --demo runs the offline demo, --runserver starts the HTTP API
    ap = argparse.ArgumentParser()
    ap.add_argument("--demo", action="store_true")
    ap.add_argument("--runserver", action="store_true")
    ap.add_argument("--port", type=int, default=8000)
    args = ap.parse_args()
    if args.demo:
        demo_run()
    elif args.runserver and FASTAPI_AVAILABLE:
        import uvicorn
        # FIX: the previous target string "multimodular_modul version 7.0:app"
        # is not a valid "module:attribute" import string (contains spaces and
        # a wrong module name), so uvicorn failed at startup; pass the app
        # object directly instead (reload=False makes this equivalent).
        uvicorn.run(app, host="0.0.0.0", port=args.port, reload=False)
    else:
        print("Run with --demo or --runserver. FASTAPI available:", FASTAPI_AVAILABLE)
|