Princeaka committed
Commit 0da93e3 · verified · 1 Parent(s): 58ee346

Update app.py

Files changed (1)
  1. app.py +329 -808
app.py CHANGED
@@ -1,822 +1,343 @@
1
- # app.py — Close-to-Human Multimodal AI (Gradio + FastAPI)
2
- # Single-file, offline-friendly, CPU/GPU, secure API key, CHB memory.
3
- # NOTE: Uses only free/open models. First run will download weights.
4
-
5
- import os
6
- import io
7
- import gc
8
- import sys
9
- import json
10
- import time
11
- import base64
12
- import random
13
- import string
14
- import hashlib
15
- import threading
16
- from typing import List, Dict, Optional, Tuple
17
-
18
- # ===== Runtime device selection =====
19
- import torch
20
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
21
- DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
22
-
23
- # ===== Web server (FastAPI) + UI (Gradio) =====
24
- import gradio as gr
25
- from fastapi import FastAPI, Header, HTTPException, UploadFile, File
26
- from fastapi.middleware.cors import CORSMiddleware
27
- import uvicorn
28
-
29
- # ===== Core AI libs =====
30
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
31
- from sentence_transformers import SentenceTransformer
32
- import faiss
33
- import networkx as nx
34
-
35
- # Vision & Diffusion
36
- from PIL import Image, ImageOps, ImageFont, ImageDraw
37
- from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipeline
38
- from transformers import BlipProcessor, BlipForConditionalGeneration
39
-
40
- # Audio
41
- from faster_whisper import WhisperModel
42
- import soundfile as sf
43
- import librosa
44
-
45
- # TTS / Voice cloning (XTTS v2)
46
- from TTS.api import TTS
47
-
48
- # Video
49
- import imageio
50
- import cv2
51
- from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
52
-
53
- # Files
54
- import numpy as np
55
- import pandas as pd
56
- from PyPDF2 import PdfReader
57
- import docx
58
- import openpyxl
59
-
60
- # Security
61
- from cryptography.fernet import Fernet
62
-
63
- # =========================================
64
- # Paths & lightweight persistence
65
- # =========================================
66
- ROOT = os.environ.get("APP_ROOT", ".")
67
- DATA_DIR = os.path.join(ROOT, "data")
68
- os.makedirs(DATA_DIR, exist_ok=True)
69
-
70
- KEYS_FILE = os.path.join(DATA_DIR, "api_keys.json")
71
- FACTS_FILE = os.path.join(DATA_DIR, "facts.jsonl")
72
- KG_FILE = os.path.join(DATA_DIR, "knowledge_graph.json")
73
- MEM_INDEX_FILE = os.path.join(DATA_DIR, "faiss.index")
74
- MEM_META_FILE = os.path.join(DATA_DIR, "faiss_meta.json")
75
- FERNET_KEY_FILE = os.path.join(DATA_DIR, "fernet.key")
76
-
77
- # =========================================
78
- # Security: API Keys
79
- # =========================================
80
- def load_or_create_fernet_key():
81
- if os.path.exists(FERNET_KEY_FILE):
82
- with open(FERNET_KEY_FILE, "rb") as f:
83
- return f.read()
84
- key = Fernet.generate_key()
85
- with open(FERNET_KEY_FILE, "wb") as f:
86
- f.write(key)
87
- return key
88
-
89
- FERNET = Fernet(load_or_create_fernet_key())
90
-
91
- def _rand_key(n=25):
92
- chars = string.ascii_letters + string.digits
93
- return "".join(random.choice(chars) for _ in range(n))
94
-
95
- def load_keys():
96
- if os.path.exists(KEYS_FILE):
97
- with open(KEYS_FILE, "rb") as f:
98
- enc = f.read()
99
- if not enc:
100
- return {}
101
  try:
102
- data = FERNET.decrypt(enc).decode("utf-8")
103
- return json.loads(data)
104
- except Exception:
105
- return {}
106
- return {}
107
-
108
- def save_keys(d):
109
- enc = FERNET.encrypt(json.dumps(d).encode("utf-8"))
110
- with open(KEYS_FILE, "wb") as f:
111
- f.write(enc)
112
-
113
- API_KEYS = load_keys()
114
- if not API_KEYS:
115
- # First-run bootstrap default user
116
- api_key = _rand_key(25)
117
- API_KEYS["default_user"] = {"api_key": api_key, "created_at": time.time()}
118
- save_keys(API_KEYS)
119
-
120
- def get_default_api_key():
121
- return API_KEYS["default_user"]["api_key"]
122
-
123
- def verify_api_key(header_key: str):
124
- for user, rec in API_KEYS.items():
125
- if rec.get("api_key") == header_key:
126
- return True
127
- return False
128
-
129
- # =========================================
130
- # CHB Memory: FAISS + KG + Facts
131
- # =========================================
132
- EMBEDDER = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=DEVICE)
133
-
134
- # FAISS memory
135
- if os.path.exists(MEM_INDEX_FILE) and os.path.exists(MEM_META_FILE):
136
  try:
137
- index = faiss.read_index(MEM_INDEX_FILE)
138
- with open(MEM_META_FILE, "r") as f:
139
- mem_meta = json.load(f)
140
  except Exception:
141
- index = faiss.IndexFlatL2(384)
142
- mem_meta = []
143
- else:
144
- index = faiss.IndexFlatL2(384)
145
- mem_meta = []
146
-
147
- def mem_add(text: str, meta: Dict):
148
- vec = EMBEDDER.encode([text])
149
- index.add(np.array(vec, dtype="float32"))
150
- mem_meta.append({"text": text, "meta": meta, "ts": time.time()})
151
- faiss.write_index(index, MEM_INDEX_FILE)
152
- with open(MEM_META_FILE, "w") as f:
153
- json.dump(mem_meta, f)
154
-
155
- def mem_search(query: str, top_k=5):
156
- if index.ntotal == 0:
157
- return []
158
- q = EMBEDDER.encode([query])
159
- D, I = index.search(np.array(q, dtype="float32"), top_k)
160
- results = []
161
- for d, i in zip(D[0], I[0]):
162
- if i < len(mem_meta):
163
- results.append((float(d), mem_meta[i]))
164
- return results
165
-
166
- # Knowledge Graph
167
- if os.path.exists(KG_FILE):
168
- with open(KG_FILE, "r") as f:
169
- KG_data = json.load(f)
170
- KG = nx.DiGraph()
171
- KG.add_nodes_from(KG_data["nodes"])
172
- KG.add_edges_from([(e["u"], e["v"], e["data"]) for e in KG_data["edges"]])
173
- else:
174
- KG = nx.DiGraph()
175
-
176
- def kg_save():
177
- data = {
178
- "nodes": list(KG.nodes()),
179
- "edges": [{"u": u, "v": v, "data": KG[u][v]} for u, v in KG.edges()]
180
- }
181
- with open(KG_FILE, "w") as f:
182
- json.dump(data, f)
183
-
184
- def kg_add_fact(subject, predicate, obj, confidence=1.0, source="user"):
185
- KG.add_node(subject)
186
- KG.add_node(obj)
187
- KG.add_edge(subject, obj, data={"predicate": predicate, "confidence": confidence, "source": source, "ts": time.time()})
188
- kg_save()
189
-
190
- # Versioned facts (JSONL)
191
- def add_fact(subject, predicate, obj, confidence=1.0, source="system"):
192
- record = {
193
- "ts": time.time(),
194
- "subject": subject,
195
- "predicate": predicate,
196
- "object": obj,
197
- "confidence": confidence,
198
- "source": source,
199
- }
200
- with open(FACTS_FILE, "a") as f:
201
- f.write(json.dumps(record) + "\n")
202
-
203
- # =========================================
204
- # Core Models (loaded lazily)
205
- # =========================================
206
- LLM_NAME = os.environ.get("LLM_NAME", "google/flan-t5-base") # CPU-friendly
207
- TEXT_GEN = None
208
- BLIP_PROC = None
209
- BLIP_MODEL = None
210
- SD_TXT2IMG = None
211
- SD_IMG2IMG = None
212
- SD_INPAINT = None
213
- WHISPER = None
214
- TTS_MODEL = None
215
-
216
- def load_text_llm():
217
- global TEXT_GEN
218
- if TEXT_GEN is None:
219
- TEXT_GEN = pipeline("text2text-generation", model=LLM_NAME, device=0 if DEVICE=="cuda" else -1)
220
- return TEXT_GEN
221
-
222
- def load_blip():
223
- global BLIP_PROC, BLIP_MODEL
224
- if BLIP_MODEL is None:
225
- BLIP_PROC = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
226
- BLIP_MODEL = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(DEVICE)
227
- return BLIP_PROC, BLIP_MODEL
228
-
229
- def load_sd_txt2img():
230
- global SD_TXT2IMG
231
- if SD_TXT2IMG is None:
232
- SD_TXT2IMG = StableDiffusionPipeline.from_pretrained(
233
- "runwayml/stable-diffusion-v1-5",
234
- torch_dtype=torch.float16 if DEVICE=="cuda" else torch.float32
235
- )
236
- SD_TXT2IMG = SD_TXT2IMG.to(DEVICE)
237
- return SD_TXT2IMG
238
-
239
- def load_sd_img2img():
240
- global SD_IMG2IMG
241
- if SD_IMG2IMG is None:
242
- SD_IMG2IMG = StableDiffusionImg2ImgPipeline.from_pretrained(
243
- "runwayml/stable-diffusion-v1-5",
244
- torch_dtype=torch.float16 if DEVICE=="cuda" else torch.float32
245
- ).to(DEVICE)
246
- return SD_IMG2IMG
247
-
248
- def load_sd_inpaint():
249
- global SD_INPAINT
250
- if SD_INPAINT is None:
251
- SD_INPAINT = StableDiffusionInpaintPipeline.from_pretrained(
252
- "runwayml/stable-diffusion-inpainting",
253
- torch_dtype=torch.float16 if DEVICE=="cuda" else torch.float32
254
- ).to(DEVICE)
255
- return SD_INPAINT
256
-
257
- def load_whisper():
258
- global WHISPER
259
- if WHISPER is None:
260
- # faster-whisper model names: tiny, base, small, medium, large-v3
261
- model_size = os.environ.get("WHISPER_SIZE", "small")
262
- WHISPER = WhisperModel(model_size, device=DEVICE, compute_type="float16" if DEVICE=="cuda" else "int8")
263
- return WHISPER
264
-
265
- def load_tts():
266
- global TTS_MODEL
267
- if TTS_MODEL is None:
268
- # Multilingual XTTS v2 (supports voice cloning)
269
- TTS_MODEL = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
270
- return TTS_MODEL
271
-
272
- # =========================================
273
- # CHB Pipeline
274
- # =========================================
275
- def chb_enrich_context(query: str) -> str:
276
- # Retrieve top-3 from FAISS to enrich prompt
277
- hits = mem_search(query, top_k=3)
278
- notes = []
279
- for d, meta in hits:
280
- notes.append(f"[mem@{time.strftime('%Y-%m-%d', time.localtime(meta['ts']))}] {meta['text']}")
281
- return "\n".join(notes)
282
-
283
- def chb_generate_reply(user_text: str) -> str:
284
- # Build prompt with memory enrichment
285
- ctx = chb_enrich_context(user_text)
286
- prompt = ('You are a helpful, warm assistant. Use the references if useful.\n'
287
- 'References:\n'
288
- f'{ctx}\n\n'
289
- f'User: {user_text}\n'
290
- 'Assistant:')
291
- gen = load_text_llm()
292
- out = gen(prompt, max_new_tokens=256)
293
- reply = out[0]["generated_text"]
294
- # Store interaction in memory
295
- mem_add(user_text, {"type": "user"})
296
- mem_add(reply, {"type": "assistant"})
297
- return reply
298
-
299
- def chb_store_user_fact(text: str):
300
- # Simple detection: "my name is X"
301
- lower = text.lower()
302
- if "my name is" in lower:
303
- name = text.split("my name is", 1)[1].strip().split()[0]
304
- add_fact("user", "name", name, confidence=1.0, source="user")
305
- kg_add_fact("user", "name", name, confidence=1.0, source="user")
306
- mem_add(f"User name = {name}", {"type":"fact"})
307
-
308
- # =========================================
309
- # Multimodal Feature Functions
310
- # =========================================
311
- def image_to_text(img: Image.Image) -> str:
312
- proc, model = load_blip()
313
- inputs = proc(images=img, return_tensors="pt").to(DEVICE)
314
- out = model.generate(**inputs, max_new_tokens=64)
315
- caption = proc.decode(out[0], skip_special_tokens=True)
316
- mem_add(f"IMG2TXT: {caption}", {"type":"img2txt"})
317
- return caption
318
-
319
- def text_to_image(prompt: str, steps: int=20, guidance: float=7.5, seed: Optional[int]=None) -> Image.Image:
320
- pipe = load_sd_txt2img()
321
- if seed is None:
322
- seed = random.randint(0, 2**32-1)
323
- generator = torch.Generator(device=DEVICE).manual_seed(seed)
324
- img = pipe(prompt, num_inference_steps=steps, guidance_scale=guidance, generator=generator).images[0]
325
- mem_add(f"TXT2IMG prompt: {prompt}", {"type":"txt2img"})
326
- return img
327
-
328
- def edit_image(img: Image.Image, prompt: str, strength: float=0.6, steps: int=20) -> Image.Image:
329
- pipe = load_sd_img2img()
330
- img = img.convert("RGB")
331
- edited = pipe(prompt=prompt, image=img, strength=strength, num_inference_steps=steps).images[0]
332
- mem_add(f"IMGEDIT: {prompt}", {"type":"imgedit"})
333
- return edited
334
-
335
- def inpaint_image(img: Image.Image, mask: Image.Image, prompt: str, steps: int=20) -> Image.Image:
336
- pipe = load_sd_inpaint()
337
- img = img.convert("RGB")
338
- mask = mask.convert("RGB")
339
- out = pipe(prompt=prompt, image=img, mask_image=mask, num_inference_steps=steps).images[0]
340
- mem_add(f"INPAINT: {prompt}", {"type":"inpaint"})
341
- return out
342
-
343
- def voice_to_text(audio_path: str) -> str:
344
- model = load_whisper()
345
- segments, info = model.transcribe(audio_path, beam_size=5)
346
- text = " ".join([seg.text for seg in segments])
347
- mem_add(f"ASR: {text[:200]}", {"type":"asr"})
348
- return text
349
-
350
- def text_to_voice(text: str, ref_audio: Optional[str]=None, speaker: Optional[str]=None, out_path: Optional[str]=None) -> str:
351
- tts = load_tts()
352
- if out_path is None:
353
- out_path = os.path.join(DATA_DIR, f"tts_{int(time.time())}.wav")
354
- if ref_audio:
355
- tts.tts_to_file(text=text, file_path=out_path, speaker_wav=ref_audio, language="en")
356
- else:
357
- # default voice
358
- tts.tts_to_file(text=text, file_path=out_path, speaker=speaker or "female-en-5", language="en")
359
- mem_add(f"TTS: {text[:120]}", {"type":"tts"})
360
- return out_path
361
-
362
- def video_to_text(video_path: str, frames: int=8) -> str:
363
- # Sample frames evenly, caption via BLIP, join
364
- proc, model = load_blip()
365
- cap = cv2.VideoCapture(video_path)
366
- total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 1
367
- idxs = np.linspace(0, total-1, num=min(frames, total), dtype=int)
368
- captions = []
369
- for i in idxs:
370
- cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
371
- ok, frame = cap.read()
372
- if not ok:
373
  continue
374
- img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
375
- inputs = proc(images=img, return_tensors="pt").to(DEVICE)
376
- out = model.generate(**inputs, max_new_tokens=32)
377
- cap_text = proc.decode(out[0], skip_special_tokens=True)
378
- captions.append(cap_text)
379
- cap.release()
380
- summary = " | ".join(captions) if captions else "No frames read."
381
- mem_add(f"VID2TXT: {summary[:200]}", {"type":"vid2txt"})
382
- return summary
383
-
384
- def text_to_video_clip(prompt: str, seconds: int=3, fps: int=8) -> str:
385
- # Lightweight approach: generate N images via SD and stitch into GIF/MP4
386
- frames = []
387
- n = seconds * fps
388
- for i in range(n):
389
- seed = random.randint(0, 2**32-1)
390
- img = text_to_image(prompt + f", cinematic frame {i+1}", steps=15, guidance=7.0, seed=seed)
391
- frames.append(np.array(img))
392
- out_path = os.path.join(DATA_DIR, f"t2v_{int(time.time())}.mp4")
393
- imageio.mimwrite(out_path, frames, fps=fps, quality=7)
394
- mem_add(f"T2V: {prompt}", {"type":"t2v"})
395
- return out_path
396
-
397
- def video_edit_caption(video_path: str, caption_text: str) -> str:
398
- clip = VideoFileClip(video_path)
399
- txt = TextClip(caption_text, fontsize=40, color="white").set_duration(clip.duration).set_position(("center", "bottom"))
400
- out = CompositeVideoClip([clip, txt])
401
- out_path = os.path.join(DATA_DIR, f"captioned_{int(time.time())}.mp4")
402
- out.write_videofile(out_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
403
- mem_add(f"VIDCAP: {caption_text[:120]}", {"type":"vidcap"})
404
- return out_path
405
-
406
- def code_to_text(code: str, lang: str="python") -> str:
407
- prompt = f"Explain this {lang} code step by step. Be concise.\n\n```{lang}\n{code}\n```"
408
- return chb_generate_reply(prompt)
409
-
410
- def text_to_code(spec: str, lang: str="python") -> str:
411
- prompt = f"Write {lang} code that satisfies the following requirement. Provide only code:\n{spec}"
412
- return chb_generate_reply(prompt)
413
-
414
- def code_to_image(code: str, lang: str="python") -> Image.Image:
415
- from pygments import highlight
416
- from pygments.lexers import get_lexer_by_name
417
- from pygments.formatters import ImageFormatter
418
- lexer = get_lexer_by_name(lang, stripall=True)
419
- formatter = ImageFormatter(font_name="DejaVu Sans Mono", line_numbers=True)
420
- img_bytes = highlight(code, lexer, formatter)
421
- img = Image.open(io.BytesIO(img_bytes))
422
- mem_add(f"CODE2IMG {lang}", {"type":"code2img"})
423
- return img
424
-
425
- def voice_to_code(audio_path: str, lang: str="python") -> str:
426
- spec = voice_to_text(audio_path)
427
- return text_to_code(spec, lang)
428
-
429
- def emoji_interpret(text: str) -> str:
430
- import emoji as em
431
- # Convert emoji to description
432
- return em.demojize(text, language='en')
433
-
434
- def emoji_generate(desc: str) -> Image.Image:
435
- # Generate sticker-like image via SD
436
- return text_to_image(f"high quality 2D sticker emoji of: {desc}, white background, bold outline, vector style", steps=25, guidance=8.5)
437
-
438
- def file_reader(file_path: str) -> str:
439
- ext = os.path.splitext(file_path)[1].lower()
440
- if ext in [".txt", ".md", ".py", ".json", ".csv"]:
441
- with open(file_path, "r", errors="ignore") as f:
442
- return f.read()
443
- if ext == ".pdf":
444
- reader = PdfReader(file_path)
445
- return "\n".join(page.extract_text() or "" for page in reader.pages)
446
- if ext in [".docx"]:
447
- d = docx.Document(file_path)
448
- return "\n".join(p.text for p in d.paragraphs)
449
- if ext in [".xlsx"]:
450
- df = pd.read_excel(file_path)
451
- return df.to_csv(index=False)
452
- return "Unsupported file type."
453
-
454
- def file_to_text(upload) -> str:
455
- with open(upload.name, "wb") as f:
456
- f.write(upload.read())
457
- return file_reader(upload.name)
458
 
459
- def text_to_file(text: str, ext: str="txt") -> str:
460
- out = os.path.join(DATA_DIR, f"file_{int(time.time())}.{ext}")
461
- if ext == "txt":
462
- with open(out, "w", encoding="utf-8") as f:
463
- f.write(text)
464
- elif ext == "docx":
465
- d = docx.Document()
466
- d.add_paragraph(text)
467
- d.save(out)
468
- elif ext == "csv":
469
- with open(out, "w", encoding="utf-8") as f:
470
- f.write(text)
471
  else:
472
- with open(out, "w", encoding="utf-8") as f:
473
- f.write(text)
474
- return out
475
-
476
- # =========================================
477
- # Start Chatting (All-in-One)
478
- # =========================================
479
- def auto_route(user_text: str, image: Optional[Image.Image], audio: Optional[str], video: Optional[str], file: Optional[str]):
480
- # Unified multimodal router. Decide intent and call proper module.
481
- reply_text = ""
482
- reply_image = None
483
- reply_audio = None
484
- reply_video = None
485
-
486
- # Learn simple facts
487
- if user_text:
488
- chb_store_user_fact(user_text)
489
-
490
- # If image present and no explicit instruction -> caption it
491
- if image is not None and (not user_text or "describe" in user_text.lower() or "what is" in user_text.lower()):
492
- reply_text = image_to_text(image)
493
-
494
- # If user asks to 'draw/make/generate' an image
495
- elif user_text and any(k in user_text.lower() for k in ["draw", "generate image", "make an image", "text to image", "create a picture", "illustrate"]):
496
- reply_image = text_to_image(user_text)
497
 
498
- # Voice present
499
- elif audio is not None:
500
- txt = voice_to_text(audio)
501
- # If user asked for code via voice
502
- if "code" in txt.lower() or "function" in txt.lower():
503
- reply_text = text_to_code(txt)
504
- else:
505
- reply_text = chb_generate_reply(txt)
506
 
507
- # Video present
508
- elif video is not None:
509
- reply_text = video_to_text(video)
510
 
511
- # File present
512
- elif file is not None:
513
- content = file_reader(file)
514
- reply_text = chb_generate_reply(f"Summarize this file:\n{content[:8000]}")
515
 
516
- # Pure text case
517
  else:
518
- # Emojis only?
519
- stripped = user_text.strip() if user_text else ""
520
- if stripped and all(ord(c) > 1000 or c in ":)-(" or c.isascii()==False for c in stripped):
521
- reply_text = emoji_interpret(stripped)
522
  else:
523
- reply_text = chb_generate_reply(user_text or "Hello")
524
-
525
- return reply_text, reply_image, reply_audio, reply_video
526
-
527
- # =========================================
528
- # FastAPI App
529
- # =========================================
530
- api = FastAPI(title="Close-to-Human Multimodal API")
531
- api.add_middleware(
532
- CORSMiddleware,
533
- allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
534
- )
535
-
536
- @api.get("/api/ping")
537
- def ping():
538
- return {"ok": True, "device": DEVICE, "api_key_hint": get_default_api_key()[:6] + "***"}
539
-
540
- @api.post("/api/chat")
541
- def api_chat(message: str, x_api_key: Optional[str] = Header(None)):
542
- if not x_api_key or not verify_api_key(x_api_key):
543
- raise HTTPException(status_code=401, detail="Invalid API key")
544
- text, img, aud, vid = auto_route(message, None, None, None, None)
545
- return {"text": text}
546
-
547
- @api.post("/api/text-to-image")
548
- def api_t2i(prompt: str, x_api_key: Optional[str] = Header(None)):
549
- if not x_api_key or not verify_api_key(x_api_key):
550
- raise HTTPException(status_code=401, detail="Invalid API key")
551
- img = text_to_image(prompt)
552
- buf = io.BytesIO()
553
- img.save(buf, format="PNG")
554
- b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
555
- return {"image_base64": b64}
556
-
557
- # =========================================
558
- # Gradio UI
559
- # =========================================
560
- def ui_start_chat(user_text, image, audio, video, file):
561
- audio_path = None
562
- video_path = None
563
- file_path = None
564
- if audio is not None:
565
- audio_path = audio
566
- if video is not None:
567
- video_path = video
568
- if file is not None:
569
- # gradio gives a temp path
570
- file_path = file.name
571
- text, img, aud, vid = auto_route(user_text, image, audio_path, video_path, file_path)
572
- return text, img, aud, vid
573
-
574
- def ui_text_chat(prompt):
575
- return chb_generate_reply(prompt)
576
-
577
- def ui_image_to_text(image):
578
- return image_to_text(image)
579
-
580
- def ui_text_to_image(prompt, steps, guidance):
581
- return text_to_image(prompt, steps=steps, guidance=guidance)
582
-
583
- def ui_image_edit(image, prompt, strength, steps):
584
- return edit_image(image, prompt, strength=strength, steps=steps)
585
-
586
- def ui_inpaint(image, mask, prompt, steps):
587
- return inpaint_image(image, mask, prompt, steps=steps)
588
-
589
- def ui_voice_to_text(audio):
590
- return voice_to_text(audio)
591
-
592
- def ui_text_to_voice(text, ref_audio):
593
- out = text_to_voice(text, ref_audio=ref_audio)
594
- return out
595
-
596
- def ui_video_to_text(video):
597
- return video_to_text(video)
598
-
599
- def ui_text_to_video(prompt, seconds, fps):
600
- return text_to_video_clip(prompt, seconds=seconds, fps=fps)
601
-
602
- def ui_video_edit_caption(video, caption):
603
- return video_edit_caption(video, caption)
604
-
605
- def ui_text_to_code(text, lang):
606
- return text_to_code(text, lang)
607
-
608
- def ui_code_to_text(code, lang):
609
- return code_to_text(code, lang)
610
-
611
- def ui_code_to_image(code, lang):
612
- return code_to_image(code, lang)
613
-
614
- def ui_voice_to_code(audio, lang):
615
- return voice_to_code(audio, lang)
616
-
617
- def ui_emoji_interpret(text):
618
- return emoji_interpret(text)
619
-
620
- def ui_emoji_generate(desc):
621
- return emoji_generate(desc)
622
-
623
- def ui_file_reader(file):
624
- return file_reader(file.name)
625
-
626
- def ui_file_to_text(file):
627
- return file_reader(file.name)
628
-
629
- def ui_text_to_file(text, ext):
630
- return text_to_file(text, ext)
631
-
632
- def build_gradio():
633
- with gr.Blocks(title="Close-to-Human Multimodal AI") as demo:
634
- gr.Markdown("## Start chatting AI this — all-in-one, natural multimodal chat")
635
- with gr.Tab("Start Chatting AI"):
636
- with gr.Row():
637
- user_text = gr.Textbox(label="Say anything… (text, emojis, ask for code, etc.)")
638
- with gr.Row():
639
- image = gr.Image(label="Optional image", type="pil")
640
- audio = gr.Audio(label="Optional audio (wav/mp3)", type="filepath")
641
- with gr.Row():
642
- video = gr.Video(label="Optional video", format="mp4")
643
- file = gr.File(label="Optional file")
644
- go = gr.Button("Send")
645
- out_text = gr.Textbox(label="AI reply (text)")
646
- out_img = gr.Image(label="AI reply (image)")
647
- out_aud = gr.Audio(label="AI reply (audio)", type="filepath")
648
- out_vid = gr.Video(label="AI reply (video)")
649
- go.click(ui_start_chat, [user_text, image, audio, video, file], [out_text, out_img, out_aud, out_vid])
650
-
651
- with gr.Tab("Text Chat"):
652
- prompt = gr.Textbox(label="Prompt")
653
- btn = gr.Button("Ask")
654
- answer = gr.Textbox(label="Answer")
655
- btn.click(ui_text_chat, [prompt], [answer])
656
-
657
- with gr.Tab("Image → Text"):
658
- img = gr.Image(label="Image", type="pil")
659
- btn2 = gr.Button("Caption")
660
- cap = gr.Textbox(label="Caption")
661
- btn2.click(ui_image_to_text, [img], [cap])
662
-
663
- with gr.Tab("Text → Image"):
664
- ti = gr.Textbox(label="Prompt")
665
- steps = gr.Slider(5, 50, value=20, step=1, label="Steps")
666
- guidance = gr.Slider(1.0, 12.0, value=7.5, step=0.5, label="Guidance")
667
- btn3 = gr.Button("Generate")
668
- img_out = gr.Image(label="Image")
669
- btn3.click(ui_text_to_image, [ti, steps, guidance], [img_out])
670
-
671
- with gr.Tab("Image Editing & Painting"):
672
- base = gr.Image(label="Base image", type="pil")
673
- edp = gr.Textbox(label="Edit prompt")
674
- strength = gr.Slider(0.1, 1.0, value=0.6, step=0.1, label="Strength")
675
- steps_e = gr.Slider(5, 50, value=20, step=1, label="Steps")
676
- btn4 = gr.Button("Edit")
677
- out_e = gr.Image(label="Edited image")
678
- btn4.click(ui_image_edit, [base, edp, strength, steps_e], [out_e])
679
-
680
- with gr.Tab("Image Inpainting"):
681
- base2 = gr.Image(label="Base image", type="pil")
682
- mask = gr.Image(label="Mask (white=paint)", type="pil")
683
- inp = gr.Textbox(label="Inpaint prompt")
684
- steps_i = gr.Slider(5, 50, value=20, step=1, label="Steps")
685
- btn5 = gr.Button("Inpaint")
686
- out_i = gr.Image(label="Inpainted")
687
- btn5.click(ui_inpaint, [base2, mask, inp, steps_i], [out_i])
688
-
689
- with gr.Tab("Voice → Text"):
690
- a_in = gr.Audio(label="Audio", type="filepath")
691
- a_btn = gr.Button("Transcribe")
692
- a_out = gr.Textbox(label="Transcription")
693
- a_btn.click(ui_voice_to_text, [a_in], [a_out])
694
-
695
- with gr.Tab("Text → Voice"):
696
- ttv_text = gr.Textbox(label="Text")
697
- ref = gr.Audio(label="Reference voice (optional)", type="filepath")
698
- ttv_btn = gr.Button("Synthesize")
699
- ttv_out = gr.Audio(label="Speech", type="filepath")
700
- ttv_btn.click(ui_text_to_voice, [ttv_text, ref], [ttv_out])
701
-
702
- with gr.Tab("Voice Cloning → Code"):
703
- vcc_in = gr.Audio(label="Instruction audio", type="filepath")
704
- vcc_lang = gr.Dropdown(choices=["python","javascript","html","css","java","c","cpp","go","rust"], value="python", label="Language")
705
- vcc_btn = gr.Button("Transcribe & Code")
706
- vcc_out = gr.Code(label="Generated code")
707
- vcc_btn.click(ui_voice_to_code, [vcc_in, vcc_lang], [vcc_out])
708
-
709
- with gr.Tab("Video → Text"):
710
- v_in = gr.Video(label="Video")
711
- v_btn = gr.Button("Describe")
712
- v_out = gr.Textbox(label="Description")
713
- v_btn.click(ui_video_to_text, [v_in], [v_out])
714
-
715
- with gr.Tab("Text → Video Clip"):
716
- t2v_prompt = gr.Textbox(label="Prompt")
717
- t2v_sec = gr.Slider(1, 5, value=3, step=1, label="Seconds")
718
- t2v_fps = gr.Slider(4, 12, value=8, step=1, label="FPS")
719
- t2v_btn = gr.Button("Generate Clip")
720
- t2v_out = gr.Video(label="Video")
721
- t2v_btn.click(ui_text_to_video, [t2v_prompt, t2v_sec, t2v_fps], [t2v_out])
722
-
723
- with gr.Tab("Video Editing / Caption"):
724
- ve_in = gr.Video(label="Video")
725
- ve_text = gr.Textbox(label="Caption text")
726
- ve_btn = gr.Button("Overlay Caption")
727
- ve_out = gr.Video(label="Captioned Video")
728
- ve_btn.click(ui_video_edit_caption, [ve_in, ve_text], [ve_out])
729
-
730
- with gr.Tab("Text ↔ Code"):
731
- with gr.Row():
732
- t2c_text = gr.Textbox(label="Requirement → Code")
733
- t2c_lang = gr.Dropdown(["python","javascript","html","css","java","c","cpp","go","rust"], value="python")
734
- t2c_btn = gr.Button("Generate Code")
735
- t2c_out = gr.Code(label="Code")
736
- t2c_btn.click(ui_text_to_code, [t2c_text, t2c_lang], [t2c_out])
737
-
738
- gr.Markdown("---")
739
-
740
- with gr.Row():
741
- c2t_code = gr.Code(label="Code → Explain")
742
- c2t_lang = gr.Dropdown(["python","javascript","html","css","java","c","cpp","go","rust"], value="python")
743
- c2t_btn = gr.Button("Explain Code")
744
- c2t_out = gr.Textbox(label="Explanation")
745
- c2t_btn.click(ui_code_to_text, [c2t_code, c2t_lang], [c2t_out])
746
-
747
- gr.Markdown("---")
748
-
749
- with gr.Row():
750
- c2i_code = gr.Code(label="Code → Image (rendered)")
751
- c2i_lang = gr.Dropdown(["python","javascript","html","css","java","c","cpp","go","rust"], value="python")
752
- c2i_btn = gr.Button("Render Image")
753
- c2i_out = gr.Image(label="Code Image")
754
- c2i_btn.click(ui_code_to_image, [c2i_code, c2i_lang], [c2i_out])
755
-
756
- with gr.Tab("Emoji / Sticker / GIF"):
757
- em_text = gr.Textbox(label="Emoji/Sticker/GIF (interpret)")
758
- em_btn = gr.Button("Interpret")
759
- em_out = gr.Textbox(label="Meaning")
760
- em_btn.click(ui_emoji_interpret, [em_text], [em_out])
761
-
762
- gr.Markdown("---")
763
-
764
- em_gen = gr.Textbox(label="Describe a sticker to generate")
765
- em_gen_btn = gr.Button("Generate Sticker")
766
- em_gen_out = gr.Image(label="Sticker")
767
- em_gen_btn.click(ui_emoji_generate, [em_gen], [em_gen_out])
768
-
769
- with gr.Tab("File Reader / Convert"):
770
- fr_file = gr.File(label="File")
771
- fr_btn = gr.Button("Read File")
772
- fr_out = gr.Textbox(label="File Content", lines=15)
773
- fr_btn.click(ui_file_reader, [fr_file], [fr_out])
774
-
775
- gr.Markdown("---")
776
-
777
- ft_file = gr.File(label="File → Text")
778
- ft_btn = gr.Button("Convert")
779
- ft_out = gr.Textbox(label="Extracted Text", lines=15)
780
- ft_btn.click(ui_file_to_text, [ft_file], [ft_out])
781
-
782
- gr.Markdown("---")
783
-
784
- ttf_text = gr.Textbox(label="Text → File")
785
- ttf_ext = gr.Dropdown(["txt","docx","csv"], value="txt", label="File type")
786
- ttf_btn = gr.Button("Create File")
787
- ttf_out = gr.File(label="Download")
788
- ttf_btn.click(ui_text_to_file, [ttf_text, ttf_ext], [ttf_out])
789
-
790
- with gr.Tab("API & Keys"):
791
- gr.Markdown("### Your API Key")
792
- key_box = gr.Textbox(value=get_default_api_key(), label="X-API-Key", interactive=False)
793
- gr.Markdown("**Use with header `X-API-Key` on endpoints:** `/api/chat`, `/api/text-to-image`")
794
- gr.Markdown("**UI Port:** 7860 &nbsp;&nbsp; **API Port:** 7861")
795
- gr.Markdown("**Server Device:** " + DEVICE)
796
-
797
- return demo
798
-
799
- # =========================================
800
- # Launch FastAPI + Gradio together
801
- # =========================================
802
- def start_servers():
803
- demo = build_gradio()
804
- # Mount Gradio root hint
805
- @api.get("/")
806
- def root():
807
- return {"message": "Go to the Gradio UI on port 7860. API lives on port 7861."}
808
-
809
- # Run Gradio as background thread
810
- def run_gradio():
811
- demo.queue().launch(server_name="0.0.0.0", server_port=7860, show_api=False, share=False)
812
-
813
- th = threading.Thread(target=run_gradio, daemon=True)
814
- th.start()
815
- # Run FastAPI (uvicorn)
816
- uvicorn.run(api, host="0.0.0.0", port=7861)
817
 
818
  if __name__ == "__main__":
819
- print("Close-to-Human Multimodal AI — starting…")
820
- print("Device:", DEVICE)
821
- print("API key:", get_default_api_key())
822
- start_servers()
 
1
+ #!/usr/bin/env python3
2
+ # app.py - Front-end dashboard for Multimodular v7 (multimodal)
3
+ # Place this file alongside your multimodular module (compact or expanded).
4
+
5
+ import os, time, sys, json, pathlib
6
+
7
+ # ---- Config: brain module names to try ----
8
+ CANDIDATE_MODULES = [
9
+ "multimodular_modul_v7", # compact name used earlier
10
+ "multimodular_modul_v7_expanded", # expanded package name used earlier
11
+ "multimodular_modul version 7.0", # fallback if you saved exact name (unlikely)
12
+ ]
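+ # NOTE: these module names are guesses at how the brain file was saved locally.
+ # If your file is named differently (e.g. a hypothetical my_brain_v7.py), add its
+ # import name "my_brain_v7" to this list so load_brain() can find it.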
13
+
14
+ # ---- Boot splash ----
15
+ def boot_splash():
16
+ os.system("cls" if os.name == "nt" else "clear")
17
+ logo = r"""
18
+ ██████╗██╗ ██╗██████╗
19
+ ██╔════╝██║ ██║██╔══██╗
20
+ ██║ ███████║██████╔╝
21
+ ██║ ██╔══██║██╔═══╝
22
+ ╚██████╗██║ ██║██║
23
+ ╚═════╝╚═╝ ╚═╝╚═╝
24
+ Close-to-Human Brain v7.0
25
+ """
26
+ print(logo)
27
+ print("Initializing Universal Brain...")
28
+ steps = [
29
+ "Loading Core Modules",
30
+ "Starting Local DB",
31
+ "Bringing up CTB pipeline",
32
+ "Starting Global Sync (if configured)",
33
+ "Activating Creative Skill Vault",
34
+ "Launching Dashboard"
35
+ ]
36
+ for s in steps:
37
+ print(" →", s + "...")
38
+ time.sleep(0.6)
39
+ print("\n✅ Ready!\n")
40
+ time.sleep(0.3)
41
+
42
+ # ---- Adaptive loader for your brain module ----
43
+ def load_brain():
44
+ for name in CANDIDATE_MODULES:
45
+ try:
46
+ mod = __import__(name)
47
+ agent = None
48
+ # common exported instances/names:
49
+ if hasattr(mod, "AGENT"):
50
+ agent = getattr(mod, "AGENT")
51
+ elif hasattr(mod, "agent"):
52
+ agent = getattr(mod, "agent")
53
+ else:
54
+ # try to instantiate a class if present
55
+ cls_names = ["SuperAgentV7", "SuperAgent", "MultimodalBrain", "Agent", "Brain"]
56
+ for cls in cls_names:
57
+ if hasattr(mod, cls):
58
+ try:
59
+ agent = getattr(mod, cls)()
60
+ break
61
+ except Exception:
62
+ agent = None
63
+ # as last resort, if module defines functions, return module as agent
64
+ if agent is None:
65
+ agent = mod
66
+ print(f"[INFO] Loaded brain module: {name}")
67
+ return agent
68
+ except Exception:
69
+ continue
70
+ print("[WARN] Could not auto-import expected brain module names.")
71
+ print("Place your multimodular module in the same folder and name it one of:", ", ".join(CANDIDATE_MODULES))
72
+ return None
73
+
74
+ # ---- Helpers: flexible invocation for common brain actions ----
75
+ def brain_call(agent, fn_names, *args, **kwargs):
76
+ """Try to call first available function name on agent; return (ok, result)."""
77
+ if agent is None:
78
+ return False, "Brain not loaded"
79
+ for fn in fn_names:
80
+ if callable(getattr(agent, fn, None)):
81
  try:
82
+ return True, getattr(agent, fn)(*args, **kwargs)
83
+ except Exception as e:
84
+ return False, f"error calling {fn}: {e}"
85
+ # If agent itself exposes a 'ctb_handle' as attribute inside (e.g., agent.chb.ctb_handle)
86
  try:
87
+ # try nested common path: agent.chb.ctb_handle
88
+ chb = getattr(agent, "chb", None)
89
+ if chb:
90
+ for fn in fn_names:
91
+ if callable(getattr(chb, fn, None)):
92
+ try:
93
+ return True, getattr(chb, fn)(*args, **kwargs)
94
+ except Exception as e:
95
+ return False, f"error calling chb.{fn}: {e}"
96
  except Exception:
97
+ pass
98
+ return False, f"none of {fn_names} found on agent"
99
+
100
+ # ---- UI functions ----
101
+ menus = {
102
+ "1": "💬 Chat with AI (All Features in One Chat)",
103
+ "2": "🔎 Search Knowledge Base",
104
+ "3": "📤 Upload Media for Learning",
105
+ "4": "💾 Backup / Restore Brain (download backup)",
106
+ "5": "🎨 View Creative Skill Vault (top skills)",
107
+ "6": "🔁 Global Brain Sync Status",
108
+ "7": "🛠 Developer API Options",
109
+ "8": "📴 Offline Mode / Toggle",
110
+ "9": "❌ Exit"
111
+ }
112
+
113
+ def show_menu():
114
+ print("=== CHB v7.0 Main Menu ===")
115
+ for k in sorted(menus.keys(), key=int):
116
+ print(f"[{k}] {menus[k]}")
117
+
118
+ # ---- Media helpers (simple) ----
119
+ def read_file_as_payload(path):
120
+ p = pathlib.Path(path)
121
+ if not p.exists():
122
+ return None, f"file not found: {path}"
123
+ # minimal payload: path & size
124
+ try:
125
+ meta = {"path": str(p.resolve()), "size": p.stat().st_size}
126
+ return {"path": str(p.resolve()), "meta": meta}, None
127
+ except Exception as e:
128
+ return None, f"read error: {e}"
129
+
130
+ # ---- Menu 1: Multimodal chat loop ----
131
+ def multimodal_chat(agent):
132
+ print("\n=== Multimodal AI Chat ===")
133
+ print("Type naturally. Special commands:")
134
+ print(" /upload <path> - attach a file (image, video, audio)")
135
+ print(" /search <query> - run user-device search (plan + return style)")
136
+ print(" /skills <tag> - show top creative skills for tag")
137
+ print(" /backup - create a new backup and show path")
138
+ print(" /help - show this help")
139
+ print(" /exit - return to main menu\n")
140
+ while True:
141
+ try:
142
+ user = input("You: ").strip()
143
+ except (KeyboardInterrupt, EOFError):
144
+ print("\nReturning to main menu.")
145
+ return
146
+ if not user:
147
+ continue
148
+ if user.lower() in ("/exit", "exit", "quit"):
149
+ print("Returning to main menu.\n")
150
+ return
151
+ if user.startswith("/upload "):
152
+ path = user[len("/upload "):].strip().strip('"').strip("'")
153
+ payload, err = read_file_as_payload(path)
154
+ if err:
155
+ print("Error:", err); continue
156
+ # Build a simple plan_results-like structure and submit to brain
157
+ # plan_results should include images/videos/audios lists if agent expects that shape
158
+ plan_results = {}
159
+ suffix = pathlib.Path(path).suffix.lower()
160
+ if suffix in (".png", ".jpg", ".jpeg", ".webp", ".bmp"):
161
+ plan_results["images"] = [{"path": payload["path"], "quality_score": 0.9, "caption": "", "tags": []}]
162
+ elif suffix in (".mp4", ".mov", ".mkv", ".webm"):
163
+ plan_results["videos"] = [{"path": payload["path"], "quality_score": 0.8, "caption": "", "tags": []}]
164
+ elif suffix in (".mp3", ".wav", ".m4a", ".ogg"):
165
+ plan_results["audios"] = [{"path": payload["path"], "quality_score": 0.8, "caption": "", "tags": []}]
166
+ else:
167
+ plan_results["files"] = [{"path": payload["path"], "meta": payload["meta"]}]
168
+ ok, res = brain_call(agent, ["submit_plan_results", "handle_plan_results", "submit_results", "submit_plan"], plan_id="upload_"+str(int(time.time())), results=plan_results)
169
+ if ok:
170
+ print("AI: (processed upload) ->", res)
171
+ else:
172
+ print("AI: upload processed locally, but brain call failed:", res)
173
+ continue
174
+ if user.startswith("/search "):
175
+ q = user[len("/search "):].strip()
176
+ ok, plan = brain_call(agent, ["plan_search", "plan"], q)
177
+ if ok:
178
+ print("AI: Generated search plan. (Run this plan on client and submit results.)")
179
+ print(json.dumps(plan, indent=2) if isinstance(plan, dict) else plan)
180
+ else:
181
+ print("AI: search plan generation failed:", plan)
182
+ continue
183
+ if user.startswith("/skills "):
184
+ tag = user[len("/skills "):].strip()
185
+ ok, skills = brain_call(agent, ["top_skills", "top_skill", "top_by_tag"], tag, 5)
186
+ if ok:
187
+ print("Top skills for", tag, ":", skills)
188
+ else:
189
+ print("Could not fetch skills:", skills)
190
+ continue
191
+ if user.strip() == "/backup":
192
+ ok, path = brain_call(agent, ["download_latest_backup", "latest_backup", "get_latest_backup"])
193
+ if ok and path:
194
+ print("Latest backup path:", path)
195
+ else:
196
+ # try to create a new backup if method available
197
+ ok2, created = brain_call(agent, ["backup_create", "create_backup", "create_backup_zip"])
198
+ if ok2:
199
+ print("Created backup:", created)
200
+ else:
201
+ print("Backup not available:", path or created)
202
+ continue
203
+ if user.strip() == "/help":
204
+ print("Commands: /upload, /search, /skills, /backup, /exit")
205
  continue
206
 
207
+ # Regular freeform input: call ctb_handle if present, else agent.chat or agent.chat()
208
+ # Prefer 'ctb_handle' (Close-to-Human Brain multimodal pipeline), fall back to 'chat' or 'plan_search'
209
+ ok, resp = brain_call(agent, ["ctb_handle", "handle_input", "chat", "chat_message", "chat_query"], input_data=user)
210
+ if not ok:
211
+ # try more permissive call signatures
212
+ try:
213
+ # some agents expect chat(text)
214
+ resp = agent.chat(user)
215
+ print("AI:", resp)
216
+ except Exception as e:
217
+ print("AI call failed:", resp)
218
+ else:
219
+ print("AI:", resp)
220
+
221
+ # ---- Menus 2..9 simple wrappers that call brain functions if present ----
222
+ def menu_search_kb(agent):
223
+ q = input("Enter search query: ").strip()
224
+ if not q: return
225
+ ok, res = brain_call(agent, ["search_facts", "facts_search", "query_facts"], q)
226
+ if ok:
227
+ print("Results:", res)
228
  else:
229
+ print("Search failed:", res)
230
+
231
+ def menu_upload_media(agent):
232
+ path = input("Path to media file: ").strip()
233
+ if not path: return
234
+ payload, err = read_file_as_payload(path)
235
+ if err:
236
+ print("Error:", err); return
237
+ # submit via same upload command as chat
238
+ plan_results = {}
239
+ suffix = pathlib.Path(path).suffix.lower()
240
+ if suffix in (".png", ".jpg", ".jpeg", ".webp", ".bmp"):
241
+ plan_results["images"] = [{"path": payload["path"], "quality_score": 0.9}]
242
+ elif suffix in (".mp4", ".mov", ".mkv"):
243
+ plan_results["videos"] = [{"path": payload["path"], "quality_score": 0.8}]
244
+ elif suffix in (".mp3", ".wav"):
245
+ plan_results["audios"] = [{"path": payload["path"], "quality_score": 0.8}]
246
+ else:
247
+ plan_results["files"] = [{"path": payload["path"], "meta": payload["meta"]}]
248
+ ok, res = brain_call(agent, ["submit_plan_results", "handle_plan_results"], plan_id="manual_upload_"+str(int(time.time())), results=plan_results)
249
+ if ok:
250
+ print("Upload processed:", res)
251
+ else:
252
+ print("Upload failed:", res)
253
 
254
+ def menu_backup_download(agent):
255
+ ok, p = brain_call(agent, ["download_latest_backup", "latest_backup", "get_latest_backup"])
256
+ if ok and p:
257
+ print("Latest backup:", p)
258
+ else:
259
+ print("No backup available or call failed:", p)
260
 
261
+ def menu_view_vault(agent):
262
+ tag = input("Enter skill tag (or blank to list all): ").strip()
263
+ if tag:
264
+ ok, s = brain_call(agent, ["top_skills", "top_by_tag"], tag, 10)
265
+ else:
266
+ ok, s = brain_call(agent, ["list_skills", "get_skills"], )
267
+ if ok:
268
+ print("Skills:", s)
269
+ else:
270
+ print("Failed to retrieve skills:", s)
271
 
272
+ def menu_sync_status(agent):
273
+ ok, st = brain_call(agent, ["global_sync_status", "sync_status", "get_sync_status"])
274
+ if ok:
275
+ print("Global Sync Status:", st)
276
+ else:
277
+ print("Global sync status not available:", st)
278
+
279
+ def menu_dev_api(agent):
280
+ print("Developer API options:")
281
+ print(" 1) Add/Integrate module from file")
282
+ print(" 2) List modules")
283
+ choice = input("choice: ").strip()
284
+ if choice == "1":
285
+ path = input("Path to module (py or base64-wasm): ").strip()
286
+ payload, err = read_file_as_payload(path)
287
+ if err:
288
+ print("Error:", err); return
289
+ code = ""
290
+ try:
291
+ code = open(payload["path"], "rb").read().decode("utf-8")
292
+ except Exception:
293
+ import base64
294
+ code = base64.b64encode(open(payload["path"], "rb").read()).decode()
295
+ name = input("Module name (short): ").strip() or f"mod_{int(time.time())}"
296
+ ok, res = brain_call(agent, ["add_module", "integrate_module"], name, code, None)
297
+ print("Result:", res)
298
+ elif choice == "2":
299
+ ok, res = brain_call(agent, ["list_modules", "get_modules"])
300
+ print("Modules:", res if ok else "failed:"+str(res))
301
+ else:
302
+ print("cancel")
303
 
304
+ def menu_offline_toggle(agent):
305
+ ok, st = brain_call(agent, ["toggle_offline", "set_offline", "offline_toggle"])
306
+ if ok:
307
+ print("Offline toggled:", st)
308
  else:
309
+ print("Offline toggle not available; try starting/stopping network in your environment.")
310
+
311
+ # ---- Main loop ----
312
+ def main():
313
+ boot_splash()
314
+ agent = load_brain()
315
+ if agent is None:
316
+ print("Brain not loaded. You can still use app UI, but brain-dependent actions will fail.")
317
+ while True:
318
+ show_menu()
319
+ choice = input("Select: ").strip()
320
+ if choice == "1":
321
+ multimodal_chat(agent)
322
+ elif choice == "2":
323
+ menu_search_kb(agent)
324
+ elif choice == "3":
325
+ menu_upload_media(agent)
326
+ elif choice == "4":
327
+ menu_backup_download(agent)
328
+ elif choice == "5":
329
+ menu_view_vault(agent)
330
+ elif choice == "6":
331
+ menu_sync_status(agent)
332
+ elif choice == "7":
333
+ menu_dev_api(agent)
334
+ elif choice == "8":
335
+ menu_offline_toggle(agent)
336
+ elif choice == "9":
337
+ print("Goodbye.")
338
+ break
339
  else:
340
+ print("Unknown option; try again.\n")
341
 
342
  if __name__ == "__main__":
343
+ main()
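+ # To run the dashboard (assumes Python 3.x and the brain module sitting next to this file):
+ #   python app.py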