Princeaka committed
Commit 0da93e3 · verified · 1 Parent(s): 58ee346

Update app.py

Files changed (1)
  1. app.py +329 -808
app.py CHANGED
@@ -1,822 +1,343 @@
1
- # app.py — Close-to-Human Multimodal AI (Gradio + FastAPI)
2
- # Single-file, offline-friendly, CPU/GPU, secure API key, CHB memory.
3
- # NOTE: Uses only free/open models. First run will download weights.
4
-
5
- import os
6
- import io
7
- import gc
8
- import sys
9
- import json
10
- import time
11
- import base64
12
- import random
13
- import string
14
- import hashlib
15
- import threading
16
- from typing import List, Dict, Optional, Tuple
17
-
18
- # ===== Runtime device selection =====
19
- import torch
20
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
21
- DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
22
-
23
- # ===== Web server (FastAPI) + UI (Gradio) =====
24
- import gradio as gr
25
- from fastapi import FastAPI, Header, HTTPException, UploadFile, File
26
- from fastapi.middleware.cors import CORSMiddleware
27
- import uvicorn
28
-
29
- # ===== Core AI libs =====
30
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
31
- from sentence_transformers import SentenceTransformer
32
- import faiss
33
- import networkx as nx
34
-
35
- # Vision & Diffusion
36
- from PIL import Image, ImageOps, ImageFont, ImageDraw
37
- from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipeline
38
- from transformers import BlipProcessor, BlipForConditionalGeneration
39
-
40
- # Audio
41
- from faster_whisper import WhisperModel
42
- import soundfile as sf
43
- import librosa
44
-
45
- # TTS / Voice cloning (XTTS v2)
46
- from TTS.api import TTS
47
-
48
- # Video
49
- import imageio
50
- import cv2
51
- from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
52
-
53
- # Files
54
- import numpy as np
55
- import pandas as pd
56
- from PyPDF2 import PdfReader
57
- import docx
58
- import openpyxl
59
-
60
- # Security
61
- from cryptography.fernet import Fernet
62
-
63
- # =========================================
64
- # Paths & lightweight persistence
65
- # =========================================
66
- ROOT = os.environ.get("APP_ROOT", ".")
67
- DATA_DIR = os.path.join(ROOT, "data")
68
- os.makedirs(DATA_DIR, exist_ok=True)
69
-
70
- KEYS_FILE = os.path.join(DATA_DIR, "api_keys.json")
71
- FACTS_FILE = os.path.join(DATA_DIR, "facts.jsonl")
72
- KG_FILE = os.path.join(DATA_DIR, "knowledge_graph.json")
73
- MEM_INDEX_FILE = os.path.join(DATA_DIR, "faiss.index")
74
- MEM_META_FILE = os.path.join(DATA_DIR, "faiss_meta.json")
75
- FERNET_KEY_FILE = os.path.join(DATA_DIR, "fernet.key")
76
-
77
- # =========================================
78
- # Security: API Keys
79
- # =========================================
80
- def load_or_create_fernet_key():
81
- if os.path.exists(FERNET_KEY_FILE):
82
- with open(FERNET_KEY_FILE, "rb") as f:
83
- return f.read()
84
- key = Fernet.generate_key()
85
- with open(FERNET_KEY_FILE, "wb") as f:
86
- f.write(key)
87
- return key
88
-
89
- FERNET = Fernet(load_or_create_fernet_key())
90
-
91
- def _rand_key(n=25):
92
- chars = string.ascii_letters + string.digits
93
- return "".join(random.choice(chars) for _ in range(n))
94
-
95
- def load_keys():
96
- if os.path.exists(KEYS_FILE):
97
- with open(KEYS_FILE, "rb") as f:
98
- enc = f.read()
99
- if not enc:
100
- return {}
101
  try:
102
- data = FERNET.decrypt(enc).decode("utf-8")
103
- return json.loads(data)
104
- except Exception:
105
- return {}
106
- return {}
107
-
108
- def save_keys(d):
109
- enc = FERNET.encrypt(json.dumps(d).encode("utf-8"))
110
- with open(KEYS_FILE, "wb") as f:
111
- f.write(enc)
112
-
113
- API_KEYS = load_keys()
114
- if not API_KEYS:
115
- # First-run bootstrap default user
116
- api_key = _rand_key(25)
117
- API_KEYS["default_user"] = {"api_key": api_key, "created_at": time.time()}
118
- save_keys(API_KEYS)
119
-
120
- def get_default_api_key():
121
- return API_KEYS["default_user"]["api_key"]
122
-
123
- def verify_api_key(header_key: str):
124
- for user, rec in API_KEYS.items():
125
- if rec.get("api_key") == header_key:
126
- return True
127
- return False
128
-
129
- # =========================================
130
- # CHB Memory: FAISS + KG + Facts
131
- # =========================================
132
- EMBEDDER = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=DEVICE)
133
-
134
- # FAISS memory
135
- if os.path.exists(MEM_INDEX_FILE) and os.path.exists(MEM_META_FILE):
136
  try:
137
- index = faiss.read_index(MEM_INDEX_FILE)
138
- with open(MEM_META_FILE, "r") as f:
139
- mem_meta = json.load(f)
140
  except Exception:
141
- index = faiss.IndexFlatL2(384)
142
- mem_meta = []
143
- else:
144
- index = faiss.IndexFlatL2(384)
145
- mem_meta = []
146
-
147
- def mem_add(text: str, meta: Dict):
148
- vec = EMBEDDER.encode([text])
149
- index.add(np.array(vec, dtype="float32"))
150
- mem_meta.append({"text": text, "meta": meta, "ts": time.time()})
151
- faiss.write_index(index, MEM_INDEX_FILE)
152
- with open(MEM_META_FILE, "w") as f:
153
- json.dump(mem_meta, f)
154
-
155
- def mem_search(query: str, top_k=5):
156
- if index.ntotal == 0:
157
- return []
158
- q = EMBEDDER.encode([query])
159
- D, I = index.search(np.array(q, dtype="float32"), top_k)
160
- results = []
161
- for d, i in zip(D[0], I[0]):
162
- if i < len(mem_meta):
163
- results.append((float(d), mem_meta[i]))
164
- return results
165
-
166
- # Knowledge Graph
167
- if os.path.exists(KG_FILE):
168
- with open(KG_FILE, "r") as f:
169
- KG_data = json.load(f)
170
- KG = nx.DiGraph()
171
- KG.add_nodes_from(KG_data["nodes"])
172
- KG.add_edges_from([(e["u"], e["v"], e["data"]) for e in KG_data["edges"]])
173
- else:
174
- KG = nx.DiGraph()
175
-
176
- def kg_save():
177
- data = {
178
- "nodes": list(KG.nodes()),
179
- "edges": [{"u": u, "v": v, "data": KG[u][v]} for u, v in KG.edges()]
180
- }
181
- with open(KG_FILE, "w") as f:
182
- json.dump(data, f)
183
-
184
- def kg_add_fact(subject, predicate, obj, confidence=1.0, source="user"):
185
- KG.add_node(subject)
186
- KG.add_node(obj)
187
- KG.add_edge(subject, obj, data={"predicate": predicate, "confidence": confidence, "source": source, "ts": time.time()})
188
- kg_save()
189
-
190
- # Versioned facts (JSONL)
191
- def add_fact(subject, predicate, obj, confidence=1.0, source="system"):
192
- record = {
193
- "ts": time.time(),
194
- "subject": subject,
195
- "predicate": predicate,
196
- "object": obj,
197
- "confidence": confidence,
198
- "source": source,
199
- }
200
- with open(FACTS_FILE, "a") as f:
201
- f.write(json.dumps(record) + "\n")
202
-
203
- # =========================================
204
- # Core Models (loaded lazily)
205
- # =========================================
206
- LLM_NAME = os.environ.get("LLM_NAME", "google/flan-t5-base") # CPU-friendly
207
- TEXT_GEN = None
208
- BLIP_PROC = None
209
- BLIP_MODEL = None
210
- SD_TXT2IMG = None
211
- SD_IMG2IMG = None
212
- SD_INPAINT = None
213
- WHISPER = None
214
- TTS_MODEL = None
215
-
216
- def load_text_llm():
217
- global TEXT_GEN
218
- if TEXT_GEN is None:
219
- TEXT_GEN = pipeline("text2text-generation", model=LLM_NAME, device=0 if DEVICE=="cuda" else -1)
220
- return TEXT_GEN
221
-
222
- def load_blip():
223
- global BLIP_PROC, BLIP_MODEL
224
- if BLIP_MODEL is None:
225
- BLIP_PROC = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
226
- BLIP_MODEL = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(DEVICE)
227
- return BLIP_PROC, BLIP_MODEL
228
-
229
- def load_sd_txt2img():
230
- global SD_TXT2IMG
231
- if SD_TXT2IMG is None:
232
- SD_TXT2IMG = StableDiffusionPipeline.from_pretrained(
233
- "runwayml/stable-diffusion-v1-5",
234
- torch_dtype=torch.float16 if DEVICE=="cuda" else torch.float32
235
- )
236
- SD_TXT2IMG = SD_TXT2IMG.to(DEVICE)
237
- return SD_TXT2IMG
238
-
239
- def load_sd_img2img():
240
- global SD_IMG2IMG
241
- if SD_IMG2IMG is None:
242
- SD_IMG2IMG = StableDiffusionImg2ImgPipeline.from_pretrained(
243
- "runwayml/stable-diffusion-v1-5",
244
- torch_dtype=torch.float16 if DEVICE=="cuda" else torch.float32
245
- ).to(DEVICE)
246
- return SD_IMG2IMG
247
-
248
- def load_sd_inpaint():
249
- global SD_INPAINT
250
- if SD_INPAINT is None:
251
- SD_INPAINT = StableDiffusionInpaintPipeline.from_pretrained(
252
- "runwayml/stable-diffusion-inpainting",
253
- torch_dtype=torch.float16 if DEVICE=="cuda" else torch.float32
254
- ).to(DEVICE)
255
- return SD_INPAINT
256
-
257
- def load_whisper():
258
- global WHISPER
259
- if WHISPER is None:
260
- # faster-whisper model names: tiny, base, small, medium, large-v3
261
- model_size = os.environ.get("WHISPER_SIZE", "small")
262
- WHISPER = WhisperModel(model_size, device=DEVICE, compute_type="float16" if DEVICE=="cuda" else "int8")
263
- return WHISPER
264
-
265
- def load_tts():
266
- global TTS_MODEL
267
- if TTS_MODEL is None:
268
- # Multilingual XTTS v2 (supports voice cloning)
269
- TTS_MODEL = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
270
- return TTS_MODEL
271
-
272
- # =========================================
273
- # CHB Pipeline
274
- # =========================================
275
- def chb_enrich_context(query: str) -> str:
276
- # Retrieve top-3 from FAISS to enrich prompt
277
- hits = mem_search(query, top_k=3)
278
- notes = []
279
- for d, meta in hits:
280
- notes.append(f"[mem@{time.strftime('%Y-%m-%d', time.localtime(meta['ts']))}] {meta['text']}")
281
- return "\n".join(notes)
282
-
283
- def chb_generate_reply(user_text: str) -> str:
284
- # Build prompt with memory enrichment
285
- ctx = chb_enrich_context(user_text)
286
- prompt = ('You are a helpful, warm assistant. Use the references if useful.\n'
287
- 'References:\n'
288
- f'{ctx}\n\n'
289
- f'User: {user_text}\n'
290
- 'Assistant:')
291
- gen = load_text_llm()
292
- out = gen(prompt, max_new_tokens=256)
293
- reply = out[0]["generated_text"]
294
- # Store interaction in memory
295
- mem_add(user_text, {"type": "user"})
296
- mem_add(reply, {"type": "assistant"})
297
- return reply
298
-
299
- def chb_store_user_fact(text: str):
300
- # Simple detection: "my name is X"
301
- lower = text.lower()
302
- if "my name is" in lower:
303
- name = text.split("my name is", 1)[1].strip().split()[0]
304
- add_fact("user", "name", name, confidence=1.0, source="user")
305
- kg_add_fact("user", "name", name, confidence=1.0, source="user")
306
- mem_add(f"User name = {name}", {"type":"fact"})
307
-
308
- # =========================================
309
- # Multimodal Feature Functions
310
- # =========================================
311
- def image_to_text(img: Image.Image) -> str:
312
- proc, model = load_blip()
313
- inputs = proc(images=img, return_tensors="pt").to(DEVICE)
314
- out = model.generate(**inputs, max_new_tokens=64)
315
- caption = proc.decode(out[0], skip_special_tokens=True)
316
- mem_add(f"IMG2TXT: {caption}", {"type":"img2txt"})
317
- return caption
318
-
319
- def text_to_image(prompt: str, steps: int=20, guidance: float=7.5, seed: Optional[int]=None) -> Image.Image:
320
- pipe = load_sd_txt2img()
321
- if seed is None:
322
- seed = random.randint(0, 2**32-1)
323
- generator = torch.Generator(device=DEVICE).manual_seed(seed)
324
- img = pipe(prompt, num_inference_steps=steps, guidance_scale=guidance, generator=generator).images[0]
325
- mem_add(f"TXT2IMG prompt: {prompt}", {"type":"txt2img"})
326
- return img
327
-
328
- def edit_image(img: Image.Image, prompt: str, strength: float=0.6, steps: int=20) -> Image.Image:
329
- pipe = load_sd_img2img()
330
- img = img.convert("RGB")
331
- edited = pipe(prompt=prompt, image=img, strength=strength, num_inference_steps=steps).images[0]
332
- mem_add(f"IMGEDIT: {prompt}", {"type":"imgedit"})
333
- return edited
334
-
335
- def inpaint_image(img: Image.Image, mask: Image.Image, prompt: str, steps: int=20) -> Image.Image:
336
- pipe = load_sd_inpaint()
337
- img = img.convert("RGB")
338
- mask = mask.convert("RGB")
339
- out = pipe(prompt=prompt, image=img, mask_image=mask, num_inference_steps=steps).images[0]
340
- mem_add(f"INPAINT: {prompt}", {"type":"inpaint"})
341
- return out
342
-
343
- def voice_to_text(audio_path: str) -> str:
344
- model = load_whisper()
345
- segments, info = model.transcribe(audio_path, beam_size=5)
346
- text = " ".join([seg.text for seg in segments])
347
- mem_add(f"ASR: {text[:200]}", {"type":"asr"})
348
- return text
349
-
350
- def text_to_voice(text: str, ref_audio: Optional[str]=None, speaker: Optional[str]=None, out_path: Optional[str]=None) -> str:
351
- tts = load_tts()
352
- if out_path is None:
353
- out_path = os.path.join(DATA_DIR, f"tts_{int(time.time())}.wav")
354
- if ref_audio:
355
- tts.tts_to_file(text=text, file_path=out_path, speaker_wav=ref_audio, language="en")
356
- else:
357
- # default voice
358
- tts.tts_to_file(text=text, file_path=out_path, speaker=speaker or "female-en-5", language="en")
359
- mem_add(f"TTS: {text[:120]}", {"type":"tts"})
360
- return out_path
361
-
362
- def video_to_text(video_path: str, frames: int=8) -> str:
363
- # Sample frames evenly, caption via BLIP, join
364
- proc, model = load_blip()
365
- cap = cv2.VideoCapture(video_path)
366
- total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 1
367
- idxs = np.linspace(0, total-1, num=min(frames, total), dtype=int)
368
- captions = []
369
- for i in idxs:
370
- cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
371
- ok, frame = cap.read()
372
- if not ok:
373
  continue
374
- img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
375
- inputs = proc(images=img, return_tensors="pt").to(DEVICE)
376
- out = model.generate(**inputs, max_new_tokens=32)
377
- cap_text = proc.decode(out[0], skip_special_tokens=True)
378
- captions.append(cap_text)
379
- cap.release()
380
- summary = " | ".join(captions) if captions else "No frames read."
381
- mem_add(f"VID2TXT: {summary[:200]}", {"type":"vid2txt"})
382
- return summary
383
-
384
- def text_to_video_clip(prompt: str, seconds: int=3, fps: int=8) -> str:
385
- # Lightweight approach: generate N images via SD and stitch into GIF/MP4
386
- frames = []
387
- n = seconds * fps
388
- for i in range(n):
389
- seed = random.randint(0, 2**32-1)
390
- img = text_to_image(prompt + f", cinematic frame {i+1}", steps=15, guidance=7.0, seed=seed)
391
- frames.append(np.array(img))
392
- out_path = os.path.join(DATA_DIR, f"t2v_{int(time.time())}.mp4")
393
- imageio.mimwrite(out_path, frames, fps=fps, quality=7)
394
- mem_add(f"T2V: {prompt}", {"type":"t2v"})
395
- return out_path
396
-
397
- def video_edit_caption(video_path: str, caption_text: str) -> str:
398
- clip = VideoFileClip(video_path)
399
- txt = TextClip(caption_text, fontsize=40, color="white").set_duration(clip.duration).set_position(("center", "bottom"))
400
- out = CompositeVideoClip([clip, txt])
401
- out_path = os.path.join(DATA_DIR, f"captioned_{int(time.time())}.mp4")
402
- out.write_videofile(out_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
403
- mem_add(f"VIDCAP: {caption_text[:120]}", {"type":"vidcap"})
404
- return out_path
405
-
406
- def code_to_text(code: str, lang: str="python") -> str:
407
- prompt = f"Explain this {lang} code step by step. Be concise.\n\n```{lang}\n{code}\n```"
408
- return chb_generate_reply(prompt)
409
-
410
- def text_to_code(spec: str, lang: str="python") -> str:
411
- prompt = f"Write {lang} code that satisfies the following requirement. Provide only code:\n{spec}"
412
- return chb_generate_reply(prompt)
413
-
414
- def code_to_image(code: str, lang: str="python") -> Image.Image:
415
- from pygments import highlight
416
- from pygments.lexers import get_lexer_by_name
417
- from pygments.formatters import ImageFormatter
418
- lexer = get_lexer_by_name(lang, stripall=True)
419
- formatter = ImageFormatter(font_name="DejaVu Sans Mono", line_numbers=True)
420
- img_bytes = highlight(code, lexer, formatter)
421
- img = Image.open(io.BytesIO(img_bytes))
422
- mem_add(f"CODE2IMG {lang}", {"type":"code2img"})
423
- return img
424
-
425
- def voice_to_code(audio_path: str, lang: str="python") -> str:
426
- spec = voice_to_text(audio_path)
427
- return text_to_code(spec, lang)
428
-
429
- def emoji_interpret(text: str) -> str:
430
- import emoji as em
431
- # Convert emoji to description
432
- return em.demojize(text, language='en')
433
-
434
- def emoji_generate(desc: str) -> Image.Image:
435
- # Generate sticker-like image via SD
436
- return text_to_image(f"high quality 2D sticker emoji of: {desc}, white background, bold outline, vector style", steps=25, guidance=8.5)
437
-
438
- def file_reader(file_path: str) -> str:
439
- ext = os.path.splitext(file_path)[1].lower()
440
- if ext in [".txt", ".md", ".py", ".json", ".csv"]:
441
- with open(file_path, "r", errors="ignore") as f:
442
- return f.read()
443
- if ext == ".pdf":
444
- reader = PdfReader(file_path)
445
- return "\n".join(page.extract_text() or "" for page in reader.pages)
446
- if ext in [".docx"]:
447
- d = docx.Document(file_path)
448
- return "\n".join(p.text for p in d.paragraphs)
449
- if ext in [".xlsx"]:
450
- df = pd.read_excel(file_path)
451
- return df.to_csv(index=False)
452
- return "Unsupported file type."
453
-
454
- def file_to_text(upload) -> str:
455
- with open(upload.name, "wb") as f:
456
- f.write(upload.read())
457
- return file_reader(upload.name)
458
 
459
- def text_to_file(text: str, ext: str="txt") -> str:
460
- out = os.path.join(DATA_DIR, f"file_{int(time.time())}.{ext}")
461
- if ext == "txt":
462
- with open(out, "w", encoding="utf-8") as f:
463
- f.write(text)
464
- elif ext == "docx":
465
- d = docx.Document()
466
- d.add_paragraph(text)
467
- d.save(out)
468
- elif ext == "csv":
469
- with open(out, "w", encoding="utf-8") as f:
470
- f.write(text)
471
  else:
472
- with open(out, "w", encoding="utf-8") as f:
473
- f.write(text)
474
- return out
475
-
476
- # =========================================
477
- # Start Chatting (All-in-One)
478
- # =========================================
479
- def auto_route(user_text: str, image: Optional[Image.Image], audio: Optional[str], video: Optional[str], file: Optional[str]):
480
- # Unified multimodal router. Decide intent and call proper module.
481
- reply_text = ""
482
- reply_image = None
483
- reply_audio = None
484
- reply_video = None
485
-
486
- # Learn simple facts
487
- if user_text:
488
- chb_store_user_fact(user_text)
489
-
490
- # If image present and no explicit instruction -> caption it
491
- if image is not None and (not user_text or "describe" in user_text.lower() or "what is" in user_text.lower()):
492
- reply_text = image_to_text(image)
493
-
494
- # If user asks to 'draw/make/generate' an image
495
- elif user_text and any(k in user_text.lower() for k in ["draw", "generate image", "make an image", "text to image", "create a picture", "illustrate"]):
496
- reply_image = text_to_image(user_text)
497
 
498
- # Voice present
499
- elif audio is not None:
500
- txt = voice_to_text(audio)
501
- # If user asked for code via voice
502
- if "code" in txt.lower() or "function" in txt.lower():
503
- reply_text = text_to_code(txt)
504
- else:
505
- reply_text = chb_generate_reply(txt)
506
 
507
- # Video present
508
- elif video is not None:
509
- reply_text = video_to_text(video)
510
 
511
- # File present
512
- elif file is not None:
513
- content = file_reader(file)
514
- reply_text = chb_generate_reply(f"Summarize this file:\n{content[:8000]}")
515
 
516
- # Pure text case
517
  else:
518
- # Emojis only?
519
- stripped = user_text.strip() if user_text else ""
520
- if stripped and all(ord(c) > 1000 or c in ":)-(" or c.isascii()==False for c in stripped):
521
- reply_text = emoji_interpret(stripped)
522
  else:
523
- reply_text = chb_generate_reply(user_text or "Hello")
524
-
525
- return reply_text, reply_image, reply_audio, reply_video
526
-
527
- # =========================================
528
- # FastAPI App
529
- # =========================================
530
- api = FastAPI(title="Close-to-Human Multimodal API")
531
- api.add_middleware(
532
- CORSMiddleware,
533
- allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
534
- )
535
-
536
- @api.get("/api/ping")
537
- def ping():
538
- return {"ok": True, "device": DEVICE, "api_key_hint": get_default_api_key()[:6] + "***"}
539
-
540
- @api.post("/api/chat")
541
- def api_chat(message: str, x_api_key: Optional[str] = Header(None)):
542
- if not x_api_key or not verify_api_key(x_api_key):
543
- raise HTTPException(status_code=401, detail="Invalid API key")
544
- text, img, aud, vid = auto_route(message, None, None, None, None)
545
- return {"text": text}
546
-
547
- @api.post("/api/text-to-image")
548
- def api_t2i(prompt: str, x_api_key: Optional[str] = Header(None)):
549
- if not x_api_key or not verify_api_key(x_api_key):
550
- raise HTTPException(status_code=401, detail="Invalid API key")
551
- img = text_to_image(prompt)
552
- buf = io.BytesIO()
553
- img.save(buf, format="PNG")
554
- b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
555
- return {"image_base64": b64}
556
-
557
- # =========================================
558
- # Gradio UI
559
- # =========================================
560
- def ui_start_chat(user_text, image, audio, video, file):
561
- audio_path = None
562
- video_path = None
563
- file_path = None
564
- if audio is not None:
565
- audio_path = audio
566
- if video is not None:
567
- video_path = video
568
- if file is not None:
569
- # gradio gives a temp path
570
- file_path = file.name
571
- text, img, aud, vid = auto_route(user_text, image, audio_path, video_path, file_path)
572
- return text, img, aud, vid
573
-
574
- def ui_text_chat(prompt):
575
- return chb_generate_reply(prompt)
576
-
577
- def ui_image_to_text(image):
578
- return image_to_text(image)
579
-
580
- def ui_text_to_image(prompt, steps, guidance):
581
- return text_to_image(prompt, steps=steps, guidance=guidance)
582
-
583
- def ui_image_edit(image, prompt, strength, steps):
584
- return edit_image(image, prompt, strength=strength, steps=steps)
585
-
586
- def ui_inpaint(image, mask, prompt, steps):
587
- return inpaint_image(image, mask, prompt, steps=steps)
588
-
589
- def ui_voice_to_text(audio):
590
- return voice_to_text(audio)
591
-
592
- def ui_text_to_voice(text, ref_audio):
593
- out = text_to_voice(text, ref_audio=ref_audio)
594
- return out
595
-
596
- def ui_video_to_text(video):
597
- return video_to_text(video)
598
-
599
- def ui_text_to_video(prompt, seconds, fps):
600
- return text_to_video_clip(prompt, seconds=seconds, fps=fps)
601
-
602
- def ui_video_edit_caption(video, caption):
603
- return video_edit_caption(video, caption)
604
-
605
- def ui_text_to_code(text, lang):
606
- return text_to_code(text, lang)
607
-
608
- def ui_code_to_text(code, lang):
609
- return code_to_text(code, lang)
610
-
611
- def ui_code_to_image(code, lang):
612
- return code_to_image(code, lang)
613
-
614
- def ui_voice_to_code(audio, lang):
615
- return voice_to_code(audio, lang)
616
-
617
- def ui_emoji_interpret(text):
618
- return emoji_interpret(text)
619
-
620
- def ui_emoji_generate(desc):
621
- return emoji_generate(desc)
622
-
623
- def ui_file_reader(file):
624
- return file_reader(file.name)
625
-
626
- def ui_file_to_text(file):
627
- return file_reader(file.name)
628
-
629
- def ui_text_to_file(text, ext):
630
- return text_to_file(text, ext)
631
-
632
- def build_gradio():
633
- with gr.Blocks(title="Close-to-Human Multimodal AI") as demo:
634
- gr.Markdown("## Start chatting AI this — all-in-one, natural multimodal chat")
635
- with gr.Tab("Start Chatting AI"):
636
- with gr.Row():
637
- user_text = gr.Textbox(label="Say anything… (text, emojis, ask for code, etc.)")
638
- with gr.Row():
639
- image = gr.Image(label="Optional image", type="pil")
640
- audio = gr.Audio(label="Optional audio (wav/mp3)", type="filepath")
641
- with gr.Row():
642
- video = gr.Video(label="Optional video", format="mp4")
643
- file = gr.File(label="Optional file")
644
- go = gr.Button("Send")
645
- out_text = gr.Textbox(label="AI reply (text)")
646
- out_img = gr.Image(label="AI reply (image)")
647
- out_aud = gr.Audio(label="AI reply (audio)", type="filepath")
648
- out_vid = gr.Video(label="AI reply (video)")
649
- go.click(ui_start_chat, [user_text, image, audio, video, file], [out_text, out_img, out_aud, out_vid])
650
-
651
- with gr.Tab("Text Chat"):
652
- prompt = gr.Textbox(label="Prompt")
653
- btn = gr.Button("Ask")
654
- answer = gr.Textbox(label="Answer")
655
- btn.click(ui_text_chat, [prompt], [answer])
656
-
657
- with gr.Tab("Image → Text"):
658
- img = gr.Image(label="Image", type="pil")
659
- btn2 = gr.Button("Caption")
660
- cap = gr.Textbox(label="Caption")
661
- btn2.click(ui_image_to_text, [img], [cap])
662
-
663
- with gr.Tab("Text → Image"):
664
- ti = gr.Textbox(label="Prompt")
665
- steps = gr.Slider(5, 50, value=20, step=1, label="Steps")
666
- guidance = gr.Slider(1.0, 12.0, value=7.5, step=0.5, label="Guidance")
667
- btn3 = gr.Button("Generate")
668
- img_out = gr.Image(label="Image")
669
- btn3.click(ui_text_to_image, [ti, steps, guidance], [img_out])
670
-
671
- with gr.Tab("Image Editing & Painting"):
672
- base = gr.Image(label="Base image", type="pil")
673
- edp = gr.Textbox(label="Edit prompt")
674
- strength = gr.Slider(0.1, 1.0, value=0.6, step=0.1, label="Strength")
675
- steps_e = gr.Slider(5, 50, value=20, step=1, label="Steps")
676
- btn4 = gr.Button("Edit")
677
- out_e = gr.Image(label="Edited image")
678
- btn4.click(ui_image_edit, [base, edp, strength, steps_e], [out_e])
679
-
680
- with gr.Tab("Image Inpainting"):
681
- base2 = gr.Image(label="Base image", type="pil")
682
- mask = gr.Image(label="Mask (white=paint)", type="pil")
683
- inp = gr.Textbox(label="Inpaint prompt")
684
- steps_i = gr.Slider(5, 50, value=20, step=1, label="Steps")
685
- btn5 = gr.Button("Inpaint")
686
- out_i = gr.Image(label="Inpainted")
687
- btn5.click(ui_inpaint, [base2, mask, inp, steps_i], [out_i])
688
-
689
- with gr.Tab("Voice → Text"):
690
- a_in = gr.Audio(label="Audio", type="filepath")
691
- a_btn = gr.Button("Transcribe")
692
- a_out = gr.Textbox(label="Transcription")
693
- a_btn.click(ui_voice_to_text, [a_in], [a_out])
694
-
695
- with gr.Tab("Text → Voice"):
696
- ttv_text = gr.Textbox(label="Text")
697
- ref = gr.Audio(label="Reference voice (optional)", type="filepath")
698
- ttv_btn = gr.Button("Synthesize")
699
- ttv_out = gr.Audio(label="Speech", type="filepath")
700
- ttv_btn.click(ui_text_to_voice, [ttv_text, ref], [ttv_out])
701
-
702
- with gr.Tab("Voice Cloning → Code"):
703
- vcc_in = gr.Audio(label="Instruction audio", type="filepath")
704
- vcc_lang = gr.Dropdown(choices=["python","javascript","html","css","java","c","cpp","go","rust"], value="python", label="Language")
705
- vcc_btn = gr.Button("Transcribe & Code")
706
- vcc_out = gr.Code(label="Generated code")
707
- vcc_btn.click(ui_voice_to_code, [vcc_in, vcc_lang], [vcc_out])
708
-
709
- with gr.Tab("Video → Text"):
710
- v_in = gr.Video(label="Video")
711
- v_btn = gr.Button("Describe")
712
- v_out = gr.Textbox(label="Description")
713
- v_btn.click(ui_video_to_text, [v_in], [v_out])
714
-
715
- with gr.Tab("Text → Video Clip"):
716
- t2v_prompt = gr.Textbox(label="Prompt")
717
- t2v_sec = gr.Slider(1, 5, value=3, step=1, label="Seconds")
718
- t2v_fps = gr.Slider(4, 12, value=8, step=1, label="FPS")
719
- t2v_btn = gr.Button("Generate Clip")
720
- t2v_out = gr.Video(label="Video")
721
- t2v_btn.click(ui_text_to_video, [t2v_prompt, t2v_sec, t2v_fps], [t2v_out])
722
-
723
- with gr.Tab("Video Editing / Caption"):
724
- ve_in = gr.Video(label="Video")
725
- ve_text = gr.Textbox(label="Caption text")
726
- ve_btn = gr.Button("Overlay Caption")
727
- ve_out = gr.Video(label="Captioned Video")
728
- ve_btn.click(ui_video_edit_caption, [ve_in, ve_text], [ve_out])
729
-
730
- with gr.Tab("Text ↔ Code"):
731
- with gr.Row():
732
- t2c_text = gr.Textbox(label="Requirement → Code")
733
- t2c_lang = gr.Dropdown(["python","javascript","html","css","java","c","cpp","go","rust"], value="python")
734
- t2c_btn = gr.Button("Generate Code")
735
- t2c_out = gr.Code(label="Code")
736
- t2c_btn.click(ui_text_to_code, [t2c_text, t2c_lang], [t2c_out])
737
-
738
- gr.Markdown("---")
739
-
740
- with gr.Row():
741
- c2t_code = gr.Code(label="Code → Explain")
742
- c2t_lang = gr.Dropdown(["python","javascript","html","css","java","c","cpp","go","rust"], value="python")
743
- c2t_btn = gr.Button("Explain Code")
744
- c2t_out = gr.Textbox(label="Explanation")
745
- c2t_btn.click(ui_code_to_text, [c2t_code, c2t_lang], [c2t_out])
746
-
747
- gr.Markdown("---")
748
-
749
- with gr.Row():
750
- c2i_code = gr.Code(label="Code → Image (rendered)")
751
- c2i_lang = gr.Dropdown(["python","javascript","html","css","java","c","cpp","go","rust"], value="python")
752
- c2i_btn = gr.Button("Render Image")
753
- c2i_out = gr.Image(label="Code Image")
754
- c2i_btn.click(ui_code_to_image, [c2i_code, c2i_lang], [c2i_out])
755
-
756
- with gr.Tab("Emoji / Sticker / GIF"):
757
- em_text = gr.Textbox(label="Emoji/Sticker/GIF (interpret)")
758
- em_btn = gr.Button("Interpret")
759
- em_out = gr.Textbox(label="Meaning")
760
- em_btn.click(ui_emoji_interpret, [em_text], [em_out])
761
-
762
- gr.Markdown("---")
763
-
764
- em_gen = gr.Textbox(label="Describe a sticker to generate")
765
- em_gen_btn = gr.Button("Generate Sticker")
766
- em_gen_out = gr.Image(label="Sticker")
767
- em_gen_btn.click(ui_emoji_generate, [em_gen], [em_gen_out])
768
-
769
- with gr.Tab("File Reader / Convert"):
770
- fr_file = gr.File(label="File")
771
- fr_btn = gr.Button("Read File")
772
- fr_out = gr.Textbox(label="File Content", lines=15)
773
- fr_btn.click(ui_file_reader, [fr_file], [fr_out])
774
-
775
- gr.Markdown("---")
776
-
777
- ft_file = gr.File(label="File → Text")
778
- ft_btn = gr.Button("Convert")
779
- ft_out = gr.Textbox(label="Extracted Text", lines=15)
780
- ft_btn.click(ui_file_to_text, [ft_file], [ft_out])
781
-
782
- gr.Markdown("---")
783
-
784
- ttf_text = gr.Textbox(label="Text → File")
785
- ttf_ext = gr.Dropdown(["txt","docx","csv"], value="txt", label="File type")
786
- ttf_btn = gr.Button("Create File")
787
- ttf_out = gr.File(label="Download")
788
- ttf_btn.click(ui_text_to_file, [ttf_text, ttf_ext], [ttf_out])
789
-
790
- with gr.Tab("API & Keys"):
791
- gr.Markdown("### Your API Key")
792
- key_box = gr.Textbox(value=get_default_api_key(), label="X-API-Key", interactive=False)
793
- gr.Markdown("**Use with header `X-API-Key` on endpoints:** `/api/chat`, `/api/text-to-image`")
794
- gr.Markdown("**UI Port:** 7860 &nbsp;&nbsp; **API Port:** 7861")
795
- gr.Markdown("**Server Device:** " + DEVICE)
796
-
797
- return demo
798
-
799
- # =========================================
800
- # Launch FastAPI + Gradio together
801
- # =========================================
802
- def start_servers():
803
- demo = build_gradio()
804
- # Mount Gradio root hint
805
- @api.get("/")
806
- def root():
807
- return {"message": "Go to the Gradio UI on port 7860. API lives on port 7861."}
808
-
809
- # Run Gradio as background thread
810
- def run_gradio():
811
- demo.queue().launch(server_name="0.0.0.0", server_port=7860, show_api=False, share=False)
812
-
813
- th = threading.Thread(target=run_gradio, daemon=True)
814
- th.start()
815
- # Run FastAPI (uvicorn)
816
- uvicorn.run(api, host="0.0.0.0", port=7861)
817
 
818
  if __name__ == "__main__":
819
- print("Close-to-Human Multimodal AI — starting…")
820
- print("Device:", DEVICE)
821
- print("API key:", get_default_api_key())
822
- start_servers()
 
1
+ #!/usr/bin/env python3
2
+ # app.py - Front-end dashboard for Multimodular v7 (multimodal)
3
+ # Place this file alongside your multimodular module (compact or expanded).
4
+
5
+ import os, time, sys, json, pathlib
6
+
7
+ # ---- Config: brain module names to try ----
8
+ CANDIDATE_MODULES = [
9
+ "multimodular_modul_v7", # compact name used earlier
10
+ "multimodular_modul_v7_expanded", # expanded package name used earlier
11
+ "multimodular_modul version 7.0", # fallback if you saved exact name (unlikely)
12
+ ]
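+ # NOTE: these module names are guesses at how the brain file was saved locally.
+ # If your file is named differently (e.g. a hypothetical my_brain_v7.py), add its
+ # import name "my_brain_v7" to this list so load_brain() can find it.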
13
+
14
+ # ---- Boot splash ----
15
+ def boot_splash():
16
+ os.system("cls" if os.name == "nt" else "clear")
17
+ logo = r"""
18
+ ██████╗██╗ ██╗██████╗
19
+ ██╔════╝██║ ██║██╔══██╗
20
+ ██║ ███████║██████╔╝
21
+ ██║ ██╔══██║██╔═══╝
22
+ ╚██████╗██║ ██║██║
23
+ ╚═════╝╚═╝ ╚═╝╚═╝
24
+ Close-to-Human Brain v7.0
25
+ """
26
+ print(logo)
27
+ print("Initializing Universal Brain...")
28
+ steps = [
29
+ "Loading Core Modules",
30
+ "Starting Local DB",
31
+ "Bringing up CTB pipeline",
32
+ "Starting Global Sync (if configured)",
33
+ "Activating Creative Skill Vault",
34
+ "Launching Dashboard"
35
+ ]
36
+ for s in steps:
37
+ print(" →", s + "...")
38
+ time.sleep(0.6)
39
+ print("\n✅ Ready!\n")
40
+ time.sleep(0.3)
41
+
42
+ # ---- Adaptive loader for your brain module ----
43
+ def load_brain():
44
+ for name in CANDIDATE_MODULES:
45
+ try:
46
+ mod = __import__(name)
47
+ agent = None
48
+ # common exported instances/names:
49
+ if hasattr(mod, "AGENT"):
50
+ agent = getattr(mod, "AGENT")
51
+ elif hasattr(mod, "agent"):
52
+ agent = getattr(mod, "agent")
53
+ else:
54
+ # try to instantiate a class if present
55
+ cls_names = ["SuperAgentV7", "SuperAgent", "MultimodalBrain", "Agent", "Brain"]
56
+ for cls in cls_names:
57
+ if hasattr(mod, cls):
58
+ try:
59
+ agent = getattr(mod, cls)()
60
+ break
61
+ except Exception:
62
+ agent = None
63
+ # as last resort, if module defines functions, return module as agent
64
+ if agent is None:
65
+ agent = mod
66
+ print(f"[INFO] Loaded brain module: {name}")
67
+ return agent
68
+ except Exception:
69
+ continue
70
+ print("[WARN] Could not auto-import expected brain module names.")
71
+ print("Place your multimodular module in the same folder and name it one of:", ", ".join(CANDIDATE_MODULES))
72
+ return None
73
+
74
+ # ---- Helpers: flexible invocation for common brain actions ----
75
+ def brain_call(agent, fn_names, *args, **kwargs):
76
+ """Try to call first available function name on agent; return (ok, result)."""
77
+ if agent is None:
78
+ return False, "Brain not loaded"
79
+ for fn in fn_names:
80
+ if callable(getattr(agent, fn, None)):
81
  try:
82
+ return True, getattr(agent, fn)(*args, **kwargs)
83
+ except Exception as e:
84
+ return False, f"error calling {fn}: {e}"
85
+ # If agent itself exposes a 'ctb_handle' as attribute inside (e.g., agent.chb.ctb_handle)
86
  try:
87
+ # try nested common path: agent.chb.ctb_handle
88
+ chb = getattr(agent, "chb", None)
89
+ if chb:
90
+ for fn in fn_names:
91
+ if callable(getattr(chb, fn, None)):
92
+ try:
93
+ return True, getattr(chb, fn)(*args, **kwargs)
94
+ except Exception as e:
95
+ return False, f"error calling chb.{fn}: {e}"
96
  except Exception:
97
+ pass
98
+ return False, f"none of {fn_names} found on agent"
99
+
100
+ # ---- UI functions ----
101
+ menus = {
102
+ "1": "💬 Chat with AI (All Features in One Chat)",
103
+ "2": "🔎 Search Knowledge Base",
104
+ "3": "📤 Upload Media for Learning",
105
+ "4": "💾 Backup / Restore Brain (download backup)",
106
+ "5": "🎨 View Creative Skill Vault (top skills)",
107
+ "6": "🔁 Global Brain Sync Status",
108
+ "7": "🛠 Developer API Options",
109
+ "8": "📴 Offline Mode / Toggle",
110
+ "9": "❌ Exit"
111
+ }
112
+
113
+ def show_menu():
114
+ print("=== CHB v7.0 Main Menu ===")
115
+ for k in sorted(menus.keys(), key=int):
116
+ print(f"[{k}] {menus[k]}")
117
+
118
+ # ---- Media helpers (simple) ----
119
+ def read_file_as_payload(path):
120
+ p = pathlib.Path(path)
121
+ if not p.exists():
122
+ return None, f"file not found: {path}"
123
+ # minimal payload: path & size
124
+ try:
125
+ meta = {"path": str(p.resolve()), "size": p.stat().st_size}
126
+ return {"path": str(p.resolve()), "meta": meta}, None
127
+ except Exception as e:
128
+ return None, f"read error: {e}"
129
+
130
+ # ---- Menu 1: Multimodal chat loop ----
131
+ def multimodal_chat(agent):
132
+ print("\n=== Multimodal AI Chat ===")
133
+ print("Type naturally. Special commands:")
134
+ print(" /upload <path> - attach a file (image, video, audio)")
135
+ print(" /search <query> - run user-device search (plan + return style)")
136
+ print(" /skills <tag> - show top creative skills for tag")
137
+ print(" /backup - create a new backup and show path")
138
+ print(" /help - show this help")
139
+ print(" /exit - return to main menu\n")
140
+ while True:
141
+ try:
142
+ user = input("You: ").strip()
143
+ except (KeyboardInterrupt, EOFError):
144
+ print("\nReturning to main menu.")
145
+ return
146
+ if not user:
147
+ continue
148
+ if user.lower() in ("/exit", "exit", "quit"):
149
+ print("Returning to main menu.\n")
150
+ return
151
+ if user.startswith("/upload "):
152
+ path = user[len("/upload "):].strip().strip('"').strip("'")
153
+ payload, err = read_file_as_payload(path)
154
+ if err:
155
+ print("Error:", err); continue
156
+ # Build a simple plan_results-like structure and submit to brain
157
+ # plan_results should include images/videos/audios lists if agent expects that shape
158
+ plan_results = {}
159
+ suffix = pathlib.Path(path).suffix.lower()
160
+ if suffix in (".png", ".jpg", ".jpeg", ".webp", ".bmp"):
161
+ plan_results["images"] = [{"path": payload["path"], "quality_score": 0.9, "caption": "", "tags": []}]
162
+ elif suffix in (".mp4", ".mov", ".mkv", ".webm"):
163
+ plan_results["videos"] = [{"path": payload["path"], "quality_score": 0.8, "caption": "", "tags": []}]
164
+ elif suffix in (".mp3", ".wav", ".m4a", ".ogg"):
165
+ plan_results["audios"] = [{"path": payload["path"], "quality_score": 0.8, "caption": "", "tags": []}]
166
+ else:
167
+ plan_results["files"] = [{"path": payload["path"], "meta": payload["meta"]}]
168
+ ok, res = brain_call(agent, ["submit_plan_results", "handle_plan_results", "submit_results", "submit_plan"], plan_id="upload_"+str(int(time.time())), results=plan_results)
169
+ if ok:
170
+ print("AI: (processed upload) ->", res)
171
+ else:
172
+ print("AI: upload processed locally, but brain call failed:", res)
173
+ continue
174
+ if user.startswith("/search "):
175
+ q = user[len("/search "):].strip()
176
+ ok, plan = brain_call(agent, ["plan_search", "plan"], q)
177
+ if ok:
178
+ print("AI: Generated search plan. (Run this plan on client and submit results.)")
179
+ print(json.dumps(plan, indent=2) if isinstance(plan, dict) else plan)
180
+ else:
181
+ print("AI: search plan generation failed:", plan)
182
+ continue
183
+ if user.startswith("/skills "):
184
+ tag = user[len("/skills "):].strip()
185
+ ok, skills = brain_call(agent, ["top_skills", "top_skill", "top_by_tag"], tag, 5)
186
+ if ok:
187
+ print("Top skills for", tag, ":", skills)
188
+ else:
189
+ print("Could not fetch skills:", skills)
190
+ continue
191
+ if user.strip() == "/backup":
192
+ ok, path = brain_call(agent, ["download_latest_backup", "latest_backup", "get_latest_backup"])
193
+ if ok and path:
194
+ print("Latest backup path:", path)
195
+ else:
196
+ # try to create a new backup if method available
197
+ ok2, created = brain_call(agent, ["backup_create", "create_backup", "create_backup_zip"])
198
+ if ok2:
199
+ print("Created backup:", created)
200
+ else:
201
+ print("Backup not available:", path or created)
202
+ continue
203
+ if user.strip() == "/help":
204
+ print("Commands: /upload, /search, /skills, /backup, /exit")
205
  continue
206
 
207
+ # Regular freeform input: call ctb_handle if present, else agent.chat or agent.chat()
208
+ # Prefer 'ctb_handle' (Close-to-Human Brain multimodal pipeline), fall back to 'chat' or 'plan_search'
209
+ ok, resp = brain_call(agent, ["ctb_handle", "handle_input", "chat", "chat_message", "chat_query"], input_data=user)
210
+ if not ok:
211
+ # try more permissive call signatures
212
+ try:
213
+ # some agents expect chat(text)
214
+ resp = agent.chat(user)
215
+ print("AI:", resp)
216
+ except Exception as e:
217
+ print("AI call failed:", resp)
218
+ else:
219
+ print("AI:", resp)
220
+
221
+ # ---- Menus 2..9 simple wrappers that call brain functions if present ----
222
+ def menu_search_kb(agent):
223
+ q = input("Enter search query: ").strip()
224
+ if not q: return
225
+ ok, res = brain_call(agent, ["search_facts", "facts_search", "query_facts"], q)
226
+ if ok:
227
+ print("Results:", res)
228
  else:
229
+ print("Search failed:", res)
230
+
231
+ def menu_upload_media(agent):
232
+ path = input("Path to media file: ").strip()
233
+ if not path: return
234
+ payload, err = read_file_as_payload(path)
235
+ if err:
236
+ print("Error:", err); return
237
+ # submit via same upload command as chat
238
+ plan_results = {}
239
+ suffix = pathlib.Path(path).suffix.lower()
240
+ if suffix in (".png", ".jpg", ".jpeg", ".webp", ".bmp"):
241
+ plan_results["images"] = [{"path": payload["path"], "quality_score": 0.9}]
242
+ elif suffix in (".mp4", ".mov", ".mkv"):
243
+ plan_results["videos"] = [{"path": payload["path"], "quality_score": 0.8}]
244
+ elif suffix in (".mp3", ".wav"):
245
+ plan_results["audios"] = [{"path": payload["path"], "quality_score": 0.8}]
246
+ else:
247
+ plan_results["files"] = [{"path": payload["path"], "meta": payload["meta"]}]
248
+ ok, res = brain_call(agent, ["submit_plan_results", "handle_plan_results"], plan_id="manual_upload_"+str(int(time.time())), results=plan_results)
249
+ if ok:
250
+ print("Upload processed:", res)
251
+ else:
252
+ print("Upload failed:", res)
253
 
254
+ def menu_backup_download(agent):
255
+ ok, p = brain_call(agent, ["download_latest_backup", "latest_backup", "get_latest_backup"])
256
+ if ok and p:
257
+ print("Latest backup:", p)
258
+ else:
259
+ print("No backup available or call failed:", p)
260
 
261
+ def menu_view_vault(agent):
262
+ tag = input("Enter skill tag (or blank to list all): ").strip()
263
+ if tag:
264
+ ok, s = brain_call(agent, ["top_skills", "top_by_tag"], tag, 10)
265
+ else:
266
+ ok, s = brain_call(agent, ["list_skills", "get_skills"], )
267
+ if ok:
268
+ print("Skills:", s)
269
+ else:
270
+ print("Failed to retrieve skills:", s)
271
 
272
+ def menu_sync_status(agent):
273
+ ok, st = brain_call(agent, ["global_sync_status", "sync_status", "get_sync_status"])
274
+ if ok:
275
+ print("Global Sync Status:", st)
276
+ else:
277
+ print("Global sync status not available:", st)
278
+
279
+ def menu_dev_api(agent):
280
+ print("Developer API options:")
281
+ print(" 1) Add/Integrate module from file")
282
+ print(" 2) List modules")
283
+ choice = input("choice: ").strip()
284
+ if choice == "1":
285
+ path = input("Path to module (py or base64-wasm): ").strip()
286
+ payload, err = read_file_as_payload(path)
287
+ if err:
288
+ print("Error:", err); return
289
+ code = ""
290
+ try:
291
+ code = open(payload["path"], "rb").read().decode("utf-8")
292
+ except Exception:
293
+ import base64
294
+ code = base64.b64encode(open(payload["path"], "rb").read()).decode()
295
+ name = input("Module name (short): ").strip() or f"mod_{int(time.time())}"
296
+ ok, res = brain_call(agent, ["add_module", "integrate_module"], name, code, None)
297
+ print("Result:", res)
298
+ elif choice == "2":
299
+ ok, res = brain_call(agent, ["list_modules", "get_modules"])
300
+ print("Modules:", res if ok else "failed:"+str(res))
301
+ else:
302
+ print("cancel")
303
 
304
+ def menu_offline_toggle(agent):
305
+ ok, st = brain_call(agent, ["toggle_offline", "set_offline", "offline_toggle"])
306
+ if ok:
307
+ print("Offline toggled:", st)
308
  else:
309
+ print("Offline toggle not available; try starting/stopping network in your environment.")
310
+
311
+ # ---- Main loop ----
312
+ def main():
313
+ boot_splash()
314
+ agent = load_brain()
315
+ if agent is None:
316
+ print("Brain not loaded. You can still use app UI, but brain-dependent actions will fail.")
317
+ while True:
318
+ show_menu()
319
+ choice = input("Select: ").strip()
320
+ if choice == "1":
321
+ multimodal_chat(agent)
322
+ elif choice == "2":
323
+ menu_search_kb(agent)
324
+ elif choice == "3":
325
+ menu_upload_media(agent)
326
+ elif choice == "4":
327
+ menu_backup_download(agent)
328
+ elif choice == "5":
329
+ menu_view_vault(agent)
330
+ elif choice == "6":
331
+ menu_sync_status(agent)
332
+ elif choice == "7":
333
+ menu_dev_api(agent)
334
+ elif choice == "8":
335
+ menu_offline_toggle(agent)
336
+ elif choice == "9":
337
+ print("Goodbye.")
338
+ break
339
  else:
340
+ print("Unknown option; try again.\n")
341
 
342
  if __name__ == "__main__":
343
+ main()
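+ # To run the dashboard (assumes Python 3.x and the brain module sitting next to this file):
+ #   python app.py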