Robo-Beam

Running

File size: 40,580 Bytes

d48cdf4
 
 
 
 
b9f6a1d
d48cdf4
 
0ea9032
 
d48cdf4
 
 
 
 
 
 
507fad1
b9f6a1d
8fa1eef
b9f6a1d
c6c9a50
 
9dddfec
b9f6a1d
9dddfec
 
b9f6a1d
9dddfec
 
 
 
5510c43
0ea9032
5510c43
ac92569
5510c43
1e394d0
b9f6a1d
1e394d0
 
a12c96b
b9f6a1d
a12c96b
 
1e394d0
 
 
 
5510c43
b9f6a1d
5510c43
 
 
b9f6a1d
5510c43
 
 
d07df81
444ad96
5510c43
 
 
b9f6a1d
5510c43
444ad96
b9f6a1d
5510c43
d07df81
 
 
 
 
b9f6a1d
 
d07df81
b9f6a1d
9aac221
d07df81
 
b9f6a1d
d07df81
 
b9f6a1d
5510c43
d07df81
 
b9f6a1d
d07df81
 
 
b9f6a1d
d07df81
 
 
 
b9f6a1d
d07df81
 
 
5510c43
b9f6a1d
 
d07df81
b9f6a1d
d07df81
5510c43
b9f6a1d
444ad96
 
 
b9f6a1d
5510c43
444ad96
 
 
 
f7748ac
444ad96
b9f6a1d
444ad96
f7748ac
 
b9f6a1d
f7748ac
444ad96
d07df81
b9f6a1d
f7748ac
54917ae
b9f6a1d
f7748ac
 
 
b9f6a1d
f7748ac
d07df81
5510c43
 
 
 
 
ac92569
b9f6a1d
ac92569
42893c3
b9f6a1d
a85231d
6977531
d48cdf4
 
2b2c22c
 
 
b9f6a1d
d48cdf4
 
 
507fad1
ac92569
b9f6a1d
ac92569
8fa1eef
ac92569
b9f6a1d
ac92569
8fa1eef
 
5ff87dd
 
507fad1
8fa1eef
 
2b2c22c
8fa1eef
 
 
 
 
ac92569
b9f6a1d
ac92569
8fa1eef
 
507fad1
8fa1eef
 
2b2c22c
8fa1eef
 
 
 
c6c9a50
ac92569
b9f6a1d
ac92569
507fad1
c6c9a50
 
 
5ff87dd
 
 
507fad1
 
 
5ff87dd
 
 
 
 
c6c9a50
 
 
507fad1
 
 
 
 
 
c6c9a50
ac92569
b9f6a1d
ac92569
d48cdf4
 
 
 
 
 
5ff87dd
d48cdf4
 
 
 
 
 
 
 
 
 
5ff87dd
 
 
 
 
 
 
d48cdf4
 
 
 
8fa1eef
 
2b2c22c
8fa1eef
 
 
d48cdf4
 
 
77b4bdf
d48cdf4
 
 
 
 
 
 
 
 
 
 
 
 
5ff87dd
 
 
 
 
 
 
77b4bdf
d48cdf4
 
 
ac92569
b9f6a1d
ac92569
d48cdf4
 
 
 
ac92569
77f7fca
507fad1
d48cdf4
 
 
 
 
b9f6a1d
9dddfec
d48cdf4
 
 
5ff87dd
 
507fad1
d48cdf4
 
 
 
9dddfec
d48cdf4
b9f6a1d
9dddfec
d48cdf4
507fad1
 
d48cdf4
 
b9f6a1d
d48cdf4
 
9dddfec
 
d48cdf4
 
ac92569
b9f6a1d
ac92569
d48cdf4
 
 
 
5ff87dd
 
 
d48cdf4
5ff87dd
 
d48cdf4
 
 
c6c9a50
 
 
d48cdf4
 
 
ac92569
b9f6a1d
ac92569
5ff87dd
 
 
 
 
 
 
ac92569
 
 
 
 
 
5ff87dd
9dddfec
b9f6a1d
9dddfec
d48cdf4
9dddfec
d48cdf4
5ff87dd
 
8fa1eef
 
c6c9a50
77b4bdf
507fad1
77b4bdf
8fa1eef
 
 
77b4bdf
8fa1eef
 
 
d48cdf4
c6c9a50
507fad1
 
c6c9a50
77b4bdf
9dddfec
 
 
 
77b4bdf
5ff87dd
 
ac92569
1e394d0
9dddfec
c6c9a50
8fa1eef
 
77b4bdf
9dddfec
d48cdf4
5ff87dd
ac92569
b9f6a1d
ac92569
d48cdf4
 
507fad1
d48cdf4
 
 
 
 
 
 
 
 
 
5ff87dd
 
 
 
 
 
1e394d0
5ff87dd
 
 
d48cdf4
 
 
9dddfec
b9f6a1d
9dddfec
 
 
b9f6a1d
9dddfec
 
 
 
 
b9f6a1d
 
9dddfec
 
b9f6a1d
9dddfec
 
 
ac92569
b9f6a1d
ac92569
d48cdf4
5510c43
 
 
 
 
 
 
 
1e394d0
d48cdf4
 
 
 
b9f6a1d
9dddfec
5ff87dd
0ea9032
 
b9f6a1d
0ea9032
 
 
1e394d0
 
 
0ea9032
 
 
54917ae
1e394d0
0ea9032
5510c43
5ff87dd
0ea9032
 
 
 
 
 
5ff87dd
0ea9032
9dddfec
b9f6a1d
9dddfec
5ff87dd
 
 
 
 
 
 
 
 
 
 
 
9dddfec
b9f6a1d
9dddfec
 
 
 
 
5ff87dd
 
0d4c8dd
5ff87dd
 
 
0ea9032
6977531
5ff87dd
 
 
 
 
 
ac92569
5ff87dd
 
b9f6a1d
9dddfec
 
b9f6a1d
9dddfec
 
 
 
 
 
 
 
b9f6a1d
9dddfec
 
 
 
 
 
6977531
 
ac92569
b9f6a1d
ac92569
5510c43
a028900
 
 
 
 
5510c43
a028900
0e3a388
 
 
a028900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5510c43
a028900
 
5510c43
a028900
 
 
 
 
 
 
 
 
 
 
 
 
 
5510c43
a028900
5510c43
a028900
 
 
f76e5e4
 
a028900
 
f76e5e4
 
a028900
 
 
 
 
 
 
 
 
 
 
54917ae
a028900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a65c126
f76e5e4
fffa979
9c04458
 
 
 
 
 
 
 
 
 
fffa979
9c04458
 
 
 
 
 
 
 
 
 
 
fffa979
 
 
9c04458
 
 
 
a028900
 
 
a65c126
f76e5e4
a028900
 
 
f76e5e4
a028900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5510c43
b018faf
fffa979
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f2dd94
 
5510c43
a028900
 
 
ec6517e
a028900
 
b018faf
5510c43
0d4c8dd
54917ae
2a84822
5510c43
2a84822
 
b9f6a1d
2a84822
 
d48cdf4
b9f6a1d
2a84822
 
38facb1
b5c09f2
 
59d4592
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5c09f2
59d4592
 
2a84822
 
59d4592
 
2a84822
 
 
 
 
 
b9f6a1d
2a84822
 
 
 
 
b9f6a1d
 
2a84822
 
 
 
 
 
 
 
 
 
 
 
 
 
b9f6a1d
2a84822
 
 
 
 
 
 
5a98a93
2a84822
5f2dd94
2a84822
 
 
 
 
 
f937240
5510c43
a12c96b
5a98a93
cdd2c63
54917ae

#!/usr/bin/env python

import os
import re
import tempfile
import gc  # garbage collector
from collections.abc import Iterator
from threading import Thread
import json
import requests
import cv2
import gradio as gr
import spaces
import torch
from loguru import logger
from PIL import Image
from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer

# CSV/TXT analysis
import pandas as pd
# PDF text extraction
import PyPDF2

##############################################################################
# Memory cleanup function
##############################################################################
def clear_cuda_cache():
    """Free unreachable Python objects, then release cached CUDA memory.

    The garbage collector runs first so that tensors kept alive only by
    reference cycles are destroyed before ``torch.cuda.empty_cache()`` hands
    the allocator's unused cached blocks back to the driver. Collection runs
    even when CUDA is unavailable; the CUDA step is skipped in that case.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

##############################################################################
# SERPHouse API key from environment variable
##############################################################################
# Read once at import time; an empty string means no key was configured.
SERPHOUSE_API_KEY = os.environ.get("SERPHOUSE_API_KEY", "")

##############################################################################
# Simple keyword extraction function
##############################################################################
def extract_keywords(text: str, top_k: int = 5) -> str:
    """
    Build a short search query from the leading words of *text*.

    Every character that is not an ASCII letter, a digit, a Hangul syllable
    or whitespace is deleted (not replaced by a space), the remainder is
    split on whitespace, and the first *top_k* tokens are joined with single
    spaces.
    """
    disallowed = re.compile(r"[^a-zA-Z0-9가-힣\s]")
    words = disallowed.sub("", text).split()
    return " ".join(words[:top_k])

##############################################################################
# SerpHouse Live endpoint call
##############################################################################
def _extract_organic_results(data) -> list:
    """
    Return the first non-empty 'organic' list found in a SerpHouse payload.

    The locations are checked in this order:
      1. data["results"]["organic"]
      2. data["results"]["results"]["organic"]
      3. data["organic"]
    An empty list is returned when none of them holds a non-empty list.
    """
    if not isinstance(data, dict):
        return []

    candidates = []
    results = data.get("results")
    if isinstance(results, dict):
        candidates.append(results.get("organic"))
        nested = results.get("results")
        if isinstance(nested, dict):
            candidates.append(nested.get("organic"))
    candidates.append(data.get("organic"))

    for candidate in candidates:
        if isinstance(candidate, list) and candidate:
            return candidate
    return []


def do_web_search(query: str) -> str:
    """
    Query the SerpHouse live endpoint and return the results as Markdown.

    Up to 20 'organic' results are rendered as Markdown sections (title,
    snippet, source link), preceded by a short instruction header.

    This function does not raise for request or parsing errors: failures are
    logged and reported as a "Web search failed: ..." string, and a response
    without usable organic results yields an explanatory message instead.
    """
    # Without a key the request can only be rejected; skip the network call.
    if not SERPHOUSE_API_KEY:
        logger.error("Web search failed: SERPHOUSE_API_KEY is not set")
        return "Web search failed: SERPHOUSE_API_KEY is not set"

    try:
        url = "https://api.serphouse.com/serp/live"

        # Plain GET request with a minimal parameter set, capped at 20 results
        params = {
            "q": query,
            "domain": "google.com",
            "serp_type": "web",  # Basic web search
            "device": "desktop",
            "lang": "en",
            "num": "20"  # Request max 20 results
        }

        headers = {
            "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
        }

        logger.info(f"SerpHouse API call... query: {query}")
        logger.info(f"Request URL: {url} - params: {params}")

        response = requests.get(url, headers=headers, params=params, timeout=60)
        response.raise_for_status()

        logger.info(f"SerpHouse API response status: {response.status_code}")
        data = response.json()

        # Ignore entries that are not objects so one malformed item cannot
        # discard the whole result set.
        organic = _extract_organic_results(data)
        limited_organic = [item for item in organic if isinstance(item, dict)][:20]

        if not limited_organic:
            logger.warning("No organic results found in response.")
            if isinstance(data, dict):
                logger.debug(f"Response structure: {list(data.keys())}")
                results = data.get("results")
                if isinstance(results, dict):
                    logger.debug(f"results structure: {list(results.keys())}")
            return "No web search results found or unexpected API response structure."

        # Format results as Markdown for better readability
        summary_lines = []
        for idx, item in enumerate(limited_organic, start=1):
            # `or` also covers keys that are present but null/empty.
            title = item.get("title") or "No title"
            link = item.get("link") or "#"
            snippet = item.get("snippet") or "No description"
            displayed_link = item.get("displayed_link") or link

            summary_lines.append(
                f"### Result {idx}: {title}\n\n"
                f"{snippet}\n\n"
                f"**Source**: [{displayed_link}]({link})\n\n"
                f"---\n"
            )

        # Short header telling the model how to use the results
        instructions = (
            "\n# X-RAY Security Scanning Reference Results\n"
            "Use this information to enhance your analysis.\n"
        )

        search_results = instructions + "\n".join(summary_lines)
        logger.info(f"Processed {len(limited_organic)} search results")
        return search_results

    except Exception as e:
        logger.error(f"Web search failed: {e}")
        return f"Web search failed: {str(e)}"


##############################################################################
# Model/Processor loading
##############################################################################
# Character cap applied to extracted document text and to each text item of a
# new user message before it is placed in the prompt.
MAX_CONTENT_CHARS = 2000
# NOTE(review): 2096 is an unusual bound - confirm it is not meant to be 2048.
MAX_INPUT_LENGTH = 2096  # Max input token limit
# Hugging Face model id; override with the MODEL_ID environment variable.
model_id = os.getenv("MODEL_ID", "VIDraft/Gemma-3-R1984-4B")

# Processor and model are loaded once at import time and used as module-level
# globals by run() and _model_gen_with_oom_catch().
processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="eager"  # Change to "flash_attention_2" if available
)
# Maximum number of images per conversation (history + new message), enforced
# by validate_media_constraints(); override with the MAX_NUM_IMAGES env var.
MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))


##############################################################################
# CSV, TXT, PDF analysis functions
##############################################################################
def analyze_csv_file(path: str) -> str:
    """
    Render the top-left corner of a CSV file as text for the prompt.

    At most the first 50 rows and first 10 columns are kept, and the rendered
    table is cut to MAX_CONTENT_CHARS characters. Only the rows that can be
    shown are parsed, so large uploads are not loaded in full.

    Returns a Markdown-labelled string on success, or a
    "Failed to read CSV (...)" message if the file cannot be read or parsed.
    """
    try:
        # nrows bounds parsing work; iloc trims the columns afterwards.
        df = pd.read_csv(path, nrows=50).iloc[:, :10]
        df_str = df.to_string()
        if len(df_str) > MAX_CONTENT_CHARS:
            df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        return f"**[CSV File: {os.path.basename(path)}]**\n\n{df_str}"
    except Exception as e:
        return f"Failed to read CSV ({os.path.basename(path)}): {str(e)}"


def analyze_txt_file(path: str) -> str:
    """
    Read the beginning of a text file for inclusion in the prompt.

    At most MAX_CONTENT_CHARS characters are kept; one extra character is read
    only to detect that the file is longer, so large files are never loaded
    in full. Bytes that are not valid UTF-8 are replaced with U+FFFD instead
    of failing the whole read.

    Returns a Markdown-labelled string on success, or a
    "Failed to read TXT (...)" message if the file cannot be opened or read.
    """
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            text = f.read(MAX_CONTENT_CHARS + 1)
        if len(text) > MAX_CONTENT_CHARS:
            text = text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        return f"**[TXT File: {os.path.basename(path)}]**\n\n{text}"
    except Exception as e:
        return f"Failed to read TXT ({os.path.basename(path)}): {str(e)}"


def pdf_to_markdown(pdf_path: str) -> str:
    """
    Extract text from the first pages of a PDF as Markdown sections.

    Up to 5 pages are read. Each non-empty page gets a "## Page N" heading and
    an equal share of MAX_CONTENT_CHARS; the combined text is then capped at
    MAX_CONTENT_CHARS as well. If the document has more pages than were read,
    a note stating so is appended.

    Returns a Markdown-labelled string, or a "Failed to read PDF (...)"
    message if opening or parsing raises.
    """
    sections = []
    try:
        with open(pdf_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            total_pages = len(reader.pages)
            shown_pages = min(5, total_pages)
            for index in range(shown_pages):
                body = (reader.pages[index].extract_text() or "").strip()
                if not body:
                    # Pages without extractable text are left out entirely.
                    continue
                per_page_limit = MAX_CONTENT_CHARS // shown_pages
                if len(body) > per_page_limit:
                    body = body[:per_page_limit] + "...(truncated)"
                sections.append(f"## Page {index + 1}\n\n{body}\n")
            if total_pages > shown_pages:
                sections.append(f"\n...(Showing {shown_pages} of {total_pages} pages)...")
    except Exception as e:
        return f"Failed to read PDF ({os.path.basename(pdf_path)}): {str(e)}"

    markdown = "\n".join(sections)
    if len(markdown) > MAX_CONTENT_CHARS:
        markdown = markdown[:MAX_CONTENT_CHARS] + "\n...(truncated)..."

    return f"**[PDF File: {os.path.basename(pdf_path)}]**\n\n{markdown}"


##############################################################################
# Image/Video upload limit check
##############################################################################
def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
    """
    Count image and video files among *paths*.

    A path ending in ".mp4" (case-sensitive) is a video; a path ending in
    .png/.jpg/.jpeg/.gif/.webp (case-insensitive) is an image; anything else
    is ignored. Returns ``(image_count, video_count)``.
    """
    image_pattern = re.compile(r"\.(png|jpg|jpeg|gif|webp)$", re.IGNORECASE)
    tally = {"image": 0, "video": 0}
    for candidate in paths:
        if candidate.endswith(".mp4"):
            kind = "video"
        elif image_pattern.search(candidate):
            kind = "image"
        else:
            continue
        tally[kind] += 1
    return tally["image"], tally["video"]


def count_files_in_history(history: list[dict]) -> tuple[int, int]:
    """
    Count image and video files that the user uploaded earlier in the chat.

    Only entries with role "user" whose content is a non-empty list are
    inspected, and only the first element of that list (when it is a string)
    is classified: ".mp4" (case-sensitive) is a video, an image extension
    (case-insensitive) is an image. Returns ``(image_count, video_count)``.
    """
    # NOTE(review): content given as a tuple is skipped by the list check -
    # confirm the chat history always delivers file entries as lists.
    image_pattern = re.compile(r"\.(png|jpg|jpeg|gif|webp)$", re.IGNORECASE)
    n_images = 0
    n_videos = 0
    for entry in history:
        if entry["role"] != "user":
            continue
        payload = entry["content"]
        if not isinstance(payload, list) or not payload:
            continue
        first = payload[0]
        if not isinstance(first, str):
            continue
        if first.endswith(".mp4"):
            n_videos += 1
        elif image_pattern.search(first):
            n_images += 1
    return n_images, n_videos


def validate_media_constraints(message: dict, history: list[dict]) -> bool:
    """
    Check the new message's uploads against the media rules of the chat.

    Counts images and videos in the new message plus the history and rejects
    the message (showing a Gradio warning and returning False) when:
      * more than one video is present,
      * a video is combined with images or with "<image>" tags in the text,
      * there is no video and the image total exceeds MAX_NUM_IMAGES,
      * the number of "<image>" tags differs from the number of image files
        attached to the new message.
    Returns True when every rule is satisfied.
    """
    image_pattern = re.compile(r"\.(png|jpg|jpeg|gif|webp)$", re.IGNORECASE)

    def reject(reason: str) -> bool:
        # Surface the reason in the UI and signal failure to the caller.
        gr.Warning(reason)
        return False

    media_files = [
        f for f in message["files"]
        if image_pattern.search(f) or f.endswith(".mp4")
    ]

    fresh_images, fresh_videos = count_files_in_new_message(media_files)
    past_images, past_videos = count_files_in_history(history)
    total_images = past_images + fresh_images
    total_videos = past_videos + fresh_videos

    if total_videos > 1:
        return reject("Only one video is supported.")
    if total_videos == 1:
        if total_images > 0:
            return reject("Mixing images and videos is not allowed.")
        if "<image>" in message["text"]:
            return reject("Using <image> tags with video files is not supported.")
    elif total_images > MAX_NUM_IMAGES:
        return reject(f"You can upload up to {MAX_NUM_IMAGES} images.")

    if "<image>" in message["text"]:
        attached_images = [f for f in message["files"] if image_pattern.search(f)]
        if message["text"].count("<image>") != len(attached_images):
            return reject("The number of <image> tags in the text does not match the number of image files.")

    return True


##############################################################################
# Video processing - with temp file tracking
##############################################################################
def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
    """
    Sample up to 5 half-resolution RGB frames from a video.

    Frames are taken every ``max(int(fps), total_frames // 10)`` frames (at
    least every frame), starting at frame 0, and sampling stops after 5
    frames have been collected. Each result is ``(PIL image, timestamp)``
    where the timestamp is the frame index divided by the frame rate, rounded
    to two decimals.

    Raises:
        ValueError: if the video cannot be opened or reports no frames or a
            non-positive frame rate.
    """
    max_frames = 5
    vidcap = cv2.VideoCapture(video_path)
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

        # An unreadable file reports 0 fps / 0 frames; fail with a clear
        # message instead of a zero range step or a division by zero below.
        # `not fps > 0` also rejects NaN.
        if not vidcap.isOpened() or not fps > 0 or total_frames <= 0:
            raise ValueError(f"Could not read video: {os.path.basename(video_path)}")

        # The leading 1 keeps the step valid when fps < 1 on a short clip.
        frame_interval = max(1, int(fps), total_frames // 10)
        frames = []

        for i in range(0, total_frames, frame_interval):
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = vidcap.read()
            if not success:
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # Halve both dimensions to reduce the image payload
            image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
            pil_image = Image.fromarray(image)
            timestamp = round(i / fps, 2)
            frames.append((pil_image, timestamp))
            if len(frames) >= max_frames:
                break

        return frames
    finally:
        # Release the capture handle even when sampling raises.
        vidcap.release()


def process_video(video_path: str) -> tuple[list[dict], list[str]]:
    """
    Turn a video into alternating "Frame <timestamp>:" text and image items.

    Frames come from downsample_video(); each is written to its own PNG temp
    file, which the image item references by path.

    Returns:
        (content, temp_files): the chat content items and the paths of the
        temp files created, which the caller is responsible for deleting.

    If anything fails part-way, the temp files created so far are removed
    before the exception is re-raised, because the caller never receives
    their paths in that case.
    """
    content = []
    temp_files = []  # Paths handed back to the caller for later deletion

    try:
        for pil_image, timestamp in downsample_video(video_path):
            # Reserve a unique path, then close the handle before writing so
            # the save does not compete with an open file descriptor.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
                frame_path = temp_file.name
            # Track before saving so a failed save is still cleaned up.
            temp_files.append(frame_path)
            pil_image.save(frame_path)
            content.append({"type": "text", "text": f"Frame {timestamp}:"})
            content.append({"type": "image", "url": frame_path})
    except Exception:
        for leftover in temp_files:
            try:
                os.unlink(leftover)
            except OSError as cleanup_error:
                logger.warning(f"Failed to delete temp file {leftover}: {cleanup_error}")
        raise

    return content, temp_files


##############################################################################
# interleaved <image> processing
##############################################################################
def process_interleaved_images(message: dict) -> list[dict]:
    """
    Expand "<image>" tags in the message text into image content items.

    The text is split on "<image>". Each tag consumes the next image file
    (by extension, case-insensitive) from ``message["files"]`` in order. A
    tag with no image left is kept as the literal text "<image>". Text
    segments are stripped; segments that are empty or whitespace-only are
    kept unchanged as text items.
    """
    segments = re.split(r"(<image>)", message["text"])
    image_pattern = re.compile(r"\.(png|jpg|jpeg|gif|webp)$", re.IGNORECASE)
    remaining_images = iter([f for f in message["files"] if image_pattern.search(f)])

    content = []
    for segment in segments:
        if segment == "<image>":
            url = next(remaining_images, None)
            if url is not None:
                content.append({"type": "image", "url": url})
                continue
        trimmed = segment.strip()
        # Fall back to the raw segment when stripping leaves nothing.
        content.append({"type": "text", "text": trimmed if trimmed else segment})
    return content


##############################################################################
# PDF + CSV + TXT + Image/Video
##############################################################################
def is_image_file(file_path: str) -> bool:
    """Return True if *file_path* ends in .png/.jpg/.jpeg/.gif/.webp (any case)."""
    match = re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE)
    return match is not None

def is_video_file(file_path: str) -> bool:
    """Return True if *file_path* ends in ".mp4" (case-sensitive)."""
    video_suffix = ".mp4"
    return file_path.endswith(video_suffix)

def is_document_file(file_path: str) -> bool:
    """Return True if *file_path* ends in .pdf, .csv or .txt (any case)."""
    document_suffixes = (".pdf", ".csv", ".txt")
    return file_path.lower().endswith(document_suffixes)


def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
    """
    Convert the new user message and its uploads into chat content items.

    Document uploads are converted to text items in the order CSV, TXT, PDF.
    Then exactly one of the following applies:
      * a video is attached: its sampled frames follow the text and document
        items (only the first video is used; images are ignored);
      * the text contains "<image>" and images are attached: the interleaved
        text/image items come first, followed by the document items;
      * otherwise: the text, the document items, then one image item per
        attached image.

    Returns:
        (content, temp_files): the content items and the temp file paths
        created for video frames (empty unless a video was processed).
    """
    temp_files = []  # Only video frame extraction creates temp files

    uploads = message["files"]
    text_item = {"type": "text", "text": message["text"]}
    if not uploads:
        return [text_item], temp_files

    videos = [f for f in uploads if is_video_file(f)]
    images = [f for f in uploads if is_image_file(f)]

    # Documents are grouped by type, not by upload order.
    document_items = []
    converters = (
        (".csv", analyze_csv_file),
        (".txt", analyze_txt_file),
        (".pdf", pdf_to_markdown),
    )
    for suffix, convert in converters:
        for path in uploads:
            if path.lower().endswith(suffix):
                document_items.append({"type": "text", "text": convert(path)})

    if videos:
        video_items, video_temp_files = process_video(videos[0])
        temp_files.extend(video_temp_files)
        return [text_item] + document_items + video_items, temp_files

    if "<image>" in message["text"] and images:
        # The interleaved items already carry the message text, so the plain
        # text item is not repeated.
        interleaved = process_interleaved_images({"text": message["text"], "files": images})
        return interleaved + document_items, temp_files

    image_items = [{"type": "image", "url": img_path} for img_path in images]
    return [text_item] + document_items + image_items, temp_files


##############################################################################
# history -> LLM message conversion
##############################################################################
def process_history(history: list[dict]) -> list[dict]:
    """
    Convert the chat history into the message list used for the chat template.

    Consecutive non-assistant entries are merged into a single "user" message:
    string content becomes a text item; list content contributes its first
    element, as an image item if it is an image path, otherwise as a
    "[File: <name>]" text placeholder. Each assistant entry becomes its own
    message with one text item. Trailing user content is emitted at the end.
    """
    messages = []
    pending_user_items: list[dict] = []

    for entry in history:
        if entry["role"] != "assistant":
            payload = entry["content"]
            if isinstance(payload, str):
                pending_user_items.append({"type": "text", "text": payload})
            elif isinstance(payload, list) and payload:
                first = payload[0]
                if is_image_file(first):
                    pending_user_items.append({"type": "image", "url": first})
                else:
                    pending_user_items.append(
                        {"type": "text", "text": f"[File: {os.path.basename(first)}]"}
                    )
            continue

        # An assistant turn closes the user turn collected so far.
        if pending_user_items:
            messages.append({"role": "user", "content": pending_user_items})
            pending_user_items = []
        messages.append({"role": "assistant", "content": [{"type": "text", "text": entry["content"]}]})

    if pending_user_items:
        messages.append({"role": "user", "content": pending_user_items})

    return messages


##############################################################################
# Model generation function with OOM catch
##############################################################################
def _model_gen_with_oom_catch(**kwargs):
    """
    Call ``model.generate(**kwargs)`` and report CUDA OOM as a RuntimeError.

    Used as the target of the generation thread started by run(). A
    ``torch.cuda.OutOfMemoryError`` is re-raised as a RuntimeError with a
    user-oriented hint; the CUDA cache is cleared whether generation
    succeeds or fails.
    """
    oom_hint = (
        "[OutOfMemoryError] GPU memory insufficient. "
        "Please reduce Max New Tokens or prompt length."
    )
    try:
        model.generate(**kwargs)
    except torch.cuda.OutOfMemoryError:
        raise RuntimeError(oom_hint)
    finally:
        # Runs on success and on failure alike.
        clear_cuda_cache()


##############################################################################
# Main inference function (with auto web search)
##############################################################################
def _build_system_message(message: dict, system_prompt: str, use_web_search: bool) -> str:
    """Assemble the hidden system message from the prompt and optional web search."""
    sections = []

    if system_prompt.strip():
        sections.append(f"[System Prompt]\n{system_prompt.strip()}\n\n")

    if use_web_search:
        ws_query = extract_keywords(message["text"], top_k=5)
        if ws_query.strip():
            logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
            ws_result = do_web_search(ws_query)
            sections.append(f"[X-RAY Security Reference Data]\n{ws_result}\n\n")
        else:
            sections.append("[No valid keywords found, skipping WebSearch]\n\n")

    return "".join(sections).strip()


def _truncate_model_inputs(inputs, max_length: int) -> None:
    """Keep only the last *max_length* tokens of every sequence-aligned tensor, in place."""
    seq_len = inputs["input_ids"].shape[1]
    if seq_len <= max_length:
        return

    aligned_keys = [
        key for key, value in inputs.items()
        if torch.is_tensor(value) and value.dim() == 2 and value.shape[1] == seq_len
    ]
    # Entries that are not per-token tensors (presumably image pixel_values)
    # are tied to placeholder tokens in the prompt; cutting tokens could
    # desynchronize them, so such prompts are passed through unchanged.
    if len(aligned_keys) != len(inputs):
        logger.warning(
            f"Prompt has {seq_len} tokens (limit {max_length}) but carries "
            "non-text inputs; skipping truncation."
        )
        return

    # Assign through the mapping interface: setting an attribute on the
    # processor output would not change what is later passed to generate().
    for key in aligned_keys:
        inputs[key] = inputs[key][:, -max_length:]


@spaces.GPU(duration=120)
def run(
    message: dict,
    history: list[dict],
    system_prompt: str = "",
    max_new_tokens: int = 512,
    use_web_search: bool = False,
    web_search_query: str = "",
) -> Iterator[str]:
    """
    Stream the model's reply to a new user message.

    Args:
        message: New user message with "text" and "files" entries.
        history: Previous chat messages (dicts with "role" and "content").
        system_prompt: Optional system prompt, sent to the model but not shown.
        max_new_tokens: Generation length limit passed to ``model.generate``.
        use_web_search: When True, keywords extracted from the message text
            are searched and the results are added to the system message.
        web_search_query: Currently unused; the search query is always derived
            from the message text.

    Yields:
        The accumulated reply text after each streamed chunk. If media
        validation fails a single empty string is yielded; if an error occurs
        an "Error occurred: ..." string is yielded.

    Temp files created for video frames are deleted and the CUDA cache is
    cleared when the generator finishes, whether normally or through an error.
    """
    if not validate_media_constraints(message, history):
        yield ""
        return

    temp_files = []  # For tracking temp files
    # Pre-bound so the cleanup in `finally` never hits an undefined name.
    inputs = None
    streamer = None

    try:
        # Used internally only (hidden from UI)
        combined_system_msg = _build_system_message(message, system_prompt, use_web_search)

        messages = []
        if combined_system_msg:
            messages.append({
                "role": "system",
                "content": [{"type": "text", "text": combined_system_msg}],
            })

        messages.extend(process_history(history))

        user_content, user_temp_files = process_new_user_message(message)
        temp_files.extend(user_temp_files)  # Track temp files

        for item in user_content:
            if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
                item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        messages.append({"role": "user", "content": user_content})

        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(device=model.device, dtype=torch.bfloat16)

        # Limit input token count
        _truncate_model_inputs(inputs, MAX_INPUT_LENGTH)

        streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
        gen_kwargs = dict(
            inputs,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
        )

        # Generation runs in a worker thread; this generator consumes the streamer.
        t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
        t.start()

        output = ""
        for new_text in streamer:
            output += new_text
            yield output

    except Exception as e:
        logger.error(f"Error in run: {str(e)}")
        yield f"Error occurred: {str(e)}"

    finally:
        # Delete temp files
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.unlink(temp_file)
                    logger.info(f"Deleted temp file: {temp_file}")
            except Exception as e:
                logger.warning(f"Failed to delete temp file {temp_file}: {e}")

        # Drop the references to the input tensors before clearing the cache
        del inputs, streamer

        clear_cuda_cache()


##############################################################################
# Gradio UI (Blocks) setup
##############################################################################
css = """
/* Global Styles */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

* {
    box-sizing: border-box;
}

body {
    margin: 0;
    padding: 0;
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    min-height: 100vh;
    color: #2d3748;
}

/* Container Styling */
.gradio-container {
    background: rgba(255, 255, 255, 0.95);
    backdrop-filter: blur(20px);
    border-radius: 24px;
    padding: 40px;
    margin: 30px auto;
    width: 95% !important;
    max-width: 1400px !important;
    box-shadow: 
        0 25px 50px -12px rgba(0, 0, 0, 0.25),
        0 0 0 1px rgba(255, 255, 255, 0.05);
    border: 1px solid rgba(255, 255, 255, 0.2);
}

/* Header Styling */
.header-container {
    text-align: center;
    margin-bottom: 2rem;
    padding: 2rem 0;
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 50%, #4facfe 100%);
    background-clip: text;
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
}

/* Button Styling */
button, .btn {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    padding: 12px 28px !important;
    border-radius: 12px !important;
    font-weight: 600 !important;
    font-size: 14px !important;
    text-transform: none !important;
    letter-spacing: 0.5px !important;
    cursor: pointer !important;
    transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
    position: relative !important;
    overflow: hidden !important;
}

button:hover, .btn:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
    background: linear-gradient(135deg, #764ba2 0%, #667eea 100%) !important;
}

button:active, .btn:active {
    transform: translateY(0) !important;
}

/* Primary Action Button */
button[variant="primary"], .primary-btn {
    background: linear-gradient(135deg, #ff6b6b 0%, #ee5a52 100%) !important;
    box-shadow: 0 4px 15px rgba(255, 107, 107, 0.4) !important;
}

button[variant="primary"]:hover, .primary-btn:hover {
    box-shadow: 0 8px 25px rgba(255, 107, 107, 0.6) !important;
}

/* Input Fields */
.multimodal-textbox, textarea, input {
    background: rgba(255, 255, 255, 0.8) !important;
    backdrop-filter: blur(10px) !important;
    border: 2px solid rgba(102, 126, 234, 0.2) !important;
    border-radius: 16px !important;
    color: #2d3748 !important;
    font-family: 'Inter', sans-serif !important;
    padding: 16px 20px !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1) !important;
}

.multimodal-textbox:focus, textarea:focus, input:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 4px rgba(102, 126, 234, 0.1), 0 8px 30px rgba(0, 0, 0, 0.15) !important;
    outline: none !important;
    background: rgba(255, 255, 255, 0.95) !important;
}

/* Chat Interface */
.chatbox, .chatbot {
    background: rgba(255, 255, 255, 0.6) !important;
    backdrop-filter: blur(15px) !important;
    border-radius: 20px !important;
    border: 1px solid rgba(255, 255, 255, 0.3) !important;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1) !important;
    padding: 24px !important;
}

.message {
    background: rgba(255, 255, 255, 0.9) !important;
    border-radius: 16px !important;
    padding: 16px 20px !important;
    margin: 8px 0 !important;
    border: 1px solid rgba(102, 126, 234, 0.1) !important;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05) !important;
    transition: all 0.3s ease !important;
}

.message:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1) !important;
}

/* Assistant Message Styling */
.message.assistant {
    background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%) !important;
    border-left: 4px solid #667eea !important;
}

/* User Message Styling */
.message.user {
    background: linear-gradient(135deg, rgba(255, 107, 107, 0.1) 0%, rgba(238, 90, 82, 0.1) 100%) !important;
    border-left: 4px solid #ff6b6b !important;
}

/* Cards and Panels */
.card, .panel {
    background: rgba(255, 255, 255, 0.8) !important;
    backdrop-filter: blur(15px) !important;
    border-radius: 20px !important;
    padding: 24px !important;
    border: 1px solid rgba(255, 255, 255, 0.3) !important;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1) !important;
    transition: all 0.3s ease !important;
}

.card:hover, .panel:hover {
    transform: translateY(-4px) !important;
    box-shadow: 0 16px 40px rgba(0, 0, 0, 0.15) !important;
}

/* Checkbox Styling */
input[type="checkbox"] {
    appearance: none !important;
    width: 20px !important;
    height: 20px !important;
    border: 2px solid #667eea !important;
    border-radius: 6px !important;
    background: rgba(255, 255, 255, 0.8) !important;
    cursor: pointer !important;
    transition: all 0.3s ease !important;
    position: relative !important;
}

input[type="checkbox"]:checked {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border-color: #667eea !important;
}

input[type="checkbox"]:checked::after {
    content: "✓" !important;
    color: white !important;
    font-size: 14px !important;
    font-weight: bold !important;
    position: absolute !important;
    top: 50% !important;
    left: 50% !important;
    transform: translate(-50%, -50%) !important;
}

/* Progress Indicators */
.progress {
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
    border-radius: 10px !important;
    height: 8px !important;
}

/* Tooltips */
.tooltip {
    background: rgba(45, 55, 72, 0.95) !important;
    backdrop-filter: blur(10px) !important;
    color: white !important;
    border-radius: 8px !important;
    padding: 8px 12px !important;
    font-size: 12px !important;
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3) !important;
}

/* Slider Styling */
input[type="range"] {
    appearance: none !important;
    height: 8px !important;
    border-radius: 4px !important;
    background: linear-gradient(90deg, #e2e8f0 0%, #667eea 100%) !important;
    outline: none !important;
}

input[type="range"]::-webkit-slider-thumb {
    appearance: none !important;
    width: 20px !important;
    height: 20px !important;
    border-radius: 50% !important;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    cursor: pointer !important;
    box-shadow: 0 2px 8px rgba(102, 126, 234, 0.4) !important;
}

/* File Upload Area */
.file-upload {
    border: 2px dashed #667eea !important;
    border-radius: 16px !important;
    background: rgba(102, 126, 234, 0.05) !important;
    padding: 40px !important;
    text-align: center !important;
    transition: all 0.3s ease !important;
}

.file-upload:hover {
    border-color: #764ba2 !important;
    background: rgba(102, 126, 234, 0.1) !important;
    transform: scale(1.02) !important;
}

/* Animations */
@keyframes fadeInUp {
    from {
        opacity: 0;
        transform: translateY(30px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

@keyframes slideIn {
    from {
        opacity: 0;
        transform: translateX(-20px);
    }
    to {
        opacity: 1;
        transform: translateX(0);
    }
}

.animate-fade-in {
    animation: fadeInUp 0.6s ease-out !important;
}

.animate-slide-in {
    animation: slideIn 0.4s ease-out !important;
}

/* Responsive Design */
@media (max-width: 768px) {
    .gradio-container {
        margin: 15px !important;
        padding: 24px !important;
        width: calc(100% - 30px) !important;
    }
    
    button, .btn {
        padding: 10px 20px !important;
        font-size: 13px !important;
    }
}

/* Dark Mode Support */
@media (prefers-color-scheme: dark) {
    .gradio-container {
        background: rgba(26, 32, 44, 0.95) !important;
        color: #e2e8f0 !important;
    }
    
    .message {
        background: rgba(45, 55, 72, 0.8) !important;
        color: #e2e8f0 !important;
    }
}

/* Hide Footer - Safe and Specific Selectors */
footer {
    visibility: hidden !important;
    display: none !important;
}

.footer {
    visibility: hidden !important;
    display: none !important;
}

/* Hide only Gradio attribution footer specifically */
footer[class*="svelte"] {
    visibility: hidden !important;
    display: none !important;
}

/* Hide Gradio attribution links */
a[href*="gradio.app"] {
    visibility: hidden !important;
    display: none !important;
}

/* More specific footer hiding for Gradio */
.gradio-container footer,
.gradio-container .footer {
    visibility: hidden !important;
    display: none !important;
}

/* Custom Scrollbar */
::-webkit-scrollbar {
    width: 8px !important;
}

::-webkit-scrollbar-track {
    background: rgba(226, 232, 240, 0.3) !important;
    border-radius: 4px !important;
}

::-webkit-scrollbar-thumb {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border-radius: 4px !important;
}

::-webkit-scrollbar-thumb:hover {
    background: linear-gradient(135deg, #764ba2 0%, #667eea 100%) !important;
}
"""

# Page header markup, rendered once at the top of the app via gr.Markdown.
# The Korean tagline reads roughly: "Conversational on-premises AI platform
# for identifying/analysing risk factors in non-destructive X-RAY inspection
# images".
# NOTE: this is the only definition of `title_html`; redundant earlier
# assignments that were overwritten before ever being read have been removed,
# so the rendered header is unchanged.
title_html = """
<div align="center" style="margin-bottom: 1em;">
    <h1 style="margin-bottom: 0.2em; font-size: 1.8em; color: #333;">🤖 Robo Beam-Search</h1>
    <p style="margin: 0.5em 0; font-size: 0.9em; color: #888; max-width: 600px; margin-left: auto; margin-right: auto;">
        비파괴 X-RAY 검사/조사 이미지에 대한 위험 요소 식별/분석 기반 대화형 온프레미스 AI 플랫폼 <strong>Base LLM:</strong> Gemma-3-R1984-4B / 12B/ 27B @Powered by VIDraft 
    </p>
</div>
"""


# Build the application UI.
#
# `delete_cache` is configured on the top-level Blocks rather than on the
# nested ChatInterface: Gradio schedules temp-file cleanup from the Blocks
# instance that is actually launched (`demo`), so the value previously passed
# to the inner ChatInterface had no effect.
with gr.Blocks(
    css=css,
    title="Gemma-3-R1984-4B-BEAM - X-RAY Security Scanner",
    # (frequency, age) in seconds: every 30 min, remove cached files older
    # than 30 min.
    delete_cache=(1800, 1800),
) as demo:
    gr.Markdown(title_html)

    # The only visible extra control. The three components defined after it
    # are created with visible=False and exist solely to feed fixed values
    # into `run` through `additional_inputs`.
    web_search_checkbox = gr.Checkbox(
        label="Deep Research",
        value=False
    )

    # Hidden system prompt (Korean). In summary it instructs the model to:
    #   * always answer in Korean and act as an X-RAY security-scanning AI;
    #   * never include dates/times in the report;
    #   * prioritise weapons, explosives, prohibited items, liquids over
    #     100 ml and EOD components;
    #   * scan top-left to bottom-right, report threat locations by grid
    #     quadrant, grade severity HIGH / MEDIUM / LOW, and recommend an
    #     action per threat;
    #   * request manual inspection whenever in doubt.
    # The literal is runtime text sent to the model; do not reformat it.
    system_prompt_box = gr.Textbox(
        lines=3,
        value="""반드시 한글로 답변하라. 당신은 위협 탐지와 항공 보안에 특화된 첨단 X-RAY 보안 스캐닝 AI입니다. 당신의 주 임무는 X-RAY 이미지에서 모든 잠재적 보안 위협을 최상의 정확도로 식별하는 것입니다.

    중요: 보고서에 날짜, 시간, 또는 현재 일시를 절대 포함하지 마십시오.
    
    탐지 우선순위:
    1. **무기**: 화기(권총, 소총 등), 칼·날붙이·예리한 물체, 호신용·격투 무기
    2. **폭발물**: 폭탄, 기폭장치, 폭발성 물질, 의심스러운 전자 장치, 배터리가 연결된 전선
    3. **반입 금지 물품**: 가위, 대용량 배터리, 스프링(무기 부품 가능), 공구류
    4. **액체**: 100 ml 이상 용기에 담긴 모든 액체(화학 위협 가능)
    5. **EOD 구성품**: 폭발물로 조립될 수 있는 모든 부품
    
    분석 프로토콜:
    - 좌상단에서 우하단으로 체계적으로 스캔
    - 위협 위치를 격자 기준으로 보고(예: “좌상단 사분면”)
    - 위협 심각도 분류  
      - **HIGH** : 즉각적 위험  
      - **MEDIUM** : 반입 금지  
      - **LOW** : 추가 검사 필요
    - 전문 보안 용어 사용
    - 각 위협 항목별 권장 조치 제시
    - 보고서에는 분석 결과만 포함하고 날짜/시간 정보는 포함하지 않음
    
    ⚠️ 중대한 사항: 잠재적 위협을 절대 놓치지 마십시오. 의심스러울 경우 반드시 수동 검사를 요청하십시오.""",
        visible=False  # hidden from view
    )

    # Hidden generation-length control; always supplies its default of 1000.
    max_tokens_slider = gr.Slider(
        label="Max New Tokens",
        minimum=100,
        maximum=8000,
        step=50,
        value=1000,
        visible=False  # hidden from view
    )

    # Hidden free-text search query; always supplies an empty value.
    web_search_text = gr.Textbox(
        lines=1,
        label="Web Search Query",
        placeholder="",
        visible=False  # hidden from view
    )

    # Chat UI. `run` is called with the user message and history, followed by
    # the values of `additional_inputs` in the order listed below.
    chat = gr.ChatInterface(
        fn=run,
        type="messages",
        chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
        textbox=gr.MultimodalTextbox(
            file_types=[
                ".webp", ".png", ".jpg", ".jpeg", ".gif",
                ".mp4", ".csv", ".txt", ".pdf"
            ],
            file_count="multiple",
            autofocus=True
        ),
        multimodal=True,
        additional_inputs=[
            system_prompt_box,
            max_tokens_slider,
            web_search_checkbox,
            web_search_text,
        ],
        stop_btn=False,
        run_examples_on_click=False,
        cache_examples=False,
    )




def _launch_local() -> None:
    """Start the Gradio server for `demo` using Gradio's default launch options."""
    demo.launch()


# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    _launch_local()