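# Robo-Beam / app.py
# Hugging Face file-viewer header captured together with the source (not program text):
# uploader: openfree; commit: "Update app.py" (ec6517e, verified); views: raw / history / blame; size: 40.6 kB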
#!/usr/bin/env python
import os
import re
import tempfile
import gc # garbage collector
from collections.abc import Iterator
from threading import Thread
import json
import requests
import cv2
import gradio as gr
import spaces
import torch
from loguru import logger
from PIL import Image
from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
# CSV/TXT analysis
import pandas as pd
# PDF text extraction
import PyPDF2
##############################################################################
# Memory cleanup function
##############################################################################
def clear_cuda_cache():
    """Free unreferenced Python objects, then release PyTorch's cached CUDA memory.

    The garbage collector runs first so that tensors which only became
    unreachable now are handed back to the caching allocator before the
    cache is emptied; emptying the cache first would leave that memory cached.
    Collection also runs on machines without CUDA.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
##############################################################################
# SERPHouse API key from environment variable
##############################################################################
# Bearer token sent to the SerpHouse API; an empty string when the variable is unset.
SERPHOUSE_API_KEY = os.environ.get("SERPHOUSE_API_KEY", "")
##############################################################################
# Simple keyword extraction function
##############################################################################
def extract_keywords(text: str, top_k: int = 5) -> str:
    """Return the first ``top_k`` whitespace-separated tokens of ``text``.

    Every character that is not an ASCII letter, a digit, a Hangul syllable
    (U+AC00-U+D7A3) or whitespace is removed before tokenising.

    Args:
        text: Free-form user text.
        top_k: Maximum number of tokens to keep.

    Returns:
        The kept tokens joined by single spaces ("" when nothing survives).
    """
    # The Hangul range is written with escapes so the pattern cannot be
    # corrupted by a wrong source-file encoding.
    cleaned = re.sub(r"[^a-zA-Z0-9\uac00-\ud7a3\s]", "", text)
    return " ".join(cleaned.split()[:top_k])
##############################################################################
# SerpHouse Live endpoint call
##############################################################################
def _extract_organic_results(data):
    """Return the non-empty list of organic hits found in a SerpHouse payload, or None."""
    if not isinstance(data, dict):
        return None
    candidates = []
    results = data.get("results")
    if isinstance(results, dict):
        # Layout 1: {"results": {"organic": [...]}}
        candidates.append(results.get("organic"))
        # Layout 2: {"results": {"results": {"organic": [...]}}}
        nested = results.get("results")
        if isinstance(nested, dict):
            candidates.append(nested.get("organic"))
    # Layout 3: {"organic": [...]}
    candidates.append(data.get("organic"))
    for candidate in candidates:
        if isinstance(candidate, list):
            # Entries that are not dicts cannot be formatted; drop them.
            hits = [hit for hit in candidate if isinstance(hit, dict)]
            if hits:
                return hits
    return None


def do_web_search(query: str) -> str:
    """Query the SerpHouse live endpoint and format the organic hits as Markdown.

    Args:
        query: Search terms sent as the ``q`` parameter.

    Returns:
        A Markdown document with up to 20 results (title, snippet, source
        link) preceded by a short instruction header. When no organic
        results can be located, or the request fails, a plain explanatory
        message is returned instead; this function does not raise.
    """
    try:
        url = "https://api.serphouse.com/serp/live"
        # Plain GET with a reduced parameter set; the result count is capped at 20.
        params = {
            "q": query,
            "domain": "google.com",
            "serp_type": "web",  # Basic web search
            "device": "desktop",
            "lang": "en",
            "num": "20",  # Request max 20 results
        }
        headers = {
            "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
        }
        logger.info(f"SerpHouse API call... query: {query}")
        logger.info(f"Request URL: {url} - params: {params}")
        response = requests.get(url, headers=headers, params=params, timeout=60)
        response.raise_for_status()
        logger.info(f"SerpHouse API response status: {response.status_code}")
        data = response.json()

        # Every known layout is tried in turn; an empty list in one layout
        # no longer hides results stored under another.
        organic = _extract_organic_results(data)
        if not organic:
            logger.warning("No organic results found in response.")
            if isinstance(data, dict):
                logger.debug(f"Response structure: {list(data.keys())}")
                results = data.get("results")
                if isinstance(results, dict):
                    logger.debug(f"results structure: {list(results.keys())}")
            return "No web search results found or unexpected API response structure."

        # Limit results to keep the prompt context short.
        limited_organic = organic[:20]
        summary_lines = []
        for idx, item in enumerate(limited_organic, start=1):
            title = item.get("title", "No title")
            link = item.get("link", "#")
            snippet = item.get("snippet", "No description")
            displayed_link = item.get("displayed_link", link)
            # Markdown format
            summary_lines.append(
                f"### Result {idx}: {title}\n\n"
                f"{snippet}\n\n"
                f"**Source**: [{displayed_link}]({link})\n\n"
                f"---\n"
            )

        # Short header telling the model how to use the results.
        instructions = (
            "\n"
            "# X-RAY Security Scanning Reference Results\n"
            "Use this information to enhance your analysis.\n"
        )
        search_results = instructions + "\n".join(summary_lines)
        logger.info(f"Processed {len(limited_organic)} search results")
        return search_results
    except Exception as e:
        # Best effort: the caller embeds whatever string comes back in the prompt.
        logger.error(f"Web search failed: {e}")
        return f"Web search failed: {str(e)}"
##############################################################################
# Model/Processor loading
##############################################################################
# Character cap used when truncating extracted document text and the text
# items of a new user message.
MAX_CONTENT_CHARS = 2000
MAX_INPUT_LENGTH = 2096 # Max input token limit
# Checkpoint to load; can be overridden through the MODEL_ID environment variable.
model_id = os.getenv("MODEL_ID", "VIDraft/Gemma-3-R1984-4B")
processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
# The model is loaded once at import time and used by every call to run().
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="eager" # Change to "flash_attention_2" if available
)
# Maximum number of images per conversation, checked in validate_media_constraints().
MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
##############################################################################
# CSV, TXT, PDF analysis functions
##############################################################################
def analyze_csv_file(path: str) -> str:
    """Render the top-left corner of a CSV file as text.

    At most 50 rows and 10 columns are kept, and the rendered table is cut
    to ``MAX_CONTENT_CHARS`` characters.

    Args:
        path: Location of the CSV file.

    Returns:
        A Markdown-style header followed by the table text, or a
        "Failed to read CSV" message when the file cannot be parsed.
    """
    try:
        # nrows stops parsing after the rows that are actually shown, so a
        # large upload is not loaded into memory just to be discarded.
        df = pd.read_csv(path, nrows=50).iloc[:, :10]
        df_str = df.to_string()
        if len(df_str) > MAX_CONTENT_CHARS:
            df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        return f"**[CSV File: {os.path.basename(path)}]**\n\n{df_str}"
    except Exception as e:
        return f"Failed to read CSV ({os.path.basename(path)}): {str(e)}"
def analyze_txt_file(path: str) -> str:
    """Return the beginning of a text file, cut to ``MAX_CONTENT_CHARS`` characters.

    Args:
        path: Location of the text file.

    Returns:
        A Markdown-style header followed by the (possibly truncated) text,
        or a "Failed to read TXT" message when the file cannot be opened.
    """
    try:
        # errors="replace" keeps files with stray non-UTF-8 bytes readable
        # instead of rejecting the whole upload.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            # One character beyond the cap is enough to know whether the
            # text must be truncated; the rest of the file is never read.
            text = f.read(MAX_CONTENT_CHARS + 1)
        if len(text) > MAX_CONTENT_CHARS:
            text = text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        return f"**[TXT File: {os.path.basename(path)}]**\n\n{text}"
    except Exception as e:
        return f"Failed to read TXT ({os.path.basename(path)}): {str(e)}"
def pdf_to_markdown(pdf_path: str) -> str:
    """Extract the text of the first pages of a PDF as Markdown sections.

    Up to five pages are read. Each page gets an equal share of
    ``MAX_CONTENT_CHARS`` and the combined text is capped at
    ``MAX_CONTENT_CHARS`` as well. Pages without extractable text are
    skipped.

    Args:
        pdf_path: Location of the PDF file.

    Returns:
        A Markdown-style header followed by the page sections, or a
        "Failed to read PDF" message when reading fails.
    """
    sections = []
    try:
        with open(pdf_path, "rb") as pdf_handle:
            reader = PyPDF2.PdfReader(pdf_handle)
            total_pages = len(reader.pages)
            max_pages = min(5, total_pages)
            for index in range(max_pages):
                body = (reader.pages[index].extract_text() or "").strip()
                if not body:
                    continue
                # Equal character budget for each page that is read.
                page_budget = MAX_CONTENT_CHARS // max_pages
                if len(body) > page_budget:
                    body = body[:page_budget] + "...(truncated)"
                sections.append(f"## Page {index + 1}\n\n{body}\n")
            if total_pages > max_pages:
                sections.append(f"\n...(Showing {max_pages} of {total_pages} pages)...")
    except Exception as e:
        return f"Failed to read PDF ({os.path.basename(pdf_path)}): {str(e)}"

    full_text = "\n".join(sections)
    if len(full_text) > MAX_CONTENT_CHARS:
        full_text = full_text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
    return f"**[PDF File: {os.path.basename(pdf_path)}]**\n\n{full_text}"
##############################################################################
# Image/Video upload limit check
##############################################################################
def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
    """Count image and video files among ``paths``.

    Extensions are compared case-insensitively for both kinds, so
    "clip.MP4" counts as a video just as "scan.PNG" counts as an image.
    Paths with any other extension are ignored.

    Returns:
        ``(image_count, video_count)``.
    """
    image_count = 0
    video_count = 0
    for path in paths:
        if path.lower().endswith(".mp4"):
            video_count += 1
        elif re.search(r"\.(png|jpg|jpeg|gif|webp)$", path, re.IGNORECASE):
            image_count += 1
    return image_count, video_count
def count_files_in_history(history: list[dict]) -> tuple[int, int]:
    """Count image and video uploads made by the user in earlier turns.

    Only user turns whose content is a non-empty list or tuple are
    inspected, and only the first entry is treated as the file path.
    Tuples are accepted as well as lists.
    NOTE(review): Gradio's messages-format history appears to deliver file
    turns as ``(path,)`` tuples - confirm against the pinned Gradio version.

    Returns:
        ``(image_count, video_count)``.
    """
    image_count = 0
    video_count = 0
    for item in history:
        if item["role"] != "user":
            continue
        content = item["content"]
        if isinstance(content, str):
            continue
        if not isinstance(content, (list, tuple)) or not content:
            continue
        file_path = content[0]
        if not isinstance(file_path, str):
            continue
        if file_path.lower().endswith(".mp4"):
            video_count += 1
        elif re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE):
            image_count += 1
    return image_count, video_count
def validate_media_constraints(message: dict, history: list[dict]) -> bool:
    """Check the media rules for a new message in the context of the chat.

    Rules, evaluated in this order over history plus the new message:
    at most one video; a video may not be combined with images or with
    ``<image>`` tags; without a video at most ``MAX_NUM_IMAGES`` images;
    and when ``<image>`` tags are used, their number must equal the number
    of image files attached to the new message.

    Returns:
        True when every rule holds. Otherwise a Gradio warning is raised
        for the first violated rule and False is returned.
    """
    media_files = [
        f
        for f in message["files"]
        if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4")
    ]
    new_images, new_videos = count_files_in_new_message(media_files)
    past_images, past_videos = count_files_in_history(history)
    image_count = past_images + new_images
    video_count = past_videos + new_videos

    if video_count > 1:
        gr.Warning("Only one video is supported.")
        return False
    if video_count == 1 and image_count > 0:
        gr.Warning("Mixing images and videos is not allowed.")
        return False
    if video_count == 1 and "<image>" in message["text"]:
        gr.Warning("Using <image> tags with video files is not supported.")
        return False
    if video_count == 0 and image_count > MAX_NUM_IMAGES:
        gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
        return False

    if "<image>" not in message["text"]:
        return True

    # Tags refer to the images attached to this message only, not to history.
    tagged_images = [
        f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)
    ]
    if message["text"].count("<image>") != len(tagged_images):
        gr.Warning("The number of <image> tags in the text does not match the number of image files.")
        return False
    return True
##############################################################################
# Video processing - with temp file tracking
##############################################################################
def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
    """Sample up to five half-size RGB frames from a video.

    Frames are taken every ``max(int(fps), total_frames // 10)`` frames
    (at least every frame), starting at frame 0.

    Args:
        video_path: Location of the video file.

    Returns:
        ``(frame, timestamp_in_seconds)`` pairs in playback order. The
        timestamp is 0.0 when the container reports no usable frame rate.
        The list is empty when no frame could be decoded.
    """
    vidcap = cv2.VideoCapture(video_path)
    frames = []
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        # Treat a missing frame rate (0, negative or NaN) as unknown
        # instead of letting it reach int() or the division below.
        if not (fps > 0):
            fps = 0.0
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        # The final 1 guarantees a valid range() step for short clips with
        # an unknown frame rate.
        frame_interval = max(int(fps), total_frames // 10, 1)
        for i in range(0, total_frames, frame_interval):
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = vidcap.read()
            if success:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                # Halve both dimensions to reduce the image payload.
                image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
                pil_image = Image.fromarray(image)
                timestamp = round(i / fps, 2) if fps else 0.0
                frames.append((pil_image, timestamp))
                if len(frames) >= 5:
                    break
    finally:
        # Release the capture even when decoding raises.
        vidcap.release()
    return frames
def process_video(video_path: str) -> tuple[list[dict], list[str]]:
    """Turn a video into alternating caption/image chat content.

    Every sampled frame is written to a temporary PNG file because the
    image content entries reference frames by path.

    Args:
        video_path: Location of the video file.

    Returns:
        ``(content, temp_files)``: the content entries, and the paths of
        the PNG files the caller has to delete afterwards.

    Raises:
        Exception: whatever sampling or saving raised; PNG files written
            up to that point are removed before the error propagates.
    """
    content = []
    temp_files = []  # Paths handed back to the caller for later deletion
    try:
        for pil_image, timestamp in downsample_video(video_path):
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
                frame_path = temp_file.name
            # Register the path before saving so a failed save is cleaned up too.
            temp_files.append(frame_path)
            # The handle is closed at this point; saving by name to a file
            # that is still open fails on some platforms.
            pil_image.save(frame_path)
            content.append({"type": "text", "text": f"Frame {timestamp}:"})
            content.append({"type": "image", "url": frame_path})
    except Exception:
        # The caller never receives the list on failure, so remove the files here.
        for leftover in temp_files:
            try:
                os.unlink(leftover)
            except OSError:
                pass
        raise
    return content, temp_files
##############################################################################
# interleaved <image> processing
##############################################################################
def process_interleaved_images(message: dict) -> list[dict]:
    """Split the message text at ``<image>`` tags and interleave the image files.

    Each tag is replaced, in order, by the next image file of the message.
    A tag with no image left is kept as literal text. Text between tags is
    stripped; segments that are empty or whitespace-only are emitted
    unchanged.

    Returns:
        Content entries of type "text" and "image" in reading order.
    """
    image_paths = [
        f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)
    ]
    segments = []
    next_image = 0
    for piece in re.split(r"(<image>)", message["text"]):
        if piece == "<image>" and next_image < len(image_paths):
            segments.append({"type": "image", "url": image_paths[next_image]})
            next_image += 1
            continue
        trimmed = piece.strip()
        # Blank pieces are passed through untrimmed, exactly as they were split.
        segments.append({"type": "text", "text": trimmed if trimmed else piece})
    return segments
##############################################################################
# PDF + CSV + TXT + Image/Video
##############################################################################
def is_image_file(file_path: str) -> bool:
    """Tell whether the path ends in a supported image extension (case-insensitive)."""
    match = re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE)
    return match is not None
def is_video_file(file_path: str) -> bool:
    """Tell whether the path ends in ``.mp4``.

    The comparison ignores case, in line with ``is_image_file`` and
    ``is_document_file``.
    """
    return file_path.lower().endswith(".mp4")
def is_document_file(file_path: str) -> bool:
    """Tell whether the path ends in ``.pdf``, ``.csv`` or ``.txt`` (case-insensitive)."""
    document_suffixes = (".pdf", ".csv", ".txt")
    return file_path.lower().endswith(document_suffixes)
def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
    """Build the chat content for the new user turn.

    The content starts with the user text, followed by the analysis text
    of every CSV, TXT and PDF attachment (in that order). If a video is
    attached, the frames of the first video are appended and images are
    ignored. Otherwise images are either interleaved at the ``<image>``
    tags (the interleaved content then replaces the leading text entry)
    or appended after the documents.

    Returns:
        ``(content, temp_files)`` where ``temp_files`` lists temporary
        frame files created for a video.
    """
    temp_files = []  # Only video processing creates temporary files
    attachments = message["files"]
    user_text = message["text"]
    if not attachments:
        return [{"type": "text", "text": user_text}], temp_files

    video_files = [f for f in attachments if is_video_file(f)]
    image_files = [f for f in attachments if is_image_file(f)]

    # Document summaries, grouped by type in CSV -> TXT -> PDF order.
    document_parts = []
    for suffix, analyzer in (
        (".csv", analyze_csv_file),
        (".txt", analyze_txt_file),
        (".pdf", pdf_to_markdown),
    ):
        for candidate in attachments:
            if candidate.lower().endswith(suffix):
                document_parts.append({"type": "text", "text": analyzer(candidate)})

    text_part = {"type": "text", "text": user_text}

    if video_files:
        video_content, video_temp_files = process_video(video_files[0])
        temp_files.extend(video_temp_files)
        return [text_part] + document_parts + video_content, temp_files

    if "<image>" in user_text and image_files:
        interleaved_content = process_interleaved_images({"text": user_text, "files": image_files})
        # The interleaved content already carries the user text.
        return interleaved_content + document_parts, temp_files

    image_parts = [{"type": "image", "url": img_path} for img_path in image_files]
    return [text_part] + document_parts + image_parts, temp_files
##############################################################################
# history -> LLM message conversion
##############################################################################
def process_history(history: list[dict]) -> list[dict]:
    """Convert chat history into the message list given to the chat template.

    Consecutive user entries are merged into one user message. String
    content becomes a text entry. List or tuple content is treated as a
    file upload whose first element is the path: images become image
    entries, other files a "[File: name]" placeholder.
    NOTE(review): Gradio's messages-format history appears to deliver file
    turns as ``(path,)`` tuples - confirm against the pinned Gradio version.

    Returns:
        Messages with "role" and a list-valued "content".
    """
    messages = []
    current_user_content: list[dict] = []
    for item in history:
        if item["role"] == "assistant":
            # Flush the pending user entries before the assistant reply.
            if current_user_content:
                messages.append({"role": "user", "content": current_user_content})
                current_user_content = []
            messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
            continue

        content = item["content"]
        if isinstance(content, str):
            current_user_content.append({"type": "text", "text": content})
        elif isinstance(content, (list, tuple)) and len(content) > 0:
            file_path = content[0]
            if not isinstance(file_path, str):
                # Not a path; nothing that can be forwarded to the model.
                continue
            if is_image_file(file_path):
                current_user_content.append({"type": "image", "url": file_path})
            else:
                current_user_content.append({"type": "text", "text": f"[File: {os.path.basename(file_path)}]"})

    if current_user_content:
        messages.append({"role": "user", "content": current_user_content})
    return messages
##############################################################################
# Model generation function with OOM catch
##############################################################################
def _model_gen_with_oom_catch(**kwargs):
    """Run ``model.generate`` with the given keyword arguments.

    Used as the target of the generation thread. A CUDA out-of-memory
    error is re-raised as ``RuntimeError`` with a user-oriented message.
    The CUDA cache is cleared afterwards whether generation succeeded or not.
    """
    oom_message = (
        "[OutOfMemoryError] GPU memory insufficient. "
        "Please reduce Max New Tokens or prompt length."
    )
    try:
        model.generate(**kwargs)
    except torch.cuda.OutOfMemoryError:
        raise RuntimeError(oom_message)
    finally:
        # Clear cache after generation
        clear_cuda_cache()
##############################################################################
# Main inference function (with auto web search)
##############################################################################
@spaces.GPU(duration=120)
def run(
    message: dict,
    history: list[dict],
    system_prompt: str = "",
    max_new_tokens: int = 512,
    use_web_search: bool = False,
    web_search_query: str = "",
) -> Iterator[str]:
    """Stream the model's reply to a multimodal chat message.

    Args:
        message: New user turn with "text" and "files" keys.
        history: Earlier turns as role/content dicts.
        system_prompt: Optional instructions placed in the system message.
        max_new_tokens: Generation budget passed to ``model.generate``.
        use_web_search: When true, SerpHouse results are appended to the
            system message.
        web_search_query: Explicit search query. When blank, the query is
            built from the first keywords of the user text.

    Yields:
        The accumulated response text after every streamed chunk; a single
        empty string when media validation fails; or an
        "Error occurred: ..." message when anything raises.
    """
    if not validate_media_constraints(message, history):
        yield ""
        return

    temp_files = []  # Temporary frame files to delete when the request ends
    # Bound up front so the cleanup below never meets an unbound name.
    inputs = None
    streamer = None
    gen_kwargs = None
    try:
        combined_system_msg = ""
        # Used internally only (hidden from UI)
        if system_prompt.strip():
            combined_system_msg += f"[System Prompt]\n{system_prompt.strip()}\n\n"

        if use_web_search:
            # An explicit query wins; otherwise fall back to keyword extraction.
            ws_query = (web_search_query or "").strip() or extract_keywords(message["text"], top_k=5)
            if ws_query.strip():
                logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
                ws_result = do_web_search(ws_query)
                combined_system_msg += f"[X-RAY Security Reference Data]\n{ws_result}\n\n"
            else:
                combined_system_msg += "[No valid keywords found, skipping WebSearch]\n\n"

        messages = []
        if combined_system_msg.strip():
            messages.append({
                "role": "system",
                "content": [{"type": "text", "text": combined_system_msg.strip()}],
            })
        messages.extend(process_history(history))

        user_content, user_temp_files = process_new_user_message(message)
        temp_files.extend(user_temp_files)
        for item in user_content:
            if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
                item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        messages.append({"role": "user", "content": user_content})

        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(device=model.device, dtype=torch.bfloat16)

        # Limit input token count. The tensors are replaced through item
        # access: setting attributes on the processor output would leave
        # the mapping unpacked into generate() untouched.
        prompt_length = inputs["input_ids"].shape[1]
        if prompt_length > MAX_INPUT_LENGTH:
            if "pixel_values" in inputs:
                # NOTE(review): dropping prompt tokens would presumably cut
                # image placeholder tokens while pixel_values stays complete,
                # so prompts with images are left untruncated - confirm.
                logger.warning(
                    f"Prompt has {prompt_length} tokens (limit {MAX_INPUT_LENGTH}); "
                    "not truncated because it contains images."
                )
            else:
                logger.warning(
                    f"Prompt has {prompt_length} tokens; keeping the last {MAX_INPUT_LENGTH}."
                )
                # Slice every per-token tensor the same way so they stay aligned.
                for key in ("input_ids", "attention_mask", "token_type_ids"):
                    if key in inputs:
                        inputs[key] = inputs[key][:, -MAX_INPUT_LENGTH:]

        streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
        gen_kwargs = dict(
            inputs,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
        )
        # Generation runs in a worker thread while this generator consumes the streamer.
        t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
        t.start()

        output = ""
        for new_text in streamer:
            output += new_text
            yield output
    except Exception as e:
        logger.error(f"Error in run: {str(e)}")
        yield f"Error occurred: {str(e)}"
    finally:
        # Delete temp files
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.unlink(temp_file)
                    logger.info(f"Deleted temp file: {temp_file}")
            except Exception as e:
                logger.warning(f"Failed to delete temp file {temp_file}: {e}")
        # Drop this frame's references to the tensors before clearing the cache.
        inputs = None
        streamer = None
        gen_kwargs = None
        clear_cuda_cache()
##############################################################################
# Gradio UI (Blocks) setup
##############################################################################
css = """
/* Global Styles */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
* {
box-sizing: border-box;
}
body {
margin: 0;
padding: 0;
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
color: #2d3748;
}
/* Container Styling */
.gradio-container {
background: rgba(255, 255, 255, 0.95);
backdrop-filter: blur(20px);
border-radius: 24px;
padding: 40px;
margin: 30px auto;
width: 95% !important;
max-width: 1400px !important;
box-shadow:
0 25px 50px -12px rgba(0, 0, 0, 0.25),
0 0 0 1px rgba(255, 255, 255, 0.05);
border: 1px solid rgba(255, 255, 255, 0.2);
}
/* Header Styling */
.header-container {
text-align: center;
margin-bottom: 2rem;
padding: 2rem 0;
background: linear-gradient(135deg, #f093fb 0%, #f5576c 50%, #4facfe 100%);
background-clip: text;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
/* Button Styling */
button, .btn {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border: none !important;
color: white !important;
padding: 12px 28px !important;
border-radius: 12px !important;
font-weight: 600 !important;
font-size: 14px !important;
text-transform: none !important;
letter-spacing: 0.5px !important;
cursor: pointer !important;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
position: relative !important;
overflow: hidden !important;
}
button:hover, .btn:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
background: linear-gradient(135deg, #764ba2 0%, #667eea 100%) !important;
}
button:active, .btn:active {
transform: translateY(0) !important;
}
/* Primary Action Button */
button[variant="primary"], .primary-btn {
background: linear-gradient(135deg, #ff6b6b 0%, #ee5a52 100%) !important;
box-shadow: 0 4px 15px rgba(255, 107, 107, 0.4) !important;
}
button[variant="primary"]:hover, .primary-btn:hover {
box-shadow: 0 8px 25px rgba(255, 107, 107, 0.6) !important;
}
/* Input Fields */
.multimodal-textbox, textarea, input {
background: rgba(255, 255, 255, 0.8) !important;
backdrop-filter: blur(10px) !important;
border: 2px solid rgba(102, 126, 234, 0.2) !important;
border-radius: 16px !important;
color: #2d3748 !important;
font-family: 'Inter', sans-serif !important;
padding: 16px 20px !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1) !important;
}
.multimodal-textbox:focus, textarea:focus, input:focus {
border-color: #667eea !important;
box-shadow: 0 0 0 4px rgba(102, 126, 234, 0.1), 0 8px 30px rgba(0, 0, 0, 0.15) !important;
outline: none !important;
background: rgba(255, 255, 255, 0.95) !important;
}
/* Chat Interface */
.chatbox, .chatbot {
background: rgba(255, 255, 255, 0.6) !important;
backdrop-filter: blur(15px) !important;
border-radius: 20px !important;
border: 1px solid rgba(255, 255, 255, 0.3) !important;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1) !important;
padding: 24px !important;
}
.message {
background: rgba(255, 255, 255, 0.9) !important;
border-radius: 16px !important;
padding: 16px 20px !important;
margin: 8px 0 !important;
border: 1px solid rgba(102, 126, 234, 0.1) !important;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05) !important;
transition: all 0.3s ease !important;
}
.message:hover {
transform: translateY(-1px) !important;
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1) !important;
}
/* Assistant Message Styling */
.message.assistant {
background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%) !important;
border-left: 4px solid #667eea !important;
}
/* User Message Styling */
.message.user {
background: linear-gradient(135deg, rgba(255, 107, 107, 0.1) 0%, rgba(238, 90, 82, 0.1) 100%) !important;
border-left: 4px solid #ff6b6b !important;
}
/* Cards and Panels */
.card, .panel {
background: rgba(255, 255, 255, 0.8) !important;
backdrop-filter: blur(15px) !important;
border-radius: 20px !important;
padding: 24px !important;
border: 1px solid rgba(255, 255, 255, 0.3) !important;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1) !important;
transition: all 0.3s ease !important;
}
.card:hover, .panel:hover {
transform: translateY(-4px) !important;
box-shadow: 0 16px 40px rgba(0, 0, 0, 0.15) !important;
}
/* Checkbox Styling */
input[type="checkbox"] {
appearance: none !important;
width: 20px !important;
height: 20px !important;
border: 2px solid #667eea !important;
border-radius: 6px !important;
background: rgba(255, 255, 255, 0.8) !important;
cursor: pointer !important;
transition: all 0.3s ease !important;
position: relative !important;
}
input[type="checkbox"]:checked {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border-color: #667eea !important;
}
input[type="checkbox"]:checked::after {
content: "✓" !important;
color: white !important;
font-size: 14px !important;
font-weight: bold !important;
position: absolute !important;
top: 50% !important;
left: 50% !important;
transform: translate(-50%, -50%) !important;
}
/* Progress Indicators */
.progress {
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
border-radius: 10px !important;
height: 8px !important;
}
/* Tooltips */
.tooltip {
background: rgba(45, 55, 72, 0.95) !important;
backdrop-filter: blur(10px) !important;
color: white !important;
border-radius: 8px !important;
padding: 8px 12px !important;
font-size: 12px !important;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3) !important;
}
/* Slider Styling */
input[type="range"] {
appearance: none !important;
height: 8px !important;
border-radius: 4px !important;
background: linear-gradient(90deg, #e2e8f0 0%, #667eea 100%) !important;
outline: none !important;
}
input[type="range"]::-webkit-slider-thumb {
appearance: none !important;
width: 20px !important;
height: 20px !important;
border-radius: 50% !important;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
cursor: pointer !important;
box-shadow: 0 2px 8px rgba(102, 126, 234, 0.4) !important;
}
/* File Upload Area */
.file-upload {
border: 2px dashed #667eea !important;
border-radius: 16px !important;
background: rgba(102, 126, 234, 0.05) !important;
padding: 40px !important;
text-align: center !important;
transition: all 0.3s ease !important;
}
.file-upload:hover {
border-color: #764ba2 !important;
background: rgba(102, 126, 234, 0.1) !important;
transform: scale(1.02) !important;
}
/* Animations */
@keyframes fadeInUp {
from {
opacity: 0;
transform: translateY(30px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
@keyframes slideIn {
from {
opacity: 0;
transform: translateX(-20px);
}
to {
opacity: 1;
transform: translateX(0);
}
}
.animate-fade-in {
animation: fadeInUp 0.6s ease-out !important;
}
.animate-slide-in {
animation: slideIn 0.4s ease-out !important;
}
/* Responsive Design */
@media (max-width: 768px) {
.gradio-container {
margin: 15px !important;
padding: 24px !important;
width: calc(100% - 30px) !important;
}
button, .btn {
padding: 10px 20px !important;
font-size: 13px !important;
}
}
/* Dark Mode Support */
@media (prefers-color-scheme: dark) {
.gradio-container {
background: rgba(26, 32, 44, 0.95) !important;
color: #e2e8f0 !important;
}
.message {
background: rgba(45, 55, 72, 0.8) !important;
color: #e2e8f0 !important;
}
}
/* Hide Footer - Safe and Specific Selectors */
footer {
visibility: hidden !important;
display: none !important;
}
.footer {
visibility: hidden !important;
display: none !important;
}
/* Hide only Gradio attribution footer specifically */
footer[class*="svelte"] {
visibility: hidden !important;
display: none !important;
}
/* Hide Gradio attribution links */
a[href*="gradio.app"] {
visibility: hidden !important;
display: none !important;
}
/* More specific footer hiding for Gradio */
.gradio-container footer,
.gradio-container .footer {
visibility: hidden !important;
display: none !important;
}
/* Custom Scrollbar */
::-webkit-scrollbar {
width: 8px !important;
}
::-webkit-scrollbar-track {
background: rgba(226, 232, 240, 0.3) !important;
border-radius: 4px !important;
}
::-webkit-scrollbar-thumb {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border-radius: 4px !important;
}
::-webkit-scrollbar-thumb:hover {
background: linear-gradient(135deg, #764ba2 0%, #667eea 100%) !important;
}
"""
# Page header shown at the top of the UI. Two earlier, more elaborate
# definitions of title_html were assigned and then immediately overwritten;
# only the definition that was actually in effect is kept.
title_html = """
<div align="center" style="margin-bottom: 1em;">
<h1 style="margin-bottom: 0.2em; font-size: 1.8em; color: #333;">🤖 Robo Beam-Search</h1>
<p style="margin: 0.5em 0; font-size: 0.9em; color: #888; max-width: 600px; margin-left: auto; margin-right: auto;">
비파괴 X-RAY 검사/조사 이미지에 대한 위험 요소 식별/분석 기반 대화형 온프레미스 AI 플랫폼 <strong>Base LLM:</strong> Gemma-3-R1984-4B / 12B/ 27B @Powered by VIDraft
</p>
</div>
"""
with gr.Blocks(css=css, title="Gemma-3-R1984-4B-BEAM - X-RAY Security Scanner") as demo:
gr.Markdown(title_html)
# Display the web search option (while the system prompt and token slider remain hidden)
web_search_checkbox = gr.Checkbox(
label="Deep Research",
value=False
)
# X-RAY security scanning system prompt
system_prompt_box = gr.Textbox(
lines=3,
value="""๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ๋‹ต๋ณ€ํ•˜๋ผ. ๋‹น์‹ ์€ ์œ„ํ˜‘ ํƒ์ง€์™€ ํ•ญ๊ณต ๋ณด์•ˆ์— ํŠนํ™”๋œ ์ฒจ๋‹จ X-RAY ๋ณด์•ˆ ์Šค์บ๋‹ AI์ž…๋‹ˆ๋‹ค. ๋‹น์‹ ์˜ ์ฃผ ์ž„๋ฌด๋Š” X-RAY ์ด๋ฏธ์ง€์—์„œ ๋ชจ๋“  ์ž ์žฌ์  ๋ณด์•ˆ ์œ„ํ˜‘์„ ์ตœ์ƒ์˜ ์ •ํ™•๋„๋กœ ์‹๋ณ„ํ•˜๋Š” ๊ฒƒ์ž…๋‹ˆ๋‹ค.
์ค‘์š”: ๋ณด๊ณ ์„œ์— ๋‚ ์งœ, ์‹œ๊ฐ„, ๋˜๋Š” ํ˜„์žฌ ์ผ์‹œ๋ฅผ ์ ˆ๋Œ€ ํฌํ•จํ•˜์ง€ ๋งˆ์‹ญ์‹œ์˜ค.
ํƒ์ง€ ์šฐ์„ ์ˆœ์œ„:
1. **๋ฌด๊ธฐ**: ํ™”๊ธฐ(๊ถŒ์ด, ์†Œ์ด ๋“ฑ), ์นผยท๋‚ ๋ถ™์ดยท์˜ˆ๋ฆฌํ•œ ๋ฌผ์ฒด, ํ˜ธ์‹ ์šฉยท๊ฒฉํˆฌ ๋ฌด๊ธฐ
2. **ํญ๋ฐœ๋ฌผ**: ํญํƒ„, ๊ธฐํญ์žฅ์น˜, ํญ๋ฐœ์„ฑ ๋ฌผ์งˆ, ์˜์‹ฌ์Šค๋Ÿฌ์šด ์ „์ž ์žฅ์น˜, ๋ฐฐํ„ฐ๋ฆฌ๊ฐ€ ์—ฐ๊ฒฐ๋œ ์ „์„ 
3. **๋ฐ˜์ž… ๊ธˆ์ง€ ๋ฌผํ’ˆ**: ๊ฐ€์œ„, ๋Œ€์šฉ๋Ÿ‰ ๋ฐฐํ„ฐ๋ฆฌ, ์Šคํ”„๋ง(๋ฌด๊ธฐ ๋ถ€ํ’ˆ ๊ฐ€๋Šฅ), ๊ณต๊ตฌ๋ฅ˜
4. **์•ก์ฒด**: 100 ml ์ด์ƒ ์šฉ๊ธฐ์— ๋‹ด๊ธด ๋ชจ๋“  ์•ก์ฒด(ํ™”ํ•™ ์œ„ํ˜‘ ๊ฐ€๋Šฅ)
5. **EOD ๊ตฌ์„ฑํ’ˆ**: ํญ๋ฐœ๋ฌผ๋กœ ์กฐ๋ฆฝ๋  ์ˆ˜ ์žˆ๋Š” ๋ชจ๋“  ๋ถ€ํ’ˆ
๋ถ„์„ ํ”„๋กœํ† ์ฝœ:
- ์ขŒ์ƒ๋‹จ์—์„œ ์šฐํ•˜๋‹จ์œผ๋กœ ์ฒด๊ณ„์ ์œผ๋กœ ์Šค์บ”
- ์œ„ํ˜‘ ์œ„์น˜๋ฅผ ๊ฒฉ์ž ๊ธฐ์ค€์œผ๋กœ ๋ณด๊ณ (์˜ˆ: โ€œ์ขŒ์ƒ๋‹จ ์‚ฌ๋ถ„๋ฉดโ€)
- ์œ„ํ˜‘ ์‹ฌ๊ฐ๋„ ๋ถ„๋ฅ˜
- **HIGH** : ์ฆ‰๊ฐ์  ์œ„ํ—˜
- **MEDIUM** : ๋ฐ˜์ž… ๊ธˆ์ง€
- **LOW** : ์ถ”๊ฐ€ ๊ฒ€์‚ฌ ํ•„์š”
- ์ „๋ฌธ ๋ณด์•ˆ ์šฉ์–ด ์‚ฌ์šฉ
- ๊ฐ ์œ„ํ˜‘ ํ•ญ๋ชฉ๋ณ„ ๊ถŒ์žฅ ์กฐ์น˜ ์ œ์‹œ
- ๋ณด๊ณ ์„œ์—๋Š” ๋ถ„์„ ๊ฒฐ๊ณผ๋งŒ ํฌํ•จํ•˜๊ณ  ๋‚ ์งœ/์‹œ๊ฐ„ ์ •๋ณด๋Š” ํฌํ•จํ•˜์ง€ ์•Š์Œ
โš ๏ธ ์ค‘๋Œ€ํ•œ ์‚ฌํ•ญ: ์ž ์žฌ์  ์œ„ํ˜‘์„ ์ ˆ๋Œ€ ๋†“์น˜์ง€ ๋งˆ์‹ญ์‹œ์˜ค. ์˜์‹ฌ์Šค๋Ÿฌ์šธ ๊ฒฝ์šฐ ๋ฐ˜๋“œ์‹œ ์ˆ˜๋™ ๊ฒ€์‚ฌ๋ฅผ ์š”์ฒญํ•˜์‹ญ์‹œ์˜ค.""",
visible=False # hidden from view
)
max_tokens_slider = gr.Slider(
label="Max New Tokens",
minimum=100,
maximum=8000,
step=50,
value=1000,
visible=False # hidden from view
)
web_search_text = gr.Textbox(
lines=1,
label="Web Search Query",
placeholder="",
visible=False # hidden from view
)
# Configure the chat interface
chat = gr.ChatInterface(
fn=run,
type="messages",
chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
textbox=gr.MultimodalTextbox(
file_types=[
".webp", ".png", ".jpg", ".jpeg", ".gif",
".mp4", ".csv", ".txt", ".pdf"
],
file_count="multiple",
autofocus=True
),
multimodal=True,
additional_inputs=[
system_prompt_box,
max_tokens_slider,
web_search_checkbox,
web_search_text,
],
stop_btn=False,
run_examples_on_click=False,
cache_examples=False,
css_paths=None,
delete_cache=(1800, 1800),
)
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    # Run locally
    demo.launch()