openfree committed on
Commit b9f6a1d · verified · 1 Parent(s): 54917ae

Update app.py

Files changed (1)
  1. app.py +83 -148
app.py CHANGED
@@ -3,7 +3,7 @@
3
  import os
4
  import re
5
  import tempfile
6
- import gc # garbage collector (added)
7
  from collections.abc import Iterator
8
  from threading import Thread
9
  import json
@@ -16,16 +16,16 @@ from loguru import logger
16
  from PIL import Image
17
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
18
 
19
- # CSV/TXT analysis
20
  import pandas as pd
21
- # PDF text extraction
22
  import PyPDF2
23
 
24
  ##############################################################################
25
- # Added memory cleanup function
26
  ##############################################################################
27
  def clear_cuda_cache():
28
- """CUDA 캐시를 명시적으로 비웁니다."""
29
  if torch.cuda.is_available():
30
  torch.cuda.empty_cache()
31
  gc.collect()
@@ -36,13 +36,11 @@ def clear_cuda_cache():
36
  SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
37
 
38
  ##############################################################################
39
- # Simple keyword extraction function (keeps Korean, Latin letters, digits, and spaces)
40
  ##############################################################################
41
  def extract_keywords(text: str, top_k: int = 5) -> str:
42
  """
43
- 1) Keep only Korean (가-힣), English letters (a-zA-Z), digits (0-9), and whitespace
44
- 2) Split into tokens on whitespace
45
- 3) Keep at most top_k tokens
46
  """
47
  text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
48
  tokens = text.split()
@@ -50,13 +48,11 @@ def extract_keywords(text: str, top_k: int = 5) -> str:
50
  return " ".join(key_tokens)
51
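For reference, a quick sketch of what this keyword extraction yields for a typical query (hypothetical input, not part of the commit): the regex strips everything except Korean, Latin letters, digits and whitespace, then at most top_k whitespace-separated tokens are kept.

print(extract_keywords("X-RAY scan: knife or scissors?"))
# -> "XRAY scan knife or scissors"  (hyphen and punctuation stripped, first 5 tokens kept)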
 
52
  ##############################################################################
53
- # SerpHouse Live endpoint call
54
- # - include link, snippet, etc. for every item when passing the top 20 results JSON to the LLM
55
  ##############################################################################
56
  def do_web_search(query: str) -> str:
57
  """
58
- Return the full top 20 'organic' result items (title, link, snippet, etc.)
59
- as a JSON string
60
  """
61
  try:
62
  url = "https://api.serphouse.com/serp/live"
@@ -65,55 +61,55 @@ def do_web_search(query: str) -> str:
65
  params = {
66
  "q": query,
67
  "domain": "google.com",
68
- "serp_type": "web", # 기본 검색
69
  "device": "desktop",
70
  "lang": "en",
71
- "num": "20" # 최대 20 결과만 요청
72
  }
73
 
74
  headers = {
75
  "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
76
  }
77
 
78
- logger.info(f"Calling SerpHouse API... query: {query}")
79
- logger.info(f"Request URL: {url} - params: {params}")
80
 
81
- # Perform GET request
82
  response = requests.get(url, headers=headers, params=params, timeout=60)
83
  response.raise_for_status()
84
 
85
- logger.info(f"SerpHouse API response status code: {response.status_code}")
86
  data = response.json()
87
 
88
- # Handle various response structures
89
  results = data.get("results", {})
90
  organic = None
91
 
92
- # Possible response structure 1
93
  if isinstance(results, dict) and "organic" in results:
94
  organic = results["organic"]
95
 
96
- # Possible response structure 2 (nested results)
97
  elif isinstance(results, dict) and "results" in results:
98
  if isinstance(results["results"], dict) and "organic" in results["results"]:
99
  organic = results["results"]["organic"]
100
 
101
- # Possible response structure 3 (top-level organic)
102
  elif "organic" in data:
103
  organic = data["organic"]
104
 
105
  if not organic:
106
- logger.warning("Could not find organic results in the response.")
107
- logger.debug(f"Response structure: {list(data.keys())}")
108
  if isinstance(results, dict):
109
- logger.debug(f"results structure: {list(results.keys())}")
110
  return "No web search results found or unexpected API response structure."
111
 
112
- # Limit results and optimize context length
113
  max_results = min(20, len(organic))
114
  limited_organic = organic[:max_results]
115
 
116
- # Improve result formatting - output as Markdown for better readability
117
  summary_lines = []
118
  for idx, item in enumerate(limited_organic, start=1):
119
  title = item.get("title", "No title")
@@ -121,26 +117,22 @@ def do_web_search(query: str) -> str:
121
  snippet = item.get("snippet", "No description")
122
  displayed_link = item.get("displayed_link", link)
123
 
124
- # Markdown format (clickable links)
125
  summary_lines.append(
126
  f"### Result {idx}: {title}\n\n"
127
  f"{snippet}\n\n"
128
- f"**출처**: [{displayed_link}]({link})\n\n"
129
  f"---\n"
130
  )
131
 
132
- # Add clear instructions for the model
133
  instructions = """
134
  # X-RAY Security Scanning Reference Results
135
- Below are search results about X-RAY security scanning and threat detection. Use this information to enhance your analysis:
136
- 1. Reference security protocols and standards from the results
137
- 2. Compare findings with known threat patterns
138
- 3. Cite relevant security guidelines when applicable
139
- 4. Use multiple sources to verify threat assessments
140
  """
141
 
142
  search_results = instructions + "\n".join(summary_lines)
143
- logger.info(f"Finished processing {len(limited_organic)} search results")
144
  return search_results
145
 
146
  except Exception as e:
@@ -149,10 +141,10 @@ Below are search results about X-RAY security scanning and threat detection. Use
149
 
150
 
151
  ##############################################################################
152
- # Model/processor loading
153
  ##############################################################################
154
  MAX_CONTENT_CHARS = 2000
155
- MAX_INPUT_LENGTH = 2096 # added max input token limit
156
  model_id = os.getenv("MODEL_ID", "VIDraft/Gemma-3-R1984-4B")
157
 
158
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
@@ -160,17 +152,17 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
160
  model_id,
161
  device_map="auto",
162
  torch_dtype=torch.bfloat16,
163
- attn_implementation="eager" # change to "flash_attention_2" if possible
164
  )
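The comment on attn_implementation suggests switching to "flash_attention_2" where possible; a minimal sketch of making that choice conditional on the flash-attn package being installed (an assumption, not what this commit does):

try:
    import flash_attn  # noqa: F401
    _attn_impl = "flash_attention_2"
except ImportError:
    _attn_impl = "eager"
# ...then pass attn_implementation=_attn_impl to from_pretrained(...) instead of the hard-coded "eager".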
165
  MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
166
 
167
 
168
  ##############################################################################
169
- # CSV, TXT, PDF analysis functions
170
  ##############################################################################
171
  def analyze_csv_file(path: str) -> str:
172
  """
173
- Convert the entire CSV file to a string. Show only part of it if too long.
174
  """
175
  try:
176
  df = pd.read_csv(path)
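The rest of this hunk is elided; a minimal sketch of the truncation the docstring describes, assuming MAX_CONTENT_CHARS is the cap (hypothetical reconstruction, names may differ from the actual file):

def analyze_csv_file_sketch(path: str) -> str:
    # Hypothetical stand-in for the elided body of analyze_csv_file.
    df = pd.read_csv(path)
    df_str = df.to_string()
    if len(df_str) > MAX_CONTENT_CHARS:
        df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
    return f"**[CSV File] {os.path.basename(path)}**\n\n{df_str}"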
@@ -186,7 +178,7 @@ def analyze_csv_file(path: str) -> str:
186
 
187
  def analyze_txt_file(path: str) -> str:
188
  """
189
- Read the full TXT file. Show only part of it if too long.
190
  """
191
  try:
192
  with open(path, "r", encoding="utf-8") as f:
@@ -200,7 +192,7 @@ def analyze_txt_file(path: str) -> str:
200
 
201
  def pdf_to_markdown(pdf_path: str) -> str:
202
  """
203
- Convert PDF text to Markdown. Extract text simply, page by page.
204
  """
205
  text_chunks = []
206
  try:
@@ -228,7 +220,7 @@ def pdf_to_markdown(pdf_path: str) -> str:
228
 
229
 
230
  ##############################################################################
231
- # Image/video upload limit checks
232
  ##############################################################################
233
  def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
234
  image_count = 0
@@ -293,7 +285,7 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
293
 
294
 
295
  ##############################################################################
296
- # Video processing - added temp file tracking code
297
  ##############################################################################
298
  def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
299
  vidcap = cv2.VideoCapture(video_path)
@@ -307,7 +299,7 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
307
  success, image = vidcap.read()
308
  if success:
309
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
310
- # Added image size reduction
311
  image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
312
  pil_image = Image.fromarray(image)
313
  timestamp = round(i / fps, 2)
@@ -321,14 +313,14 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
321
 
322
  def process_video(video_path: str) -> tuple[list[dict], list[str]]:
323
  content = []
324
- temp_files = [] # list for tracking temp files
325
 
326
  frames = downsample_video(video_path)
327
  for frame in frames:
328
  pil_image, timestamp = frame
329
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
330
  pil_image.save(temp_file.name)
331
- temp_files.append(temp_file.name) # store the path for tracking
332
  content.append({"type": "text", "text": f"Frame {timestamp}:"})
333
  content.append({"type": "image", "url": temp_file.name})
334
 
@@ -336,7 +328,7 @@ def process_video(video_path: str) -> tuple[list[dict], list[str]]:
336
 
337
 
338
  ##############################################################################
339
- # interleaved <image> handling
340
  ##############################################################################
341
  def process_interleaved_images(message: dict) -> list[dict]:
342
  parts = re.split(r"(<image>)", message["text"])
@@ -358,7 +350,7 @@ def process_interleaved_images(message: dict) -> list[dict]:
358
 
359
 
360
  ##############################################################################
361
- # PDF + CSV + TXT + image/video
362
  ##############################################################################
363
  def is_image_file(file_path: str) -> bool:
364
  return bool(re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE))
@@ -375,7 +367,7 @@ def is_document_file(file_path: str) -> bool:
375
 
376
 
377
  def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
378
- temp_files = [] # list for tracking temp files
379
 
380
  if not message["files"]:
381
  return [{"type": "text", "text": message["text"]}], temp_files
@@ -419,7 +411,7 @@ def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
419
 
420
 
421
  ##############################################################################
422
- # history -> LLM message conversion
423
  ##############################################################################
424
  def process_history(history: list[dict]) -> list[dict]:
425
  messages = []
@@ -448,26 +440,26 @@ def process_history(history: list[dict]) -> list[dict]:
448
 
449
 
450
  ##############################################################################
451
- # Catch OOM in the model generation function
452
  ##############################################################################
453
  def _model_gen_with_oom_catch(**kwargs):
454
  """
455
- To catch OutOfMemoryError when run in a separate thread
456
  """
457
  try:
458
  model.generate(**kwargs)
459
  except torch.cuda.OutOfMemoryError:
460
  raise RuntimeError(
461
- "[OutOfMemoryError] GPU 메모리가 부족합니다. "
462
- "Max New Tokens 줄이거나, 프롬프트 길이를 줄여주세요."
463
  )
464
  finally:
465
- # Clear the cache once more after generation completes
466
  clear_cuda_cache()
467
 
468
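For context, a minimal sketch of how a helper like this is typically driven from run() with a streamer, assuming the processor can stand in as the streamer's tokenizer (as in common Gemma 3 demos); the actual wiring in run() further down may differ:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(inputs, max_new_tokens: int = 512):
    # Generation runs in a background thread so partial text can be yielded as it arrives;
    # _model_gen_with_oom_catch re-raises CUDA OOM as a RuntimeError.
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs).start()
    output = ""
    for chunk in streamer:
        output += chunk
        yield output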
 
469
  ##############################################################################
470
- # Main inference function (when web search is checked: auto keyword extraction -> search -> results into system msg)
471
  ##############################################################################
472
  @spaces.GPU(duration=120)
473
  def run(
@@ -483,12 +475,12 @@ def run(
483
  yield ""
484
  return
485
 
486
- temp_files = [] # for tracking temp files
487
 
488
  try:
489
  combined_system_msg = ""
490
 
491
- # Used internally only (not shown in the UI)
492
  if system_prompt.strip():
493
  combined_system_msg += f"[System Prompt]\n{system_prompt.strip()}\n\n"
494
 
@@ -499,14 +491,6 @@ def run(
499
  logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
500
  ws_result = do_web_search(ws_query)
501
  combined_system_msg += f"[X-RAY Security Reference Data]\n{ws_result}\n\n"
502
- combined_system_msg += """
503
- [IMPORTANT SECURITY ANALYSIS GUIDELINES]
504
- 1. Systematically scan and identify ALL potential threats in the X-RAY image
505
- 2. Reference security protocols and threat detection standards from search results
506
- 3. Use proper threat classification terminology
507
- 4. Provide threat severity levels (HIGH/MEDIUM/LOW)
508
- 5. Suggest appropriate security response actions
509
- """
510
  else:
511
  combined_system_msg += "[No valid keywords found, skipping WebSearch]\n\n"
512
 
@@ -520,7 +504,7 @@ def run(
520
  messages.extend(process_history(history))
521
 
522
  user_content, user_temp_files = process_new_user_message(message)
523
- temp_files.extend(user_temp_files) # track temp files
524
 
525
  for item in user_content:
526
  if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
@@ -535,7 +519,7 @@ def run(
535
  return_tensors="pt",
536
  ).to(device=model.device, dtype=torch.bfloat16)
537
 
538
- # Added input token limit
539
  if inputs.input_ids.shape[1] > MAX_INPUT_LENGTH:
540
  inputs.input_ids = inputs.input_ids[:, -MAX_INPUT_LENGTH:]
541
  if 'attention_mask' in inputs:
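The hunk cuts off inside this check; presumably the attention mask is truncated to match the input ids, along these lines (assumed continuation, not shown in the diff):

    inputs.attention_mask = inputs.attention_mask[:, -MAX_INPUT_LENGTH:]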
@@ -558,10 +542,10 @@ def run(
558
 
559
  except Exception as e:
560
  logger.error(f"Error in run: {str(e)}")
561
- yield f"Sorry, an error occurred: {str(e)}"
562
 
563
  finally:
564
- # Delete temp files
565
  for temp_file in temp_files:
566
  try:
567
  if os.path.exists(temp_file):
@@ -570,7 +554,7 @@ def run(
570
  except Exception as e:
571
  logger.warning(f"Failed to delete temp file {temp_file}: {e}")
572
 
573
- # Explicit memory cleanup
574
  try:
575
  del inputs, streamer
576
  except:
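The hunk ends at the bare except; a plausible reconstruction of the full cleanup block, consistent with the comment above (assumption, the tail is not shown in the diff):

try:
    del inputs, streamer
except:
    pass  # inputs/streamer may not exist if an error occurred before they were created
clear_cuda_cache()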
@@ -581,7 +565,7 @@ def run(
581
 
582
 
583
  ##############################################################################
584
- # Examples dedicated to X-RAY security scanning
585
  ##############################################################################
586
  examples = [
587
  [
@@ -647,108 +631,64 @@ examples = [
647
  ]
648
 
649
  ##############################################################################
650
- # Gradio UI (Blocks) layout (X-RAY security scanning theme)
651
  ##############################################################################
652
  css = """
653
- /* X-RAY Security Scanner Theme */
654
  .gradio-container {
655
- background: linear-gradient(135deg, rgba(0, 20, 40, 0.9), rgba(0, 40, 80, 0.9));
656
  padding: 30px 40px;
657
  margin: 20px auto;
658
  width: 100% !important;
659
  max-width: none !important;
660
- border: 2px solid #00ff00;
661
- box-shadow: 0 0 20px rgba(0, 255, 0, 0.3);
662
  }
663
  .fillable {
664
  width: 100% !important;
665
  max-width: 100% !important;
666
  }
667
  body {
668
- background: #000;
669
  margin: 0;
670
  padding: 0;
671
- font-family: 'Courier New', monospace;
672
- color: #00ff00;
673
  }
674
- /* Security-themed buttons */
675
  button, .btn {
676
- background: rgba(0, 255, 0, 0.1) !important;
677
- border: 1px solid #00ff00;
678
- color: #00ff00;
679
  padding: 12px 24px;
680
  text-transform: uppercase;
681
  font-weight: bold;
682
  letter-spacing: 1px;
683
  cursor: pointer;
684
- transition: all 0.3s;
685
  }
686
  button:hover, .btn:hover {
687
- background: rgba(0, 255, 0, 0.2) !important;
688
- box-shadow: 0 0 10px rgba(0, 255, 0, 0.5);
689
  }
690
 
691
- /* Alert-style headers */
692
  h1, h2, h3 {
693
- color: #00ff00;
694
- text-shadow: 0 0 10px rgba(0, 255, 0, 0.5);
695
  }
696
 
697
- /* Input fields with security theme */
698
  .multimodal-textbox, textarea, input {
699
- background: rgba(0, 40, 80, 0.8) !important;
700
- border: 1px solid #00ff00;
701
- color: #00ff00;
702
  }
703
 
704
- /* Chat interface security styling */
705
  .chatbox, .chatbot, .message {
706
- background: rgba(0, 20, 40, 0.8) !important;
707
- border: 1px solid #00ff00;
708
  }
709
 
710
- /* Example section styling */
711
  #examples_container, .examples-container {
712
  margin: auto;
713
  width: 90%;
714
- background: rgba(0, 40, 80, 0.5) !important;
715
- border: 1px solid #00ff00;
716
- padding: 20px;
717
- }
718
-
719
- /* Security alert animation */
720
- @keyframes security-pulse {
721
- 0% { box-shadow: 0 0 10px rgba(0, 255, 0, 0.5); }
722
- 50% { box-shadow: 0 0 20px rgba(0, 255, 0, 0.8); }
723
- 100% { box-shadow: 0 0 10px rgba(0, 255, 0, 0.5); }
724
- }
725
-
726
- .gradio-container {
727
- animation: security-pulse 2s infinite;
728
  }
729
-
730
- /* Threat level indicators */
731
- .threat-high { color: #ff0000; font-weight: bold; }
732
- .threat-medium { color: #ffaa00; font-weight: bold; }
733
- .threat-low { color: #00ff00; font-weight: bold; }
734
  """
735
 
736
  title_html = """
737
- <h1 align="center" style="margin-bottom: 0.2em; font-size: 1.6em; color: #00ff00; text-shadow: 0 0 20px #00ff00;">
738
- 🔍 Gemma-3-R1984-4B-BEAM 🔍
739
- </h1>
740
- <p align="center" style="font-size:1.1em; color:#00ff00; text-shadow: 0 0 10px #00ff00;">
741
- ⚡ X-RAY Security Threat Detection System ⚡<br>
742
- ✅ Real-time Weapon Detection ✅ Explosive Material Identification<br>
743
- ✅ Prohibited Item Classification ✅ Multi-threat Analysis<br>
744
- <span style="color: #ffaa00;">⚠️ Detects: Guns, Knives, Bombs, Batteries, Scissors, Springs, Liquids >100ml, EOD Components ⚠️</span><br>
745
- <span style="font-size: 0.9em;">Powered by Advanced AI Vision Model • Based on Google Gemma-3-4b • Enhanced by VIDRAFT</span>
746
- </p>
747
- <div align="center" style="margin: 10px 0; padding: 10px; border: 2px solid #ff0000; background: rgba(255, 0, 0, 0.1);">
748
- <p style="color: #ff0000; margin: 0; font-weight: bold;">
749
- 🚨 SECURITY ALERT: This system is for authorized security personnel only 🚨
750
- </p>
751
- </div>
752
  """
753
 
754
 
@@ -757,11 +697,11 @@ with gr.Blocks(css=css, title="Gemma-3-R1984-4B-BEAM - X-RAY Security Scanner")
757
 
758
  # Display the web search option (while the system prompt and token slider remain hidden)
759
  web_search_checkbox = gr.Checkbox(
760
- label="Enable Security Database Search (TSA/Aviation Security Standards)",
761
  value=False
762
  )
763
 
764
- # System prompt dedicated to X-RAY security scanning
765
  system_prompt_box = gr.Textbox(
766
  lines=3,
767
  value="""You are an advanced X-RAY security scanning AI specialized in threat detection and aviation security. Your primary mission is to identify ALL potential security threats in X-RAY images with extreme precision.
@@ -789,14 +729,14 @@ CRITICAL: Never miss a potential threat. When in doubt, flag for manual inspecti
789
  minimum=100,
790
  maximum=8000,
791
  step=50,
792
- value=1500, # Increased for detailed security analysis
793
  visible=False # hidden from view
794
  )
795
 
796
  web_search_text = gr.Textbox(
797
  lines=1,
798
- label="(Unused) Web Search Query",
799
- placeholder="No direct input needed",
800
  visible=False # hidden from view
801
  )
802
 
@@ -811,8 +751,7 @@ CRITICAL: Never miss a potential threat. When in doubt, flag for manual inspecti
811
  ".mp4", ".csv", ".txt", ".pdf"
812
  ],
813
  file_count="multiple",
814
- autofocus=True,
815
- placeholder="Upload X-RAY images for security analysis..."
816
  ),
817
  multimodal=True,
818
  additional_inputs=[
@@ -822,7 +761,7 @@ CRITICAL: Never miss a potential threat. When in doubt, flag for manual inspecti
822
  web_search_text,
823
  ],
824
  stop_btn=False,
825
- title='<a href="https://discord.gg/openfreeai" target="_blank" style="color: #00ff00;">Security Support: https://discord.gg/openfreeai</a>',
826
  examples=examples,
827
  run_examples_on_click=False,
828
  cache_examples=False,
@@ -833,11 +772,7 @@ CRITICAL: Never miss a potential threat. When in doubt, flag for manual inspecti
833
  # Example section - since examples are already set in ChatInterface, this is for display only
834
  with gr.Row(elem_id="examples_row"):
835
  with gr.Column(scale=12, elem_id="examples_container"):
836
- gr.Markdown("""
837
- ### 🔍 X-RAY Security Scanning Examples
838
- Click any example below to load a pre-configured security scan scenario.
839
- Each example demonstrates different threat detection capabilities of the BEAM system.
840
- """)
841
 
842
 
843
  if __name__ == "__main__":
 
3
  import os
4
  import re
5
  import tempfile
6
+ import gc # garbage collector
7
  from collections.abc import Iterator
8
  from threading import Thread
9
  import json
 
16
  from PIL import Image
17
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
18
 
19
+ # CSV/TXT analysis
20
  import pandas as pd
21
+ # PDF text extraction
22
  import PyPDF2
23
 
24
  ##############################################################################
25
+ # Memory cleanup function
26
  ##############################################################################
27
  def clear_cuda_cache():
28
+ """Clear CUDA cache explicitly."""
29
  if torch.cuda.is_available():
30
  torch.cuda.empty_cache()
31
  gc.collect()
 
36
  SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
37
 
38
  ##############################################################################
39
+ # Simple keyword extraction function
40
  ##############################################################################
41
  def extract_keywords(text: str, top_k: int = 5) -> str:
42
  """
43
+ Extract keywords from text
 
 
44
  """
45
  text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
46
  tokens = text.split()
 
48
  return " ".join(key_tokens)
49
 
50
  ##############################################################################
51
+ # SerpHouse Live endpoint call
 
52
  ##############################################################################
53
  def do_web_search(query: str) -> str:
54
  """
55
+ Return top 20 'organic' results as JSON string
 
56
  """
57
  try:
58
  url = "https://api.serphouse.com/serp/live"
 
61
  params = {
62
  "q": query,
63
  "domain": "google.com",
64
+ "serp_type": "web", # Basic web search
65
  "device": "desktop",
66
  "lang": "en",
67
+ "num": "20" # Request max 20 results
68
  }
69
 
70
  headers = {
71
  "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
72
  }
73
 
74
+ logger.info(f"SerpHouse API call... query: {query}")
75
+ logger.info(f"Request URL: {url} - params: {params}")
76
 
77
+ # GET request
78
  response = requests.get(url, headers=headers, params=params, timeout=60)
79
  response.raise_for_status()
80
 
81
+ logger.info(f"SerpHouse API response status: {response.status_code}")
82
  data = response.json()
83
 
84
+ # Handle various response structures
85
  results = data.get("results", {})
86
  organic = None
87
 
88
+ # Possible response structure 1
89
  if isinstance(results, dict) and "organic" in results:
90
  organic = results["organic"]
91
 
92
+ # Possible response structure 2 (nested results)
93
  elif isinstance(results, dict) and "results" in results:
94
  if isinstance(results["results"], dict) and "organic" in results["results"]:
95
  organic = results["results"]["organic"]
96
 
97
+ # Possible response structure 3 (top-level organic)
98
  elif "organic" in data:
99
  organic = data["organic"]
100
 
101
  if not organic:
102
+ logger.warning("No organic results found in response.")
103
+ logger.debug(f"Response structure: {list(data.keys())}")
104
  if isinstance(results, dict):
105
+ logger.debug(f"results structure: {list(results.keys())}")
106
  return "No web search results found or unexpected API response structure."
107
 
108
+ # Limit results and optimize context length
109
  max_results = min(20, len(organic))
110
  limited_organic = organic[:max_results]
111
 
112
+ # Format results for better readability
113
  summary_lines = []
114
  for idx, item in enumerate(limited_organic, start=1):
115
  title = item.get("title", "No title")
 
117
  snippet = item.get("snippet", "No description")
118
  displayed_link = item.get("displayed_link", link)
119
 
120
+ # Markdown format
121
  summary_lines.append(
122
  f"### Result {idx}: {title}\n\n"
123
  f"{snippet}\n\n"
124
+ f"**Source**: [{displayed_link}]({link})\n\n"
125
  f"---\n"
126
  )
127
 
128
+ # Add simple instructions for model
129
  instructions = """
130
  # X-RAY Security Scanning Reference Results
131
+ Use this information to enhance your analysis.
 
 
 
 
132
  """
133
 
134
  search_results = instructions + "\n".join(summary_lines)
135
+ logger.info(f"Processed {len(limited_organic)} search results")
136
  return search_results
137
 
138
  except Exception as e:
 
141
 
142
 
143
  ##############################################################################
144
+ # Model/Processor loading
145
  ##############################################################################
146
  MAX_CONTENT_CHARS = 2000
147
+ MAX_INPUT_LENGTH = 2096 # Max input token limit
148
  model_id = os.getenv("MODEL_ID", "VIDraft/Gemma-3-R1984-4B")
149
 
150
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 
152
  model_id,
153
  device_map="auto",
154
  torch_dtype=torch.bfloat16,
155
+ attn_implementation="eager" # Change to "flash_attention_2" if available
156
  )
157
  MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
158
 
159
 
160
  ##############################################################################
161
+ # CSV, TXT, PDF analysis functions
162
  ##############################################################################
163
  def analyze_csv_file(path: str) -> str:
164
  """
165
+ Convert CSV file to string. Truncate if too long.
166
  """
167
  try:
168
  df = pd.read_csv(path)
 
178
 
179
  def analyze_txt_file(path: str) -> str:
180
  """
181
+ Read TXT file. Truncate if too long.
182
  """
183
  try:
184
  with open(path, "r", encoding="utf-8") as f:
 
192
 
193
  def pdf_to_markdown(pdf_path: str) -> str:
194
  """
195
+ Convert PDF text to Markdown. Extract text by pages.
196
  """
197
  text_chunks = []
198
  try:
 
220
 
221
 
222
  ##############################################################################
223
+ # Image/Video upload limit check
224
  ##############################################################################
225
  def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
226
  image_count = 0
 
285
 
286
 
287
  ##############################################################################
288
+ # Video processing - with temp file tracking
289
  ##############################################################################
290
  def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
291
  vidcap = cv2.VideoCapture(video_path)
 
299
  success, image = vidcap.read()
300
  if success:
301
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
302
+ # Resize image
303
  image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
304
  pil_image = Image.fromarray(image)
305
  timestamp = round(i / fps, 2)
 
313
 
314
  def process_video(video_path: str) -> tuple[list[dict], list[str]]:
315
  content = []
316
+ temp_files = [] # List for tracking temp files
317
 
318
  frames = downsample_video(video_path)
319
  for frame in frames:
320
  pil_image, timestamp = frame
321
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
322
  pil_image.save(temp_file.name)
323
+ temp_files.append(temp_file.name) # Track for deletion later
324
  content.append({"type": "text", "text": f"Frame {timestamp}:"})
325
  content.append({"type": "image", "url": temp_file.name})
326
 
 
328
 
329
 
330
  ##############################################################################
331
+ # interleaved <image> processing
332
  ##############################################################################
333
  def process_interleaved_images(message: dict) -> list[dict]:
334
  parts = re.split(r"(<image>)", message["text"])
 
350
 
351
 
352
  ##############################################################################
353
+ # PDF + CSV + TXT + Image/Video
354
  ##############################################################################
355
  def is_image_file(file_path: str) -> bool:
356
  return bool(re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE))
 
367
 
368
 
369
  def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
370
+ temp_files = [] # List for tracking temp files
371
 
372
  if not message["files"]:
373
  return [{"type": "text", "text": message["text"]}], temp_files
 
411
 
412
 
413
  ##############################################################################
414
+ # history -> LLM message conversion
415
  ##############################################################################
416
  def process_history(history: list[dict]) -> list[dict]:
417
  messages = []
 
440
 
441
 
442
  ##############################################################################
443
+ # Model generation function with OOM catch
444
  ##############################################################################
445
  def _model_gen_with_oom_catch(**kwargs):
446
  """
447
+ Catch OutOfMemoryError in separate thread
448
  """
449
  try:
450
  model.generate(**kwargs)
451
  except torch.cuda.OutOfMemoryError:
452
  raise RuntimeError(
453
+ "[OutOfMemoryError] GPU memory insufficient. "
454
+ "Please reduce Max New Tokens or prompt length."
455
  )
456
  finally:
457
+ # Clear cache after generation
458
  clear_cuda_cache()
459
 
460
 
461
  ##############################################################################
462
+ # Main inference function (with auto web search)
463
  ##############################################################################
464
  @spaces.GPU(duration=120)
465
  def run(
 
475
  yield ""
476
  return
477
 
478
+ temp_files = [] # For tracking temp files
479
 
480
  try:
481
  combined_system_msg = ""
482
 
483
+ # Used internally only (hidden from UI)
484
  if system_prompt.strip():
485
  combined_system_msg += f"[System Prompt]\n{system_prompt.strip()}\n\n"
486
 
 
491
  logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
492
  ws_result = do_web_search(ws_query)
493
  combined_system_msg += f"[X-RAY Security Reference Data]\n{ws_result}\n\n"
 
 
 
 
 
 
 
 
494
  else:
495
  combined_system_msg += "[No valid keywords found, skipping WebSearch]\n\n"
496
 
 
504
  messages.extend(process_history(history))
505
 
506
  user_content, user_temp_files = process_new_user_message(message)
507
+ temp_files.extend(user_temp_files) # Track temp files
508
 
509
  for item in user_content:
510
  if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
 
519
  return_tensors="pt",
520
  ).to(device=model.device, dtype=torch.bfloat16)
521
 
522
+ # Limit input token count
523
  if inputs.input_ids.shape[1] > MAX_INPUT_LENGTH:
524
  inputs.input_ids = inputs.input_ids[:, -MAX_INPUT_LENGTH:]
525
  if 'attention_mask' in inputs:
 
542
 
543
  except Exception as e:
544
  logger.error(f"Error in run: {str(e)}")
545
+ yield f"Error occurred: {str(e)}"
546
 
547
  finally:
548
+ # Delete temp files
549
  for temp_file in temp_files:
550
  try:
551
  if os.path.exists(temp_file):
 
554
  except Exception as e:
555
  logger.warning(f"Failed to delete temp file {temp_file}: {e}")
556
 
557
+ # Explicit memory cleanup
558
  try:
559
  del inputs, streamer
560
  except:
 
565
 
566
 
567
  ##############################################################################
568
+ # X-RAY security scanning examples
569
  ##############################################################################
570
  examples = [
571
  [
 
631
  ]
632
 
633
  ##############################################################################
634
+ # Gradio UI (Blocks) layout
635
  ##############################################################################
636
  css = """
 
637
  .gradio-container {
638
+ background: white;
639
  padding: 30px 40px;
640
  margin: 20px auto;
641
  width: 100% !important;
642
  max-width: none !important;
 
 
643
  }
644
  .fillable {
645
  width: 100% !important;
646
  max-width: 100% !important;
647
  }
648
  body {
649
+ background: white;
650
  margin: 0;
651
  padding: 0;
652
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
653
+ color: #333;
654
  }
 
655
  button, .btn {
656
+ background: transparent !important;
657
+ border: 1px solid #ddd;
658
+ color: #333;
659
  padding: 12px 24px;
660
  text-transform: uppercase;
661
  font-weight: bold;
662
  letter-spacing: 1px;
663
  cursor: pointer;
 
664
  }
665
  button:hover, .btn:hover {
666
+ background: rgba(0, 0, 0, 0.05) !important;
 
667
  }
668
 
 
669
  h1, h2, h3 {
670
+ color: #333;
 
671
  }
672
 
 
673
  .multimodal-textbox, textarea, input {
674
+ background: rgba(255, 255, 255, 0.5) !important;
675
+ border: 1px solid #ddd;
676
+ color: #333;
677
  }
678
 
 
679
  .chatbox, .chatbot, .message {
680
+ background: transparent !important;
 
681
  }
682
 
 
683
  #examples_container, .examples-container {
684
  margin: auto;
685
  width: 90%;
686
+ background: transparent !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
687
  }
 
 
 
 
 
688
  """
689
 
690
  title_html = """
691
+ <h1 align="center" style="margin-bottom: 0.2em; font-size: 1.6em;">Gemma-3-R1984-4B-BEAM</h1>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
692
  """
693
 
694
 
 
697
 
698
  # Display the web search option (while the system prompt and token slider remain hidden)
699
  web_search_checkbox = gr.Checkbox(
700
+ label="Deep Research",
701
  value=False
702
  )
703
 
704
+ # X-RAY security scanning system prompt
705
  system_prompt_box = gr.Textbox(
706
  lines=3,
707
  value="""You are an advanced X-RAY security scanning AI specialized in threat detection and aviation security. Your primary mission is to identify ALL potential security threats in X-RAY images with extreme precision.
 
729
  minimum=100,
730
  maximum=8000,
731
  step=50,
732
+ value=1000,
733
  visible=False # hidden from view
734
  )
735
 
736
  web_search_text = gr.Textbox(
737
  lines=1,
738
+ label="Web Search Query",
739
+ placeholder="",
740
  visible=False # hidden from view
741
  )
742
 
 
751
  ".mp4", ".csv", ".txt", ".pdf"
752
  ],
753
  file_count="multiple",
754
+ autofocus=True
 
755
  ),
756
  multimodal=True,
757
  additional_inputs=[
 
761
  web_search_text,
762
  ],
763
  stop_btn=False,
764
+ title='<a href="https://discord.gg/openfreeai" target="_blank">https://discord.gg/openfreeai</a>',
765
  examples=examples,
766
  run_examples_on_click=False,
767
  cache_examples=False,
 
772
  # Example section - since examples are already set in ChatInterface, this is for display only
773
  with gr.Row(elem_id="examples_row"):
774
  with gr.Column(scale=12, elem_id="examples_container"):
775
+ pass
 
 
 
 
776
 
777
 
778
  if __name__ == "__main__":