Robo-Beam

Running on Zero

File size: 31,548 Bytes

d48cdf4
 
 
 
 
9dddfec
d48cdf4
 
0ea9032
 
d48cdf4
 
 
 
 
 
 
507fad1
 
8fa1eef
507fad1
c6c9a50
 
9dddfec
 
 
 
 
 
 
 
 
5510c43
0ea9032
5510c43
ac92569
5510c43
1e394d0
a12c96b
1e394d0
 
a12c96b
6977531
a12c96b
 
 
 
1e394d0
 
 
 
5510c43
d07df81
0ea9032
5510c43
 
 
0ea9032
 
5510c43
 
 
d07df81
444ad96
5510c43
 
 
d07df81
5510c43
444ad96
 
5510c43
d07df81
 
 
 
 
 
 
 
 
9aac221
d07df81
 
 
 
 
 
5510c43
d07df81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5510c43
d07df81
 
 
 
 
5510c43
444ad96
 
 
 
f7748ac
5510c43
444ad96
 
 
 
f7748ac
444ad96
f7748ac
444ad96
f7748ac
 
0d4c8dd
f7748ac
444ad96
d07df81
f7748ac
 
0d4c8dd
 
 
 
 
 
f7748ac
 
 
0d4c8dd
f7748ac
d07df81
5510c43
 
 
 
 
ac92569
0ea9032
ac92569
42893c3
 
7b54c59
6977531
d48cdf4
 
2b2c22c
 
 
9dddfec
d48cdf4
 
 
507fad1
ac92569
507fad1
ac92569
8fa1eef
ac92569
 
 
8fa1eef
 
5ff87dd
 
507fad1
8fa1eef
 
2b2c22c
8fa1eef
 
 
 
 
ac92569
 
 
8fa1eef
 
507fad1
8fa1eef
 
2b2c22c
8fa1eef
 
 
 
c6c9a50
ac92569
895b1a9
ac92569
507fad1
c6c9a50
 
 
5ff87dd
 
 
507fad1
 
 
5ff87dd
 
 
 
 
c6c9a50
 
 
507fad1
 
 
 
 
 
c6c9a50
ac92569
507fad1
ac92569
d48cdf4
 
 
 
 
 
5ff87dd
d48cdf4
 
 
 
 
 
 
 
 
 
5ff87dd
 
 
 
 
 
 
d48cdf4
 
 
 
8fa1eef
 
2b2c22c
8fa1eef
 
 
d48cdf4
 
 
77b4bdf
d48cdf4
 
 
 
 
 
 
 
 
 
 
 
 
5ff87dd
 
 
 
 
 
 
77b4bdf
d48cdf4
 
 
ac92569
9dddfec
ac92569
d48cdf4
 
 
 
ac92569
77f7fca
507fad1
d48cdf4
 
 
 
 
9dddfec
 
d48cdf4
 
 
5ff87dd
 
507fad1
d48cdf4
 
 
 
9dddfec
d48cdf4
9dddfec
 
d48cdf4
507fad1
 
d48cdf4
 
9dddfec
d48cdf4
 
9dddfec
 
d48cdf4
 
ac92569
507fad1
ac92569
d48cdf4
 
 
 
5ff87dd
 
 
d48cdf4
5ff87dd
 
d48cdf4
 
 
c6c9a50
 
 
d48cdf4
 
 
ac92569
507fad1
ac92569
5ff87dd
 
 
 
 
 
 
ac92569
 
 
 
 
 
5ff87dd
9dddfec
 
 
d48cdf4
9dddfec
d48cdf4
5ff87dd
 
8fa1eef
 
c6c9a50
77b4bdf
507fad1
77b4bdf
8fa1eef
 
 
77b4bdf
8fa1eef
 
 
d48cdf4
c6c9a50
507fad1
 
c6c9a50
77b4bdf
9dddfec
 
 
 
77b4bdf
5ff87dd
 
ac92569
1e394d0
9dddfec
c6c9a50
8fa1eef
 
77b4bdf
9dddfec
d48cdf4
5ff87dd
ac92569
507fad1
ac92569
d48cdf4
 
507fad1
d48cdf4
 
 
 
 
 
 
 
 
 
5ff87dd
 
 
 
 
 
1e394d0
5ff87dd
 
 
d48cdf4
 
 
9dddfec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac92569
0ea9032
ac92569
d48cdf4
5510c43
 
 
 
 
 
 
 
1e394d0
d48cdf4
 
 
 
9dddfec
 
5ff87dd
0ea9032
 
5a98a93
0ea9032
 
 
1e394d0
 
 
0ea9032
 
 
 
5ea1450
0d4c8dd
0e80363
0d4c8dd
 
 
 
 
0e80363
1e394d0
0ea9032
5510c43
5ff87dd
0ea9032
 
 
 
 
 
5ff87dd
0ea9032
9dddfec
 
 
5ff87dd
 
 
 
 
 
 
 
 
 
 
 
9dddfec
 
 
 
 
 
 
5ff87dd
 
0d4c8dd
5ff87dd
 
 
0ea9032
6977531
5ff87dd
 
 
 
 
 
ac92569
5ff87dd
 
0d4c8dd
9dddfec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6977531
 
0d4c8dd
ac92569
cdd2c63
ac92569
d48cdf4
 
 
cdd2c63
ecd7689
 
 
ac92569
f9f2d2e
 
14d290c
 
cdd2c63
14d290c
 
ac92569
f9f2d2e
 
cdd2c63
be95d26
f9f2d2e
ac92569
ec629d4
 
cdd2c63
ec629d4
 
ac92569
f9f2d2e
 
cdd2c63
be95d26
f9f2d2e
ac92569
f9f2d2e
 
cdd2c63
4ab243c
f9f2d2e
ac92569
f9f2d2e
 
cdd2c63
4ab243c
f9f2d2e
ac92569
f9f2d2e
 
cdd2c63
f9f2d2e
 
 
 
 
 
 
 
 
d48cdf4
 
cdd2c63
4ab243c
f9f2d2e
ac92569
f9f2d2e
 
cdd2c63
f9f2d2e
 
 
 
 
cdd2c63
f9f2d2e
 
 
 
 
cdd2c63
f9f2d2e
 
b54f448
d48cdf4
 
ac92569
5a98a93
ac92569
5510c43
0e3a388
5510c43
a65c126
5510c43
0e3a388
 
 
5510c43
5a98a93
0e3a388
 
5510c43
a65c126
0e3a388
f76e5e4
0e3a388
 
 
 
5510c43
f76e5e4
5510c43
f76e5e4
 
 
5510c43
 
 
 
 
 
 
f76e5e4
0e3a388
f76e5e4
 
 
0e3a388
 
f76e5e4
0e3a388
f76e5e4
0e3a388
f76e5e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a65c126
f76e5e4
a65c126
 
f76e5e4
a65c126
f76e5e4
a65c126
f76e5e4
a65c126
5510c43
f76e5e4
 
 
 
 
 
 
 
 
 
5510c43
b018faf
5510c43
6ae2f8c
5510c43
a714b7a
0d4c8dd
6d3b20d
5510c43
b018faf
5510c43
0d4c8dd
2a84822
 
5510c43
2a84822
 
 
 
 
d48cdf4
2a84822
 
 
216d108
2a84822
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a98a93
2a84822
 
 
 
 
 
 
 
 
 
 
 
 
5510c43
a12c96b
5a98a93
cdd2c63
2a84822

#!/usr/bin/env python

import os
import re
import tempfile
import gc  # garbage collector 추가
from collections.abc import Iterator
from threading import Thread
import json
import requests
import cv2
import gradio as gr
import spaces
import torch
from loguru import logger
from PIL import Image
from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer

# CSV/TXT 분석
import pandas as pd
# PDF 텍스트 추출
import PyPDF2

##############################################################################
# 메모리 정리 함수 추가
##############################################################################
def clear_cuda_cache():
    """Run the garbage collector, then empty the CUDA cache if CUDA is available.

    The collection runs first so that memory held only by unreachable Python
    objects has been handed back before the CUDA cache is flushed; flushing
    first would leave that memory cached until the next call.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

##############################################################################
# SERPHouse API key from environment variable
##############################################################################
SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")

##############################################################################
# 간단한 키워드 추출 함수 (한글 + 알파벳 + 숫자 + 공백 보존)
##############################################################################
def extract_keywords(text: str, top_k: int = 5) -> str:
    """Build a short search query from the leading words of ``text``.

    Every character other than ASCII letters, digits, Hangul syllables
    (가-힣) and whitespace is removed. The remainder is split on whitespace
    and the first ``top_k`` tokens are joined with single spaces.
    """
    disallowed = re.compile(r"[^a-zA-Z0-9가-힣\s]")
    words = disallowed.sub("", text).split()
    return " ".join(words[:top_k])

##############################################################################
# SerpHouse Live endpoint 호출
# - 상위 20개 결과 JSON을 LLM에 넘길 때 link, snippet 등 모두 포함
##############################################################################
def do_web_search(query: str) -> str:
    """Query the SerpHouse live endpoint and format the organic results as Markdown.

    Args:
        query: Search string sent as the ``q`` parameter.

    Returns:
        A Markdown string made of a fixed instruction header followed by up to
        20 result sections (title, snippet, source link). If no organic results
        can be located in the response, a fixed "no results" message is
        returned. Any exception (network error, HTTP error status, malformed
        payload) is logged and reported as ``"Web search failed: ..."`` rather
        than raised.
    """
    try:
        url = "https://api.serphouse.com/serp/live"

        # Plain GET request; ask the API for at most 20 results.
        params = {
            "q": query,
            "domain": "google.com",
            "serp_type": "web",
            "device": "desktop",
            "lang": "en",
            "num": "20",
        }

        headers = {
            "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
        }

        logger.info(f"SerpHouse API 호출 중... 검색어: {query}")
        logger.info(f"요청 URL: {url} - 파라미터: {params}")

        response = requests.get(url, headers=headers, params=params, timeout=60)
        response.raise_for_status()

        logger.info(f"SerpHouse API 응답 상태 코드: {response.status_code}")
        data = response.json()

        # The organic list has been seen at several nesting levels. Try each
        # location in turn instead of stopping at the first dict-shaped one,
        # so a top-level "organic" is still found when "results" lacks it.
        results = data.get("results", {})
        candidates = []
        if isinstance(results, dict):
            candidates.append(results)  # data["results"]["organic"]
            nested = results.get("results")
            if isinstance(nested, dict):
                candidates.append(nested)  # data["results"]["results"]["organic"]
        candidates.append(data)  # data["organic"]

        organic = None
        for candidate in candidates:
            found = candidate.get("organic")
            if isinstance(found, list) and found:
                organic = found
                break

        if not organic:
            logger.warning("응답에서 organic 결과를 찾을 수 없습니다.")
            logger.debug(f"응답 구조: {list(data.keys())}")
            if isinstance(results, dict):
                logger.debug(f"results 구조: {list(results.keys())}")
            return "No web search results found or unexpected API response structure."

        # Cap at 20 items and ignore entries that are not dicts so a single
        # malformed item cannot abort the whole search.
        limited_organic = [item for item in organic[:20] if isinstance(item, dict)]

        # One Markdown section per result, with a clickable source link.
        summary_lines = []
        for idx, item in enumerate(limited_organic, start=1):
            title = item.get("title", "No title")
            link = item.get("link", "#")
            snippet = item.get("snippet", "No description")
            displayed_link = item.get("displayed_link", link)

            summary_lines.append(
                f"### Result {idx}: {title}\n\n"
                f"{snippet}\n\n"
                f"**출처**: [{displayed_link}]({link})\n\n"
                f"---\n"
            )

        # Fixed header telling the model how to use and cite the results.
        instructions = """
# 웹 검색 결과
아래는 검색 결과입니다. 질문에 답변할 때 이 정보를 활용하세요:
1. 각 결과의 제목, 내용, 출처 링크를 참고하세요
2. 답변에 관련 정보의 출처를 명시적으로 인용하세요 (예: "X 출처에 따르면...")
3. 응답에 실제 출처 링크를 포함하세요
4. 여러 출처의 정보를 종합하여 답변하세요
"""

        search_results = instructions + "\n".join(summary_lines)
        logger.info(f"검색 결과 {len(limited_organic)}개 처리 완료")
        return search_results

    except Exception as e:
        # Best-effort feature: report the failure as text instead of raising.
        logger.error(f"Web search failed: {e}")
        return f"Web search failed: {str(e)}"


##############################################################################
# 모델/프로세서 로딩
##############################################################################
# Character cap applied to extracted file text and to each text part of a new user message.
MAX_CONTENT_CHARS = 2000
# Token cap checked against the tokenized prompt in run().
# NOTE(review): 2096 may be a typo for 2048 — confirm.
MAX_INPUT_LENGTH = 2096
# Model checkpoint id; can be overridden through the MODEL_ID environment variable.
model_id = os.getenv("MODEL_ID", "VIDraft/Gemma-3-R1984-12B")

# Processor and model are loaded at import time.
processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="eager"  # switch to "flash_attention_2" if available
)
# Maximum number of images per conversation (history + new message); overridable via MAX_NUM_IMAGES.
MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))


##############################################################################
# CSV, TXT, PDF 분석 함수
##############################################################################
def analyze_csv_file(path: str) -> str:
    """Render a CSV file as text for inclusion in the prompt.

    At most the first 50 rows and 10 columns are kept, and the rendered table
    is cut to ``MAX_CONTENT_CHARS`` characters. Any error is returned as a
    "Failed to read CSV" message instead of being raised.
    """
    try:
        # Slicing past the end is harmless, so no shape check is needed.
        preview = pd.read_csv(path).iloc[:50, :10]
        rendered = preview.to_string()
        if len(rendered) > MAX_CONTENT_CHARS:
            rendered = rendered[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
    except Exception as e:
        return f"Failed to read CSV ({os.path.basename(path)}): {str(e)}"
    return f"**[CSV File: {os.path.basename(path)}]**\n\n{rendered}"


def analyze_txt_file(path: str) -> str:
    """Read a text file for inclusion in the prompt.

    Only the first ``MAX_CONTENT_CHARS`` characters are kept; longer files get
    a truncation marker appended. Bytes that are not valid UTF-8 are replaced
    with U+FFFD rather than failing the whole file. Any other error is
    returned as a "Failed to read TXT" message instead of being raised.
    """
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            # Read one character past the cap: enough to detect truncation
            # without loading an arbitrarily large file into memory.
            text = f.read(MAX_CONTENT_CHARS + 1)
        if len(text) > MAX_CONTENT_CHARS:
            text = text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        return f"**[TXT File: {os.path.basename(path)}]**\n\n{text}"
    except Exception as e:
        return f"Failed to read TXT ({os.path.basename(path)}): {str(e)}"


def pdf_to_markdown(pdf_path: str) -> str:
    """Extract text from the first pages of a PDF as simple Markdown.

    At most 5 pages are read. Each page's text is capped at
    ``MAX_CONTENT_CHARS // pages_shown`` characters and the combined output at
    ``MAX_CONTENT_CHARS``. Any error while reading is returned as a
    "Failed to read PDF" message instead of being raised.
    """
    sections = []
    try:
        with open(pdf_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            total_pages = len(reader.pages)
            shown_pages = min(5, total_pages)
            for index in range(shown_pages):
                extracted = (reader.pages[index].extract_text() or "").strip()
                if not extracted:
                    continue
                # Computed inside the loop: shown_pages is non-zero here.
                per_page_cap = MAX_CONTENT_CHARS // shown_pages
                if len(extracted) > per_page_cap:
                    extracted = extracted[:per_page_cap] + "...(truncated)"
                sections.append(f"## Page {index+1}\n\n{extracted}\n")
            if total_pages > shown_pages:
                sections.append(f"\n...(Showing {shown_pages} of {total_pages} pages)...")
    except Exception as e:
        return f"Failed to read PDF ({os.path.basename(pdf_path)}): {str(e)}"

    combined = "\n".join(sections)
    if len(combined) > MAX_CONTENT_CHARS:
        combined = combined[:MAX_CONTENT_CHARS] + "\n...(truncated)..."

    return f"**[PDF File: {os.path.basename(pdf_path)}]**\n\n{combined}"


##############################################################################
# 이미지/비디오 업로드 제한 검사
##############################################################################
def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
    """Return ``(image_count, video_count)`` for the given file paths.

    A path ending in ``.mp4`` (case-sensitive) counts as a video; otherwise a
    png/jpg/jpeg/gif/webp extension (case-insensitive) counts as an image.
    Anything else is ignored.
    """
    image_ext = re.compile(r"\.(png|jpg|jpeg|gif|webp)$", re.IGNORECASE)
    tally = {"image": 0, "video": 0}
    for candidate in paths:
        if candidate.endswith(".mp4"):
            tally["video"] += 1
        elif image_ext.search(candidate):
            tally["image"] += 1
    return tally["image"], tally["video"]


def count_files_in_history(history: list[dict]) -> tuple[int, int]:
    """Return ``(image_count, video_count)`` for files attached by the user in ``history``.

    Only items with role ``"user"`` whose content is a non-empty list or tuple
    are inspected, and the first element is taken as the file path. Tuples are
    accepted as well as lists so that file messages delivered as ``(path,)``
    are counted. Plain-string content (ordinary text) and non-string first
    elements are skipped.
    """
    image_count = 0
    video_count = 0
    for item in history:
        if item["role"] != "user":
            continue
        content = item["content"]
        if not isinstance(content, (list, tuple)) or not content:
            continue
        file_path = content[0]
        if not isinstance(file_path, str):
            continue
        if file_path.endswith(".mp4"):
            video_count += 1
        elif re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE):
            image_count += 1
    return image_count, video_count


def validate_media_constraints(message: dict, history: list[dict]) -> bool:
    """Check the image/video limits for a new message plus the existing history.

    Rules enforced (each failure shows a ``gr.Warning`` and returns False):
      * at most one video in total;
      * a video cannot be combined with images or with ``<image>`` tags;
      * without a video, at most ``MAX_NUM_IMAGES`` images in total;
      * when ``<image>`` tags are used, their number must equal the number of
        image files in the new message.

    Returns True when every rule is satisfied.
    """
    image_pattern = r"\.(png|jpg|jpeg|gif|webp)$"

    media_files = [
        f
        for f in message["files"]
        if re.search(image_pattern, f, re.IGNORECASE) or f.endswith(".mp4")
    ]

    new_images, new_videos = count_files_in_new_message(media_files)
    past_images, past_videos = count_files_in_history(history)
    total_images = past_images + new_images
    total_videos = past_videos + new_videos

    if total_videos > 1:
        gr.Warning("Only one video is supported.")
        return False

    if total_videos == 1:
        if total_images > 0:
            gr.Warning("Mixing images and videos is not allowed.")
            return False
        if "<image>" in message["text"]:
            gr.Warning("Using <image> tags with video files is not supported.")
            return False
    elif total_images > MAX_NUM_IMAGES:
        # Counts are never negative, so this branch is the "no video" case.
        gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
        return False

    tag_total = message["text"].count("<image>")
    if tag_total:
        tagged_candidates = [
            f for f in message["files"] if re.search(image_pattern, f, re.IGNORECASE)
        ]
        if tag_total != len(tagged_candidates):
            gr.Warning("The number of <image> tags in the text does not match the number of image files.")
            return False

    return True


##############################################################################
# 비디오 처리 - 임시 파일 추적 코드 추가
##############################################################################
def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
    """Sample up to 5 half-resolution RGB frames from a video.

    Frames are taken every ``max(int(fps), total_frames // 10, 1)`` frames,
    starting at frame 0, and sampling stops once 5 frames are collected.

    Args:
        video_path: Path of the video file to open with OpenCV.

    Returns:
        A list of ``(PIL image, timestamp_in_seconds)`` tuples. The list is
        empty when no frame can be read. When the container reports no usable
        frame rate, timestamps fall back to ``0.0``.
    """
    vidcap = cv2.VideoCapture(video_path)
    frames = []
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Also catches NaN; avoids int(NaN) and division by zero below.
            fps = 0.0
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        # The step must be at least 1: with fps < 1 and fewer than 10 frames
        # both other terms are 0 and range() would raise ValueError.
        frame_interval = max(int(fps), int(total_frames / 10), 1)

        for i in range(0, total_frames, frame_interval):
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = vidcap.read()
            if not success:
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # Halve both dimensions to reduce the size of each frame.
            image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
            pil_image = Image.fromarray(image)
            timestamp = round(i / fps, 2) if fps > 0 else 0.0
            frames.append((pil_image, timestamp))
            if len(frames) >= 5:
                break
    finally:
        # Release the capture even if decoding raised.
        vidcap.release()
    return frames


def process_video(video_path: str) -> tuple[list[dict], list[str]]:
    """Turn a video into alternating "Frame <t>:" text parts and image parts.

    Each sampled frame from ``downsample_video`` is written to its own
    temporary ``.png`` file, and the image part references that file by path.

    Returns:
        ``(content, temp_files)`` where ``temp_files`` lists every temporary
        file created, so the caller can delete them once generation is done.

    Raises:
        Whatever ``downsample_video`` or the image save raises. Temporary
        files created before the failure are removed first, because the
        caller would never receive their paths.
    """
    content = []
    temp_files = []

    try:
        for pil_image, timestamp in downsample_video(video_path):
            # Reserve a path and close the handle before writing to it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
                temp_path = temp_file.name
            temp_files.append(temp_path)
            pil_image.save(temp_path)
            content.append({"type": "text", "text": f"Frame {timestamp}:"})
            content.append({"type": "image", "url": temp_path})
    except Exception:
        for leftover in temp_files:
            try:
                os.unlink(leftover)
            except OSError as cleanup_error:
                logger.warning(f"Failed to delete temp file {leftover}: {cleanup_error}")
        raise

    return content, temp_files


##############################################################################
# interleaved <image> 처리
##############################################################################
def process_interleaved_images(message: dict) -> list[dict]:
    """Split the message text on ``<image>`` tags and interleave image parts.

    Each ``<image>`` tag is replaced, in order, by the next image file from
    ``message["files"]`` (png/jpg/jpeg/gif/webp, case-insensitive). A tag with
    no image left is kept as literal text. Other pieces become text parts:
    stripped when they contain non-whitespace, otherwise passed through
    unchanged (including empty strings).
    """
    remaining_images = iter(
        f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)
    )
    content = []

    for piece in re.split(r"(<image>)", message["text"]):
        if piece == "<image>":
            image_path = next(remaining_images, None)
            if image_path is not None:
                content.append({"type": "image", "url": image_path})
                continue
        trimmed = piece.strip()
        content.append({"type": "text", "text": trimmed if trimmed else piece})
    return content


##############################################################################
# PDF + CSV + TXT + 이미지/비디오
##############################################################################
def is_image_file(file_path: str) -> bool:
    """Return True if the path ends in png/jpg/jpeg/gif/webp (case-insensitive)."""
    match = re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE)
    return match is not None

def is_video_file(file_path: str) -> bool:
    """Return True if the path ends in ``.mp4`` (case-sensitive)."""
    video_suffixes = (".mp4",)
    return file_path.endswith(video_suffixes)

def is_document_file(file_path: str) -> bool:
    """Return True if the path ends in .pdf, .csv or .txt (case-insensitive)."""
    document_suffixes = (".pdf", ".csv", ".txt")
    return file_path.lower().endswith(document_suffixes)


def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
    """Convert a new multimodal message into chat-template content parts.

    The result starts with the message text, followed by the text extracted
    from every CSV, then every TXT, then every PDF attachment. If a video is
    attached, only the first one is expanded into frame parts and the function
    returns. Otherwise images are either interleaved at ``<image>`` tags (the
    plain text part is then dropped in favour of the interleaved pieces) or
    appended after the text.

    Returns:
        ``(content_parts, temp_files)`` where ``temp_files`` holds the paths
        of temporary frame images created for a video.
    """
    temp_files = []
    attachments = message["files"]
    text = message["text"]

    if not attachments:
        return [{"type": "text", "text": text}], temp_files

    parts = [{"type": "text", "text": text}]

    # Documents are grouped by type, in this fixed order.
    analyzers = (
        (".csv", analyze_csv_file),
        (".txt", analyze_txt_file),
        (".pdf", pdf_to_markdown),
    )
    for suffix, analyze in analyzers:
        for doc_path in attachments:
            if doc_path.lower().endswith(suffix):
                parts.append({"type": "text", "text": analyze(doc_path)})

    videos = [f for f in attachments if is_video_file(f)]
    if videos:
        frame_parts, frame_files = process_video(videos[0])
        parts += frame_parts
        temp_files.extend(frame_files)
        return parts, temp_files

    images = [f for f in attachments if is_image_file(f)]
    if "<image>" in text and images:
        interleaved = process_interleaved_images({"text": text, "files": images})
        # parts[0] is always the plain text part, which the interleaved
        # pieces replace.
        return interleaved + parts[1:], temp_files

    parts.extend({"type": "image", "url": img_path} for img_path in images)
    return parts, temp_files


##############################################################################
# history -> LLM 메시지 변환
##############################################################################
def process_history(history: list[dict]) -> list[dict]:
    """Convert chat history into the message list expected by the chat template.

    Consecutive user items are merged into a single user message, which is
    flushed when an assistant item is met or at the end of the history.
    String content becomes a text part. List or tuple content is treated as a
    file reference whose first element is the path: image paths become image
    parts, other files become a ``[File: <name>]`` text placeholder. Tuples
    are accepted alongside lists so that file messages delivered as
    ``(path,)`` are kept, and a non-string first element is skipped instead of
    crashing the extension check.
    """
    messages = []
    current_user_content: list[dict] = []
    for item in history:
        if item["role"] == "assistant":
            if current_user_content:
                messages.append({"role": "user", "content": current_user_content})
                current_user_content = []
            messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
            continue

        content = item["content"]
        if isinstance(content, str):
            current_user_content.append({"type": "text", "text": content})
        elif isinstance(content, (list, tuple)) and len(content) > 0:
            file_path = content[0]
            if not isinstance(file_path, str):
                continue
            if is_image_file(file_path):
                current_user_content.append({"type": "image", "url": file_path})
            else:
                current_user_content.append({"type": "text", "text": f"[File: {os.path.basename(file_path)}]"})

    if current_user_content:
        messages.append({"role": "user", "content": current_user_content})

    return messages


##############################################################################
# 모델 생성 함수에서 OOM 캐치
##############################################################################
def _model_gen_with_oom_catch(**kwargs):
    """Run ``model.generate(**kwargs)`` and translate CUDA OOM into a RuntimeError.

    Intended as the target of the generation thread. The CUDA cache is cleared
    afterwards whether generation succeeded or failed.

    Raises:
        RuntimeError: if generation runs out of GPU memory. The original
            ``torch.cuda.OutOfMemoryError`` is attached as the cause.
    """
    try:
        model.generate(**kwargs)
    except torch.cuda.OutOfMemoryError as oom_error:
        raise RuntimeError(
            "[OutOfMemoryError] GPU 메모리가 부족합니다. "
            "Max New Tokens을 줄이거나, 프롬프트 길이를 줄여주세요."
        ) from oom_error
    finally:
        # Clear the cache once more after generation ends.
        clear_cuda_cache()


##############################################################################
# 메인 추론 함수 (web search 체크 시 자동 키워드추출->검색->결과 system msg)
##############################################################################
@spaces.GPU(duration=120)
def run(
    message: dict,
    history: list[dict],
    system_prompt: str = "",
    max_new_tokens: int = 512,
    use_web_search: bool = False,
    web_search_query: str = "",
) -> Iterator[str]:
    """Stream a model reply for the new message, optionally with web search.

    Args:
        message: New multimodal message with ``"text"`` and ``"files"`` keys.
        history: Previous chat items (dicts with ``"role"`` and ``"content"``).
        system_prompt: Optional system prompt placed at the start of the
            system message.
        max_new_tokens: Generation length limit passed to ``model.generate``.
        use_web_search: When True, keywords are taken from the message text,
            searched with ``do_web_search``, and the results plus citation
            instructions are appended to the system message.
        web_search_query: Unused; kept so the positional inputs from the UI
            still line up.

    Yields:
        The accumulated reply text after each streamed chunk. Yields a single
        empty string when media validation fails, and an error message string
        if anything raises during preparation or streaming.
    """
    if not validate_media_constraints(message, history):
        yield ""
        return

    temp_files = []  # temporary files to delete when done
    # Pre-bound so the cleanup in ``finally`` never hits an unbound name.
    inputs = None
    streamer = None
    gen_kwargs = None

    try:
        combined_system_msg = ""

        # Internal only; the system prompt box is not shown in the UI.
        if system_prompt.strip():
            combined_system_msg += f"[System Prompt]\n{system_prompt.strip()}\n\n"

        if use_web_search:
            user_text = message["text"]
            ws_query = extract_keywords(user_text, top_k=5)
            if ws_query.strip():
                logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
                ws_result = do_web_search(ws_query)
                combined_system_msg += f"[Search top-20 Full Items Based on user prompt]\n{ws_result}\n\n"
                # Ask the model to cite the search results and their links.
                combined_system_msg += "[참고: 위 검색결과 내용과 link를 출처로 인용하여 답변해 주세요.]\n\n"
                combined_system_msg += """
[중요 지시사항]
1. 답변에 검색 결과에서 찾은 정보의 출처를 반드시 인용하세요.
2. 출처 인용 시 "[출처 제목](링크)" 형식의 마크다운 링크를 사용하세요.
3. 여러 출처의 정보를 종합하여 답변하세요.
4. 답변 마지막에 "참고 자료:" 섹션을 추가하고 사용한 주요 출처 링크를 나열하세요.
"""
            else:
                combined_system_msg += "[No valid keywords found, skipping WebSearch]\n\n"

        messages = []
        if combined_system_msg.strip():
            messages.append({
                "role": "system",
                "content": [{"type": "text", "text": combined_system_msg.strip()}],
            })

        messages.extend(process_history(history))

        user_content, user_temp_files = process_new_user_message(message)
        temp_files.extend(user_temp_files)

        for item in user_content:
            if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
                item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."
        messages.append({"role": "user", "content": user_content})

        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(device=model.device, dtype=torch.bfloat16)

        # Keep only the last MAX_INPUT_LENGTH prompt tokens. Item assignment
        # is required: setting attributes on the batch object does not change
        # the mapping that ``dict(inputs, ...)`` reads below, so the previous
        # attribute-based truncation never reached ``generate``.
        if inputs["input_ids"].shape[1] > MAX_INPUT_LENGTH:
            if "pixel_values" in inputs:
                # NOTE(review): cutting tokens could drop image placeholder
                # tokens while the pixel data stays, so multimodal prompts are
                # left untruncated — confirm this is the desired policy.
                logger.warning(
                    f"Prompt exceeds {MAX_INPUT_LENGTH} tokens but contains images; skipping truncation."
                )
            else:
                # presumably these are the only per-token tensors — verify against the processor
                for key in ("input_ids", "attention_mask", "token_type_ids"):
                    if key in inputs:
                        inputs[key] = inputs[key][:, -MAX_INPUT_LENGTH:]

        streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
        gen_kwargs = dict(
            inputs,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
        )

        t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
        t.start()

        output = ""
        for new_text in streamer:
            output += new_text
            yield output

        # The stream is finished; wait for the worker so its own cleanup has
        # run before the final cache clear below.
        t.join()

    except Exception as e:
        logger.error(f"Error in run: {str(e)}")
        yield f"죄송합니다. 오류가 발생했습니다: {str(e)}"

    finally:
        # Delete temporary frame images.
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.unlink(temp_file)
                    logger.info(f"Deleted temp file: {temp_file}")
            except Exception as e:
                logger.warning(f"Failed to delete temp file {temp_file}: {e}")

        # Drop references to the tensors and streamer before clearing caches.
        inputs = None
        streamer = None
        gen_kwargs = None

        clear_cuda_cache()



##############################################################################
# 예시들 (모두 영어로)
##############################################################################
# Example prompts passed to the chat interface. Each entry is a one-element
# list holding a message dict with "text" and "files" keys.
# NOTE(review): the last example asks to compare two images but lists a single
# file — confirm whether that image itself contains both pictures.
examples = [
    [
        {
            "text": "Compare the contents of the two PDF files.",
            "files": [
                "assets/additional-examples/before.pdf",
                "assets/additional-examples/after.pdf",
            ],
        }
    ],
    [
        {
            "text": "Summarize and analyze the contents of the CSV file.",
            "files": ["assets/additional-examples/sample-csv.csv"],
        }
    ],
    [
        {
            "text": "Assume the role of a friendly and understanding girlfriend. Describe this video.",
            "files": ["assets/additional-examples/tmp.mp4"],
        }
    ],
    [
        {
            "text": "Describe the cover and read the text on it.",
            "files": ["assets/additional-examples/maz.jpg"],
        }
    ],
    [
        {
            "text": "I already have this supplement <image> and I plan to buy this product <image>. Are there any precautions when taking them together?",
            "files": ["assets/additional-examples/pill1.png", "assets/additional-examples/pill2.png"],
        }
    ],
    [
        {
            "text": "Solve this integral.",
            "files": ["assets/additional-examples/4.png"],
        }
    ],
    [
        {
            "text": "When was this ticket issued, and what is its price?",
            "files": ["assets/additional-examples/2.png"],
        }
    ],
    [
        {
            "text": "Based on the sequence of these images, create a short story.",
            "files": [
                "assets/sample-images/09-1.png",
                "assets/sample-images/09-2.png",
                "assets/sample-images/09-3.png",
                "assets/sample-images/09-4.png",
                "assets/sample-images/09-5.png",
            ],
        }
    ],
    [
        {
            "text": "Write Python code using matplotlib to plot a bar chart that matches this image.",
            "files": ["assets/additional-examples/barchart.png"],
        }
    ],
    [
        {
            "text": "Read the text in the image and write it out in Markdown format.",
            "files": ["assets/additional-examples/3.png"],
        }
    ],
    [
        {
            "text": "What does this sign say?",
            "files": ["assets/sample-images/02.png"],
        }
    ],
    [
        {
            "text": "Compare the two images and describe their similarities and differences.",
            "files": ["assets/sample-images/03.png"],
        }
    ], 
]

##############################################################################
# Gradio UI (Blocks) 구성 (좌측 사이드 메뉴 없이 전체화면 채팅)
##############################################################################
# Custom stylesheet passed to gr.Blocks: full-width container, transparent
# backgrounds for body/buttons/examples/chat elements, and a semi-transparent
# input box. The text below is a runtime string and is left untouched.
css = """
/* 1) UI를 처음부터 가장 넓게 (width 100%) 고정하여 표시 */
.gradio-container {
    background: rgba(255, 255, 255, 0.7); /* 배경 투명도 증가 */
    padding: 30px 40px;
    margin: 20px auto;  /* 위아래 여백만 유지 */
    width: 100% !important;
    max-width: none !important; /* 1200px 제한 제거 */
}
.fillable {
    width: 100% !important; 
    max-width: 100% !important; 
}
/* 2) 배경을 완전히 투명하게 변경 */
body {
    background: transparent; /* 완전 투명 배경 */
    margin: 0;
    padding: 0;
    font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
    color: #333;
}
/* 버튼 색상 완전히 제거하고 투명하게 */
button, .btn {
    background: transparent !important; /* 색상 완전히 제거 */
    border: 1px solid #ddd; /* 경계선만 살짝 추가 */
    color: #333;
    padding: 12px 24px;
    text-transform: uppercase;
    font-weight: bold;
    letter-spacing: 1px;
    cursor: pointer;
}
button:hover, .btn:hover {
    background: rgba(0, 0, 0, 0.05) !important; /* 호버 시 아주 살짝 어둡게만 */
}

/* examples 관련 모든 색상 제거 */
#examples_container, .examples-container {
    margin: auto;
    width: 90%;
    background: transparent !important;
}
#examples_row, .examples-row {
    justify-content: center;
    background: transparent !important;
}

/* examples 버튼 내부의 모든 색상 제거 */
.gr-samples-table button,
.gr-samples-table .gr-button,
.gr-samples-table .gr-sample-btn,
.gr-examples button,
.gr-examples .gr-button,
.gr-examples .gr-sample-btn,
.examples button,
.examples .gr-button,
.examples .gr-sample-btn {
    background: transparent !important;
    border: 1px solid #ddd;
    color: #333;
}

/* examples 버튼 호버 시에도 색상 없게 */
.gr-samples-table button:hover,
.gr-samples-table .gr-button:hover,
.gr-samples-table .gr-sample-btn:hover,
.gr-examples button:hover,
.gr-examples .gr-button:hover,
.gr-examples .gr-sample-btn:hover,
.examples button:hover,
.examples .gr-button:hover,
.examples .gr-sample-btn:hover {
    background: rgba(0, 0, 0, 0.05) !important;
}

/* 채팅 인터페이스 요소들도 투명하게 */
.chatbox, .chatbot, .message {
    background: transparent !important;
}

/* 입력창 투명도 조정 */
.multimodal-textbox, textarea, input {
    background: rgba(255, 255, 255, 0.5) !important;
}

/* 모든 컨테이너 요소에 배경색 제거 */
.container, .wrap, .box, .panel, .gr-panel {
    background: transparent !important;
}

/* 예제 섹션의 모든 요소에서 배경색 제거 */
.gr-examples-container, .gr-examples, .gr-sample, .gr-sample-row, .gr-sample-cell {
    background: transparent !important;
}
"""

# HTML header rendered at the top of the page via gr.Markdown.
# NOTE(review): the text advertises the 27B model while the default MODEL_ID
# in this file is a 12B checkpoint — confirm which one is intended.
title_html = """
<h1 align="center" style="margin-bottom: 0.2em; font-size: 1.6em;"> 🤗 Gemma3-R1984-27B </h1>
<p align="center" style="font-size:1.1em; color:#555;">
    ✅Agentic AI Platform ✅Reasoning & Uncensored ✅Multimodal & VLM ✅Deep-Research & RAG <br>
    Operates on an ✅'NVIDIA A100 GPU' as an independent local server, enhancing security and preventing information leakage.<br>
    @Model Repository: VIDraft/Gemma-3-R1984-27B, @Based by 'Google Gemma-3-27b', @Powered by 'MOUSE-II'(VIDRAFT)
</p>
"""


# Top-level UI: title, a visible "Deep Research" checkbox, three hidden
# controls, and the multimodal chat interface bound to run().
with gr.Blocks(css=css, title="Gemma3-R1984-27B") as demo:
    gr.Markdown(title_html)

    # Display the web search option (while the system prompt and token slider remain hidden)
    web_search_checkbox = gr.Checkbox(
        label="Deep Research",
        value=False
    )

    # Used internally but not visible to the user
    system_prompt_box = gr.Textbox(
        lines=3,
        value="You are a deep thinking AI that may use extremely long chains of thought to thoroughly analyze the problem and deliberate using systematic reasoning processes to arrive at a correct solution before answering.",
        visible=False  # hidden from view
    )
    
    # Hidden slider; its default value (1000) is what run() receives as max_new_tokens.
    max_tokens_slider = gr.Slider(
        label="Max New Tokens",
        minimum=100,
        maximum=8000,
        step=50,
        value=1000,
        visible=False  # hidden from view
    )
    
    # Hidden placeholder for run()'s web_search_query parameter, which run() does not use.
    web_search_text = gr.Textbox(
        lines=1,
        label="(Unused) Web Search Query",
        placeholder="No direct input needed",
        visible=False  # hidden from view
    )
    
    # Configure the chat interface
    chat = gr.ChatInterface(
        fn=run,
        type="messages",
        chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
        textbox=gr.MultimodalTextbox(
            file_types=[
                ".webp", ".png", ".jpg", ".jpeg", ".gif",
                ".mp4", ".csv", ".txt", ".pdf"
            ],
            file_count="multiple",
            autofocus=True
        ),
        multimodal=True,
        # Order matches run()'s parameters after (message, history):
        # system_prompt, max_new_tokens, use_web_search, web_search_query.
        additional_inputs=[
            system_prompt_box,
            max_tokens_slider,
            web_search_checkbox,
            web_search_text,
        ],
        stop_btn=False,
        title='<a href="https://discord.gg/openfreeai" target="_blank">https://discord.gg/openfreeai</a>',
        examples=examples,
        run_examples_on_click=False,
        cache_examples=False,
        css_paths=None,
        delete_cache=(1800, 1800),
    )

    # Example section - since examples are already set in ChatInterface, this is for display only
    with gr.Row(elem_id="examples_row"):
        with gr.Column(scale=12, elem_id="examples_container"):
            gr.Markdown("### Example Inputs (click to load)")


if __name__ == "__main__":
    # Launch the Gradio app with default settings when executed as a script.
    demo.launch()