openfree committed on
Commit b9f6a1d · verified · 1 Parent(s): 54917ae

Update app.py

Files changed (1)
  1. app.py +83 -148
app.py CHANGED
@@ -3,7 +3,7 @@
3
  import os
4
  import re
5
  import tempfile
6
- import gc # garbage collector (added)
7
  from collections.abc import Iterator
8
  from threading import Thread
9
  import json
@@ -16,16 +16,16 @@ from loguru import logger
16
  from PIL import Image
17
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
18
 
19
- # CSV/TXT analysis
20
  import pandas as pd
21
- # PDF text extraction
22
  import PyPDF2
23
 
24
  ##############################################################################
25
- # Added memory cleanup function
26
  ##############################################################################
27
  def clear_cuda_cache():
28
- """CUDA 캐시를 명시적으로 비웁니다."""
29
  if torch.cuda.is_available():
30
  torch.cuda.empty_cache()
31
  gc.collect()
@@ -36,13 +36,11 @@ def clear_cuda_cache():
36
  SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
37
 
38
  ##############################################################################
39
- # Simple keyword extraction function (keeps Korean, Latin letters, digits, and spaces)
40
  ##############################################################################
41
  def extract_keywords(text: str, top_k: int = 5) -> str:
42
  """
43
- 1) Keep only Korean (가-힣), English letters (a-zA-Z), digits (0-9), and whitespace
44
- 2) Split into tokens on whitespace
45
- 3) Keep at most top_k tokens
46
  """
47
  text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
48
  tokens = text.split()
@@ -50,13 +48,11 @@ def extract_keywords(text: str, top_k: int = 5) -> str:
50
  return " ".join(key_tokens)
51
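For reference, a quick sketch of what this keyword extraction yields for a typical query (hypothetical input, not part of the commit): the regex strips everything except Korean, Latin letters, digits and whitespace, then at most top_k whitespace-separated tokens are kept.

print(extract_keywords("X-RAY scan: knife or scissors?"))
# -> "XRAY scan knife or scissors"  (hyphen and punctuation stripped, first 5 tokens kept)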
 
52
  ##############################################################################
53
- # SerpHouse Live endpoint call
54
- # - include link, snippet, etc. for every item when passing the top 20 results JSON to the LLM
55
  ##############################################################################
56
  def do_web_search(query: str) -> str:
57
  """
58
- Return the full top 20 'organic' result items (title, link, snippet, etc.)
59
- as a JSON string
60
  """
61
  try:
62
  url = "https://api.serphouse.com/serp/live"
@@ -65,55 +61,55 @@ def do_web_search(query: str) -> str:
65
  params = {
66
  "q": query,
67
  "domain": "google.com",
68
- "serp_type": "web", # 기본 검색
69
  "device": "desktop",
70
  "lang": "en",
71
- "num": "20" # 최대 20 결과만 요청
72
  }
73
 
74
  headers = {
75
  "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
76
  }
77
 
78
- logger.info(f"Calling SerpHouse API... query: {query}")
79
- logger.info(f"Request URL: {url} - params: {params}")
80
 
81
- # Perform GET request
82
  response = requests.get(url, headers=headers, params=params, timeout=60)
83
  response.raise_for_status()
84
 
85
- logger.info(f"SerpHouse API response status code: {response.status_code}")
86
  data = response.json()
87
 
88
- # Handle various response structures
89
  results = data.get("results", {})
90
  organic = None
91
 
92
- # Possible response structure 1
93
  if isinstance(results, dict) and "organic" in results:
94
  organic = results["organic"]
95
 
96
- # Possible response structure 2 (nested results)
97
  elif isinstance(results, dict) and "results" in results:
98
  if isinstance(results["results"], dict) and "organic" in results["results"]:
99
  organic = results["results"]["organic"]
100
 
101
- # Possible response structure 3 (top-level organic)
102
  elif "organic" in data:
103
  organic = data["organic"]
104
 
105
  if not organic:
106
- logger.warning("Could not find organic results in the response.")
107
- logger.debug(f"Response structure: {list(data.keys())}")
108
  if isinstance(results, dict):
109
- logger.debug(f"results structure: {list(results.keys())}")
110
  return "No web search results found or unexpected API response structure."
111
 
112
- # Limit results and optimize context length
113
  max_results = min(20, len(organic))
114
  limited_organic = organic[:max_results]
115
 
116
- # Improve result formatting - output as Markdown for better readability
117
  summary_lines = []
118
  for idx, item in enumerate(limited_organic, start=1):
119
  title = item.get("title", "No title")
@@ -121,26 +117,22 @@ def do_web_search(query: str) -> str:
121
  snippet = item.get("snippet", "No description")
122
  displayed_link = item.get("displayed_link", link)
123
 
124
- # Markdown format (clickable links)
125
  summary_lines.append(
126
  f"### Result {idx}: {title}\n\n"
127
  f"{snippet}\n\n"
128
- f"**출처**: [{displayed_link}]({link})\n\n"
129
  f"---\n"
130
  )
131
 
132
- # Add clear instructions for the model
133
  instructions = """
134
  # X-RAY Security Scanning Reference Results
135
- Below are search results about X-RAY security scanning and threat detection. Use this information to enhance your analysis:
136
- 1. Reference security protocols and standards from the results
137
- 2. Compare findings with known threat patterns
138
- 3. Cite relevant security guidelines when applicable
139
- 4. Use multiple sources to verify threat assessments
140
  """
141
 
142
  search_results = instructions + "\n".join(summary_lines)
143
- logger.info(f"Finished processing {len(limited_organic)} search results")
144
  return search_results
145
 
146
  except Exception as e:
@@ -149,10 +141,10 @@ Below are search results about X-RAY security scanning and threat detection. Use
149
 
150
 
151
  ##############################################################################
152
- # Model/processor loading
153
  ##############################################################################
154
  MAX_CONTENT_CHARS = 2000
155
- MAX_INPUT_LENGTH = 2096 # added max input token limit
156
  model_id = os.getenv("MODEL_ID", "VIDraft/Gemma-3-R1984-4B")
157
 
158
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
@@ -160,17 +152,17 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
160
  model_id,
161
  device_map="auto",
162
  torch_dtype=torch.bfloat16,
163
- attn_implementation="eager" # change to "flash_attention_2" if possible
164
  )
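The comment on attn_implementation suggests switching to "flash_attention_2" where possible; a minimal sketch of making that choice conditional on the flash-attn package being installed (an assumption, not what this commit does):

try:
    import flash_attn  # noqa: F401
    _attn_impl = "flash_attention_2"
except ImportError:
    _attn_impl = "eager"
# ...then pass attn_implementation=_attn_impl to from_pretrained(...) instead of the hard-coded "eager".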
165
  MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
166
 
167
 
168
  ##############################################################################
169
- # CSV, TXT, PDF analysis functions
170
  ##############################################################################
171
  def analyze_csv_file(path: str) -> str:
172
  """
173
- Convert the entire CSV file to a string. Show only part of it if too long.
174
  """
175
  try:
176
  df = pd.read_csv(path)
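The rest of this hunk is elided; a minimal sketch of the truncation the docstring describes, assuming MAX_CONTENT_CHARS is the cap (hypothetical reconstruction, names may differ from the actual file):

def analyze_csv_file_sketch(path: str) -> str:
    # Hypothetical stand-in for the elided body of analyze_csv_file.
    df = pd.read_csv(path)
    df_str = df.to_string()
    if len(df_str) > MAX_CONTENT_CHARS:
        df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
    return f"**[CSV File] {os.path.basename(path)}**\n\n{df_str}"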
@@ -186,7 +178,7 @@ def analyze_csv_file(path: str) -> str:
186
 
187
  def analyze_txt_file(path: str) -> str:
188
  """
189
- Read the full TXT file. Show only part of it if too long.
190
  """
191
  try:
192
  with open(path, "r", encoding="utf-8") as f:
@@ -200,7 +192,7 @@ def analyze_txt_file(path: str) -> str:
200
 
201
  def pdf_to_markdown(pdf_path: str) -> str:
202
  """
203
- Convert PDF text to Markdown. Extract text simply, page by page.
204
  """
205
  text_chunks = []
206
  try:
@@ -228,7 +220,7 @@ def pdf_to_markdown(pdf_path: str) -> str:
228
 
229
 
230
  ##############################################################################
231
- # Image/video upload limit checks
232
  ##############################################################################
233
  def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
234
  image_count = 0
@@ -293,7 +285,7 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
293
 
294
 
295
  ##############################################################################
296
- # Video processing - added temp file tracking code
297
  ##############################################################################
298
  def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
299
  vidcap = cv2.VideoCapture(video_path)
@@ -307,7 +299,7 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
307
  success, image = vidcap.read()
308
  if success:
309
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
310
- # Added image size reduction
311
  image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
312
  pil_image = Image.fromarray(image)
313
  timestamp = round(i / fps, 2)
@@ -321,14 +313,14 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
321
 
322
  def process_video(video_path: str) -> tuple[list[dict], list[str]]:
323
  content = []
324
- temp_files = [] # list for tracking temp files
325
 
326
  frames = downsample_video(video_path)
327
  for frame in frames:
328
  pil_image, timestamp = frame
329
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
330
  pil_image.save(temp_file.name)
331
- temp_files.append(temp_file.name) # store the path for tracking
332
  content.append({"type": "text", "text": f"Frame {timestamp}:"})
333
  content.append({"type": "image", "url": temp_file.name})
334
 
@@ -336,7 +328,7 @@ def process_video(video_path: str) -> tuple[list[dict], list[str]]:
336
 
337
 
338
  ##############################################################################
339
- # interleaved <image> handling
340
  ##############################################################################
341
  def process_interleaved_images(message: dict) -> list[dict]:
342
  parts = re.split(r"(<image>)", message["text"])
@@ -358,7 +350,7 @@ def process_interleaved_images(message: dict) -> list[dict]:
358
 
359
 
360
  ##############################################################################
361
- # PDF + CSV + TXT + image/video
362
  ##############################################################################
363
  def is_image_file(file_path: str) -> bool:
364
  return bool(re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE))
@@ -375,7 +367,7 @@ def is_document_file(file_path: str) -> bool:
375
 
376
 
377
  def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
378
- temp_files = [] # list for tracking temp files
379
 
380
  if not message["files"]:
381
  return [{"type": "text", "text": message["text"]}], temp_files
@@ -419,7 +411,7 @@ def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
419
 
420
 
421
  ##############################################################################
422
- # history -> LLM message conversion
423
  ##############################################################################
424
  def process_history(history: list[dict]) -> list[dict]:
425
  messages = []
@@ -448,26 +440,26 @@ def process_history(history: list[dict]) -> list[dict]:
448
 
449
 
450
  ##############################################################################
451
- # Catch OOM in the model generation function
452
  ##############################################################################
453
  def _model_gen_with_oom_catch(**kwargs):
454
  """
455
- To catch OutOfMemoryError when run in a separate thread
456
  """
457
  try:
458
  model.generate(**kwargs)
459
  except torch.cuda.OutOfMemoryError:
460
  raise RuntimeError(
461
- "[OutOfMemoryError] GPU 메모리가 부족합니다. "
462
- "Max New Tokens 줄이거나, 프롬프트 길이를 줄여주세요."
463
  )
464
  finally:
465
- # Clear the cache once more after generation completes
466
  clear_cuda_cache()
467
 
468
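For context, a minimal sketch of how a helper like this is typically driven from run() with a streamer, assuming the processor can stand in as the streamer's tokenizer (as in common Gemma 3 demos); the actual wiring in run() further down may differ:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(inputs, max_new_tokens: int = 512):
    # Generation runs in a background thread so partial text can be yielded as it arrives;
    # _model_gen_with_oom_catch re-raises CUDA OOM as a RuntimeError.
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs).start()
    output = ""
    for chunk in streamer:
        output += chunk
        yield output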
 
469
  ##############################################################################
470
- # Main inference function (when web search is checked: auto keyword extraction -> search -> results into system msg)
471
  ##############################################################################
472
  @spaces.GPU(duration=120)
473
  def run(
@@ -483,12 +475,12 @@ def run(
483
  yield ""
484
  return
485
 
486
- temp_files = [] # for tracking temp files
487
 
488
  try:
489
  combined_system_msg = ""
490
 
491
- # Used internally only (not shown in the UI)
492
  if system_prompt.strip():
493
  combined_system_msg += f"[System Prompt]\n{system_prompt.strip()}\n\n"
494
 
@@ -499,14 +491,6 @@ def run(
499
  logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
500
  ws_result = do_web_search(ws_query)
501
  combined_system_msg += f"[X-RAY Security Reference Data]\n{ws_result}\n\n"
502
- combined_system_msg += """
503
- [IMPORTANT SECURITY ANALYSIS GUIDELINES]
504
- 1. Systematically scan and identify ALL potential threats in the X-RAY image
505
- 2. Reference security protocols and threat detection standards from search results
506
- 3. Use proper threat classification terminology
507
- 4. Provide threat severity levels (HIGH/MEDIUM/LOW)
508
- 5. Suggest appropriate security response actions
509
- """
510
  else:
511
  combined_system_msg += "[No valid keywords found, skipping WebSearch]\n\n"
512
 
@@ -520,7 +504,7 @@ def run(
520
  messages.extend(process_history(history))
521
 
522
  user_content, user_temp_files = process_new_user_message(message)
523
- temp_files.extend(user_temp_files) # track temp files
524
 
525
  for item in user_content:
526
  if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
@@ -535,7 +519,7 @@ def run(
535
  return_tensors="pt",
536
  ).to(device=model.device, dtype=torch.bfloat16)
537
 
538
- # Added input token limit
539
  if inputs.input_ids.shape[1] > MAX_INPUT_LENGTH:
540
  inputs.input_ids = inputs.input_ids[:, -MAX_INPUT_LENGTH:]
541
  if 'attention_mask' in inputs:
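The hunk cuts off inside this check; presumably the attention mask is truncated to match the input ids, along these lines (assumed continuation, not shown in the diff):

    inputs.attention_mask = inputs.attention_mask[:, -MAX_INPUT_LENGTH:]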
@@ -558,10 +542,10 @@ def run(
558
 
559
  except Exception as e:
560
  logger.error(f"Error in run: {str(e)}")
561
- yield f"Sorry, an error occurred: {str(e)}"
562
 
563
  finally:
564
- # Delete temp files
565
  for temp_file in temp_files:
566
  try:
567
  if os.path.exists(temp_file):
@@ -570,7 +554,7 @@ def run(
570
  except Exception as e:
571
  logger.warning(f"Failed to delete temp file {temp_file}: {e}")
572
 
573
- # Explicit memory cleanup
574
  try:
575
  del inputs, streamer
576
  except:
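The hunk ends at the bare except; a plausible reconstruction of the full cleanup block, consistent with the comment above (assumption, the tail is not shown in the diff):

try:
    del inputs, streamer
except:
    pass  # inputs/streamer may not exist if an error occurred before they were created
clear_cuda_cache()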
@@ -581,7 +565,7 @@ def run(
581
 
582
 
583
  ##############################################################################
584
- # Examples dedicated to X-RAY security scanning
585
  ##############################################################################
586
  examples = [
587
  [
@@ -647,108 +631,64 @@ examples = [
647
  ]
648
 
649
  ##############################################################################
650
- # Gradio UI (Blocks) layout (X-RAY security scanning theme)
651
  ##############################################################################
652
  css = """
653
- /* X-RAY Security Scanner Theme */
654
  .gradio-container {
655
- background: linear-gradient(135deg, rgba(0, 20, 40, 0.9), rgba(0, 40, 80, 0.9));
656
  padding: 30px 40px;
657
  margin: 20px auto;
658
  width: 100% !important;
659
  max-width: none !important;
660
- border: 2px solid #00ff00;
661
- box-shadow: 0 0 20px rgba(0, 255, 0, 0.3);
662
  }
663
  .fillable {
664
  width: 100% !important;
665
  max-width: 100% !important;
666
  }
667
  body {
668
- background: #000;
669
  margin: 0;
670
  padding: 0;
671
- font-family: 'Courier New', monospace;
672
- color: #00ff00;
673
  }
674
- /* Security-themed buttons */
675
  button, .btn {
676
- background: rgba(0, 255, 0, 0.1) !important;
677
- border: 1px solid #00ff00;
678
- color: #00ff00;
679
  padding: 12px 24px;
680
  text-transform: uppercase;
681
  font-weight: bold;
682
  letter-spacing: 1px;
683
  cursor: pointer;
684
- transition: all 0.3s;
685
  }
686
  button:hover, .btn:hover {
687
- background: rgba(0, 255, 0, 0.2) !important;
688
- box-shadow: 0 0 10px rgba(0, 255, 0, 0.5);
689
  }
690
 
691
- /* Alert-style headers */
692
  h1, h2, h3 {
693
- color: #00ff00;
694
- text-shadow: 0 0 10px rgba(0, 255, 0, 0.5);
695
  }
696
 
697
- /* Input fields with security theme */
698
  .multimodal-textbox, textarea, input {
699
- background: rgba(0, 40, 80, 0.8) !important;
700
- border: 1px solid #00ff00;
701
- color: #00ff00;
702
  }
703
 
704
- /* Chat interface security styling */
705
  .chatbox, .chatbot, .message {
706
- background: rgba(0, 20, 40, 0.8) !important;
707
- border: 1px solid #00ff00;
708
  }
709
 
710
- /* Example section styling */
711
  #examples_container, .examples-container {
712
  margin: auto;
713
  width: 90%;
714
- background: rgba(0, 40, 80, 0.5) !important;
715
- border: 1px solid #00ff00;
716
- padding: 20px;
717
- }
718
-
719
- /* Security alert animation */
720
- @keyframes security-pulse {
721
- 0% { box-shadow: 0 0 10px rgba(0, 255, 0, 0.5); }
722
- 50% { box-shadow: 0 0 20px rgba(0, 255, 0, 0.8); }
723
- 100% { box-shadow: 0 0 10px rgba(0, 255, 0, 0.5); }
724
- }
725
-
726
- .gradio-container {
727
- animation: security-pulse 2s infinite;
728
  }
729
-
730
- /* Threat level indicators */
731
- .threat-high { color: #ff0000; font-weight: bold; }
732
- .threat-medium { color: #ffaa00; font-weight: bold; }
733
- .threat-low { color: #00ff00; font-weight: bold; }
734
  """
735
 
736
  title_html = """
737
- <h1 align="center" style="margin-bottom: 0.2em; font-size: 1.6em; color: #00ff00; text-shadow: 0 0 20px #00ff00;">
738
- 🔍 Gemma-3-R1984-4B-BEAM 🔍
739
- </h1>
740
- <p align="center" style="font-size:1.1em; color:#00ff00; text-shadow: 0 0 10px #00ff00;">
741
- ⚡ X-RAY Security Threat Detection System ⚡<br>
742
- ✅ Real-time Weapon Detection ✅ Explosive Material Identification<br>
743
- ✅ Prohibited Item Classification ✅ Multi-threat Analysis<br>
744
- <span style="color: #ffaa00;">⚠️ Detects: Guns, Knives, Bombs, Batteries, Scissors, Springs, Liquids >100ml, EOD Components ⚠️</span><br>
745
- <span style="font-size: 0.9em;">Powered by Advanced AI Vision Model • Based on Google Gemma-3-4b • Enhanced by VIDRAFT</span>
746
- </p>
747
- <div align="center" style="margin: 10px 0; padding: 10px; border: 2px solid #ff0000; background: rgba(255, 0, 0, 0.1);">
748
- <p style="color: #ff0000; margin: 0; font-weight: bold;">
749
- 🚨 SECURITY ALERT: This system is for authorized security personnel only 🚨
750
- </p>
751
- </div>
752
  """
753
 
754
 
@@ -757,11 +697,11 @@ with gr.Blocks(css=css, title="Gemma-3-R1984-4B-BEAM - X-RAY Security Scanner")
757
 
758
  # Display the web search option (while the system prompt and token slider remain hidden)
759
  web_search_checkbox = gr.Checkbox(
760
- label="Enable Security Database Search (TSA/Aviation Security Standards)",
761
  value=False
762
  )
763
 
764
- # System prompt dedicated to X-RAY security scanning
765
  system_prompt_box = gr.Textbox(
766
  lines=3,
767
  value="""You are an advanced X-RAY security scanning AI specialized in threat detection and aviation security. Your primary mission is to identify ALL potential security threats in X-RAY images with extreme precision.
@@ -789,14 +729,14 @@ CRITICAL: Never miss a potential threat. When in doubt, flag for manual inspecti
789
  minimum=100,
790
  maximum=8000,
791
  step=50,
792
- value=1500, # Increased for detailed security analysis
793
  visible=False # hidden from view
794
  )
795
 
796
  web_search_text = gr.Textbox(
797
  lines=1,
798
- label="(Unused) Web Search Query",
799
- placeholder="No direct input needed",
800
  visible=False # hidden from view
801
  )
802
 
@@ -811,8 +751,7 @@ CRITICAL: Never miss a potential threat. When in doubt, flag for manual inspecti
811
  ".mp4", ".csv", ".txt", ".pdf"
812
  ],
813
  file_count="multiple",
814
- autofocus=True,
815
- placeholder="Upload X-RAY images for security analysis..."
816
  ),
817
  multimodal=True,
818
  additional_inputs=[
@@ -822,7 +761,7 @@ CRITICAL: Never miss a potential threat. When in doubt, flag for manual inspecti
822
  web_search_text,
823
  ],
824
  stop_btn=False,
825
- title='<a href="https://discord.gg/openfreeai" target="_blank" style="color: #00ff00;">Security Support: https://discord.gg/openfreeai</a>',
826
  examples=examples,
827
  run_examples_on_click=False,
828
  cache_examples=False,
@@ -833,11 +772,7 @@ CRITICAL: Never miss a potential threat. When in doubt, flag for manual inspecti
833
  # Example section - since examples are already set in ChatInterface, this is for display only
834
  with gr.Row(elem_id="examples_row"):
835
  with gr.Column(scale=12, elem_id="examples_container"):
836
- gr.Markdown("""
837
- ### 🔍 X-RAY Security Scanning Examples
838
- Click any example below to load a pre-configured security scan scenario.
839
- Each example demonstrates different threat detection capabilities of the BEAM system.
840
- """)
841
 
842
 
843
  if __name__ == "__main__":
 
3
  import os
4
  import re
5
  import tempfile
6
+ import gc # garbage collector
7
  from collections.abc import Iterator
8
  from threading import Thread
9
  import json
 
16
  from PIL import Image
17
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
18
 
19
+ # CSV/TXT analysis
20
  import pandas as pd
21
+ # PDF text extraction
22
  import PyPDF2
23
 
24
  ##############################################################################
25
+ # Memory cleanup function
26
  ##############################################################################
27
  def clear_cuda_cache():
28
+ """Clear CUDA cache explicitly."""
29
  if torch.cuda.is_available():
30
  torch.cuda.empty_cache()
31
  gc.collect()
 
36
  SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
37
 
38
  ##############################################################################
39
+ # Simple keyword extraction function
40
  ##############################################################################
41
  def extract_keywords(text: str, top_k: int = 5) -> str:
42
  """
43
+ Extract keywords from text
 
 
44
  """
45
  text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
46
  tokens = text.split()
 
48
  return " ".join(key_tokens)
49
 
50
  ##############################################################################
51
+ # SerpHouse Live endpoint call
 
52
  ##############################################################################
53
  def do_web_search(query: str) -> str:
54
  """
55
+ Return top 20 'organic' results as JSON string
 
56
  """
57
  try:
58
  url = "https://api.serphouse.com/serp/live"
 
61
  params = {
62
  "q": query,
63
  "domain": "google.com",
64
+ "serp_type": "web", # Basic web search
65
  "device": "desktop",
66
  "lang": "en",
67
+ "num": "20" # Request max 20 results
68
  }
69
 
70
  headers = {
71
  "Authorization": f"Bearer {SERPHOUSE_API_KEY}"
72
  }
73
 
74
+ logger.info(f"SerpHouse API call... query: {query}")
75
+ logger.info(f"Request URL: {url} - params: {params}")
76
 
77
+ # GET request
78
  response = requests.get(url, headers=headers, params=params, timeout=60)
79
  response.raise_for_status()
80
 
81
+ logger.info(f"SerpHouse API response status: {response.status_code}")
82
  data = response.json()
83
 
84
+ # Handle various response structures
85
  results = data.get("results", {})
86
  organic = None
87
 
88
+ # Possible response structure 1
89
  if isinstance(results, dict) and "organic" in results:
90
  organic = results["organic"]
91
 
92
+ # Possible response structure 2 (nested results)
93
  elif isinstance(results, dict) and "results" in results:
94
  if isinstance(results["results"], dict) and "organic" in results["results"]:
95
  organic = results["results"]["organic"]
96
 
97
+ # Possible response structure 3 (top-level organic)
98
  elif "organic" in data:
99
  organic = data["organic"]
100
 
101
  if not organic:
102
+ logger.warning("No organic results found in response.")
103
+ logger.debug(f"Response structure: {list(data.keys())}")
104
  if isinstance(results, dict):
105
+ logger.debug(f"results structure: {list(results.keys())}")
106
  return "No web search results found or unexpected API response structure."
107
 
108
+ # Limit results and optimize context length
109
  max_results = min(20, len(organic))
110
  limited_organic = organic[:max_results]
111
 
112
+ # Format results for better readability
113
  summary_lines = []
114
  for idx, item in enumerate(limited_organic, start=1):
115
  title = item.get("title", "No title")
 
117
  snippet = item.get("snippet", "No description")
118
  displayed_link = item.get("displayed_link", link)
119
 
120
+ # Markdown format
121
  summary_lines.append(
122
  f"### Result {idx}: {title}\n\n"
123
  f"{snippet}\n\n"
124
+ f"**Source**: [{displayed_link}]({link})\n\n"
125
  f"---\n"
126
  )
127
 
128
+ # Add simple instructions for model
129
  instructions = """
130
  # X-RAY Security Scanning Reference Results
131
+ Use this information to enhance your analysis.
 
 
 
 
132
  """
133
 
134
  search_results = instructions + "\n".join(summary_lines)
135
+ logger.info(f"Processed {len(limited_organic)} search results")
136
  return search_results
137
 
138
  except Exception as e:
 
141
 
142
 
143
  ##############################################################################
144
+ # Model/Processor loading
145
  ##############################################################################
146
  MAX_CONTENT_CHARS = 2000
147
+ MAX_INPUT_LENGTH = 2096 # Max input token limit
148
  model_id = os.getenv("MODEL_ID", "VIDraft/Gemma-3-R1984-4B")
149
 
150
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 
152
  model_id,
153
  device_map="auto",
154
  torch_dtype=torch.bfloat16,
155
+ attn_implementation="eager" # Change to "flash_attention_2" if available
156
  )
157
  MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
158
 
159
 
160
  ##############################################################################
161
+ # CSV, TXT, PDF analysis functions
162
  ##############################################################################
163
  def analyze_csv_file(path: str) -> str:
164
  """
165
+ Convert CSV file to string. Truncate if too long.
166
  """
167
  try:
168
  df = pd.read_csv(path)
 
178
 
179
  def analyze_txt_file(path: str) -> str:
180
  """
181
+ Read TXT file. Truncate if too long.
182
  """
183
  try:
184
  with open(path, "r", encoding="utf-8") as f:
 
192
 
193
  def pdf_to_markdown(pdf_path: str) -> str:
194
  """
195
+ Convert PDF text to Markdown. Extract text by pages.
196
  """
197
  text_chunks = []
198
  try:
 
220
 
221
 
222
  ##############################################################################
223
+ # Image/Video upload limit check
224
  ##############################################################################
225
  def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
226
  image_count = 0
 
285
 
286
 
287
  ##############################################################################
288
+ # Video processing - with temp file tracking
289
  ##############################################################################
290
  def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
291
  vidcap = cv2.VideoCapture(video_path)
 
299
  success, image = vidcap.read()
300
  if success:
301
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
302
+ # Resize image
303
  image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
304
  pil_image = Image.fromarray(image)
305
  timestamp = round(i / fps, 2)
 
313
 
314
  def process_video(video_path: str) -> tuple[list[dict], list[str]]:
315
  content = []
316
+ temp_files = [] # List for tracking temp files
317
 
318
  frames = downsample_video(video_path)
319
  for frame in frames:
320
  pil_image, timestamp = frame
321
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
322
  pil_image.save(temp_file.name)
323
+ temp_files.append(temp_file.name) # Track for deletion later
324
  content.append({"type": "text", "text": f"Frame {timestamp}:"})
325
  content.append({"type": "image", "url": temp_file.name})
326
 
 
328
 
329
 
330
  ##############################################################################
331
+ # interleaved <image> processing
332
  ##############################################################################
333
  def process_interleaved_images(message: dict) -> list[dict]:
334
  parts = re.split(r"(<image>)", message["text"])
 
350
 
351
 
352
  ##############################################################################
353
+ # PDF + CSV + TXT + Image/Video
354
  ##############################################################################
355
  def is_image_file(file_path: str) -> bool:
356
  return bool(re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE))
 
367
 
368
 
369
  def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
370
+ temp_files = [] # List for tracking temp files
371
 
372
  if not message["files"]:
373
  return [{"type": "text", "text": message["text"]}], temp_files
 
411
 
412
 
413
  ##############################################################################
414
+ # history -> LLM message conversion
415
  ##############################################################################
416
  def process_history(history: list[dict]) -> list[dict]:
417
  messages = []
 
440
 
441
 
442
  ##############################################################################
443
+ # Model generation function with OOM catch
444
  ##############################################################################
445
  def _model_gen_with_oom_catch(**kwargs):
446
  """
447
+ Catch OutOfMemoryError in separate thread
448
  """
449
  try:
450
  model.generate(**kwargs)
451
  except torch.cuda.OutOfMemoryError:
452
  raise RuntimeError(
453
+ "[OutOfMemoryError] GPU memory insufficient. "
454
+ "Please reduce Max New Tokens or prompt length."
455
  )
456
  finally:
457
+ # Clear cache after generation
458
  clear_cuda_cache()
459
 
460
 
461
  ##############################################################################
462
+ # Main inference function (with auto web search)
463
  ##############################################################################
464
  @spaces.GPU(duration=120)
465
  def run(
 
475
  yield ""
476
  return
477
 
478
+ temp_files = [] # For tracking temp files
479
 
480
  try:
481
  combined_system_msg = ""
482
 
483
+ # Used internally only (hidden from UI)
484
  if system_prompt.strip():
485
  combined_system_msg += f"[System Prompt]\n{system_prompt.strip()}\n\n"
486
 
 
491
  logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
492
  ws_result = do_web_search(ws_query)
493
  combined_system_msg += f"[X-RAY Security Reference Data]\n{ws_result}\n\n"
 
 
 
 
 
 
 
 
494
  else:
495
  combined_system_msg += "[No valid keywords found, skipping WebSearch]\n\n"
496
 
 
504
  messages.extend(process_history(history))
505
 
506
  user_content, user_temp_files = process_new_user_message(message)
507
+ temp_files.extend(user_temp_files) # Track temp files
508
 
509
  for item in user_content:
510
  if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
 
519
  return_tensors="pt",
520
  ).to(device=model.device, dtype=torch.bfloat16)
521
 
522
+ # Limit input token count
523
  if inputs.input_ids.shape[1] > MAX_INPUT_LENGTH:
524
  inputs.input_ids = inputs.input_ids[:, -MAX_INPUT_LENGTH:]
525
  if 'attention_mask' in inputs:
 
542
 
543
  except Exception as e:
544
  logger.error(f"Error in run: {str(e)}")
545
+ yield f"Error occurred: {str(e)}"
546
 
547
  finally:
548
+ # Delete temp files
549
  for temp_file in temp_files:
550
  try:
551
  if os.path.exists(temp_file):
 
554
  except Exception as e:
555
  logger.warning(f"Failed to delete temp file {temp_file}: {e}")
556
 
557
+ # Explicit memory cleanup
558
  try:
559
  del inputs, streamer
560
  except:
 
565
 
566
 
567
  ##############################################################################
568
+ # X-RAY security scanning examples
569
  ##############################################################################
570
  examples = [
571
  [
 
631
  ]
632
 
633
  ##############################################################################
634
+ # Gradio UI (Blocks) layout
635
  ##############################################################################
636
  css = """
 
637
  .gradio-container {
638
+ background: white;
639
  padding: 30px 40px;
640
  margin: 20px auto;
641
  width: 100% !important;
642
  max-width: none !important;
 
 
643
  }
644
  .fillable {
645
  width: 100% !important;
646
  max-width: 100% !important;
647
  }
648
  body {
649
+ background: white;
650
  margin: 0;
651
  padding: 0;
652
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
653
+ color: #333;
654
  }
 
655
  button, .btn {
656
+ background: transparent !important;
657
+ border: 1px solid #ddd;
658
+ color: #333;
659
  padding: 12px 24px;
660
  text-transform: uppercase;
661
  font-weight: bold;
662
  letter-spacing: 1px;
663
  cursor: pointer;
 
664
  }
665
  button:hover, .btn:hover {
666
+ background: rgba(0, 0, 0, 0.05) !important;
 
667
  }
668
 
 
669
  h1, h2, h3 {
670
+ color: #333;
 
671
  }
672
 
 
673
  .multimodal-textbox, textarea, input {
674
+ background: rgba(255, 255, 255, 0.5) !important;
675
+ border: 1px solid #ddd;
676
+ color: #333;
677
  }
678
 
 
679
  .chatbox, .chatbot, .message {
680
+ background: transparent !important;
 
681
  }
682
 
 
683
  #examples_container, .examples-container {
684
  margin: auto;
685
  width: 90%;
686
+ background: transparent !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
687
  }
 
 
 
 
 
688
  """
689
 
690
  title_html = """
691
+ <h1 align="center" style="margin-bottom: 0.2em; font-size: 1.6em;">Gemma-3-R1984-4B-BEAM</h1>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
692
  """
693
 
694
 
 
697
 
698
  # Display the web search option (while the system prompt and token slider remain hidden)
699
  web_search_checkbox = gr.Checkbox(
700
+ label="Deep Research",
701
  value=False
702
  )
703
 
704
+ # X-RAY security scanning system prompt
705
  system_prompt_box = gr.Textbox(
706
  lines=3,
707
  value="""You are an advanced X-RAY security scanning AI specialized in threat detection and aviation security. Your primary mission is to identify ALL potential security threats in X-RAY images with extreme precision.
 
729
  minimum=100,
730
  maximum=8000,
731
  step=50,
732
+ value=1000,
733
  visible=False # hidden from view
734
  )
735
 
736
  web_search_text = gr.Textbox(
737
  lines=1,
738
+ label="Web Search Query",
739
+ placeholder="",
740
  visible=False # hidden from view
741
  )
742
 
 
751
  ".mp4", ".csv", ".txt", ".pdf"
752
  ],
753
  file_count="multiple",
754
+ autofocus=True
 
755
  ),
756
  multimodal=True,
757
  additional_inputs=[
 
761
  web_search_text,
762
  ],
763
  stop_btn=False,
764
+ title='<a href="https://discord.gg/openfreeai" target="_blank">https://discord.gg/openfreeai</a>',
765
  examples=examples,
766
  run_examples_on_click=False,
767
  cache_examples=False,
 
772
  # Example section - since examples are already set in ChatInterface, this is for display only
773
  with gr.Row(elem_id="examples_row"):
774
  with gr.Column(scale=12, elem_id="examples_container"):
775
+ pass
 
 
 
 
776
 
777
 
778
  if __name__ == "__main__":