seawolf2357 committed on
Commit 42713f1 · 1 Parent(s): 0ea9032

Update app-backup.py

Files changed (1)
  1. app-backup.py +399 -142
app-backup.py CHANGED
@@ -6,6 +6,7 @@ import tempfile
  from collections.abc import Iterator
  from threading import Thread

  import cv2
  import gradio as gr
  import spaces
@@ -20,7 +21,55 @@ import pandas as pd
  # PDF text extraction
  import PyPDF2

- MAX_CONTENT_CHARS = 8000  # show at most 8000 characters, to guard against very large files

  model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
@@ -43,6 +92,10 @@ def analyze_csv_file(path: str) -> str:
      """
      try:
          df = pd.read_csv(path)
          df_str = df.to_string()
          if len(df_str) > MAX_CONTENT_CHARS:
              df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
@@ -73,11 +126,20 @@ def pdf_to_markdown(pdf_path: str) -> str:
      try:
          with open(pdf_path, "rb") as f:
              reader = PyPDF2.PdfReader(f)
-             for page_num, page in enumerate(reader.pages, start=1):
                  page_text = page.extract_text() or ""
                  page_text = page_text.strip()
                  if page_text:
-                     text_chunks.append(f"## Page {page_num}\n\n{page_text}\n")
      except Exception as e:
          return f"Failed to read PDF ({os.path.basename(pdf_path)}): {str(e)}"

@@ -97,7 +159,7 @@ def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
      for path in paths:
          if path.endswith(".mp4"):
              video_count += 1
-         else:
              image_count += 1
      return image_count, video_count

@@ -108,10 +170,13 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
      for item in history:
          if item["role"] != "user" or isinstance(item["content"], str):
              continue
-         if item["content"][0].endswith(".mp4"):
-             video_count += 1
-         else:
-             image_count += 1
      return image_count, video_count

@@ -123,11 +188,9 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
      - If <image> tags are present, the tag count must match the number of actual images
      - CSV, TXT, PDF, etc. are not restricted here
      """
      media_files = []
      for f in message["files"]:
-         # Images: png/jpg/jpeg/gif/webp
-         # Video: mp4
-         # cf) PDF, CSV, TXT, etc. are excluded
          if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
              media_files.append(f)

@@ -149,9 +212,15 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
      if video_count == 0 and image_count > MAX_NUM_IMAGES:
          gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
          return False
-     if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
-         gr.Warning("The number of <image> tags in the text does not match the number of images.")
-         return False

      return True

@@ -164,7 +233,8 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
      fps = vidcap.get(cv2.CAP_PROP_FPS)
      total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

-     frame_interval = int(fps / 3)
      frames = []

      for i in range(0, total_frames, frame_interval):
@@ -175,6 +245,10 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
              pil_image = Image.fromarray(image)
              timestamp = round(i / fps, 2)
              frames.append((pil_image, timestamp))

      vidcap.release()
      return frames
@@ -200,9 +274,13 @@ def process_interleaved_images(message: dict) -> list[dict]:
      parts = re.split(r"(<image>)", message["text"])
      content = []
      image_index = 0
      for part in parts:
-         if part == "<image>":
-             content.append({"type": "image", "url": message["files"][image_index]})
              image_index += 1
          elif part.strip():
              content.append({"type": "text", "text": part.strip()})
@@ -216,13 +294,30 @@ def process_interleaved_images(message: dict) -> list[dict]:
  ##################################################
  # PDF + CSV + TXT + images/video
  ##################################################
  def process_new_user_message(message: dict) -> list[dict]:
      if not message["files"]:
          return [{"type": "text", "text": message["text"]}]

      # 1) Classify files
-     video_files = [f for f in message["files"] if f.endswith(".mp4")]
-     image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
      csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
      txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
      pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
@@ -251,9 +346,13 @@ def process_new_user_message(message: dict) -> list[dict]:
          return content_list

      # 7) Image handling
-     if "<image>" in message["text"]:
          # interleaved
-         return process_interleaved_images(message)
      else:
          # Plain multiple images
          for img_path in image_files:
@@ -281,9 +380,18 @@ def process_history(history: list[dict]) -> list[dict]:
          content = item["content"]
          if isinstance(content, str):
              current_user_content.append({"type": "text", "text": content})
-         else:
-             # Image or other
-             current_user_content.append({"type": "image", "url": content[0]})
      return messages

@@ -291,43 +399,100 @@ def process_history(history: list[dict]) -> list[dict]:
  # Main inference function
  ##################################################
  @spaces.GPU(duration=120)
- def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
      if not validate_media_constraints(message, history):
          yield ""
          return

-     messages = []
-     if system_prompt:
-         messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
-     messages.extend(process_history(history))
-     messages.append({"role": "user", "content": process_new_user_message(message)})
-
-     inputs = processor.apply_chat_template(
-         messages,
-         add_generation_prompt=True,
-         tokenize=True,
-         return_dict=True,
-         return_tensors="pt",
-     ).to(device=model.device, dtype=torch.bfloat16)
-
-     streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
-     gen_kwargs = dict(
-         inputs,
-         streamer=streamer,
-         max_new_tokens=max_new_tokens,
-     )
-     t = Thread(target=model.generate, kwargs=gen_kwargs)
-     t.start()
-
-     output = ""
-     for new_text in streamer:
-         output += new_text
-         yield output


- ##################################################
- # Examples (original)
- ##################################################
  ##################################################
  # Examples (Korean version)
  ##################################################
@@ -335,8 +500,12 @@ examples = [

      [
          {
-             "text": "Summarize and analyze the contents of the PDF file.",
              "files": ["assets/additional-examples/pdf.pdf"],
          }
      ],
      [
@@ -347,45 +516,34 @@ examples = [
      ],
      [
          {
-             "text": "Write matplotlib code that draws the same bar chart.",
-             "files": ["assets/additional-examples/barchart.png"],
          }
-     ],
      [
          {
-             "text": "What is unusual about this video?",
-             "files": ["assets/additional-examples/tmp.mp4"],
          }
-     ],
      [
          {
              "text": "I already have this supplement <image>, and I plan to buy this product <image> as well. Is there anything I should watch out for when taking them together?",
              "files": ["assets/additional-examples/pill1.png", "assets/additional-examples/pill2.png"],
          }
-     ],
-     [
-         {
-             "text": "Write a poem inspired by the visual elements of the images.",
-             "files": ["assets/sample-images/06-1.png", "assets/sample-images/06-2.png"],
-         }
-     ],
      [
          {
-             "text": "Compose a short piece of music based on the visual elements of the images.",
-             "files": [
-                 "assets/sample-images/07-1.png",
-                 "assets/sample-images/07-2.png",
-                 "assets/sample-images/07-3.png",
-                 "assets/sample-images/07-4.png",
-             ],
          }
-     ],
      [
          {
-             "text": "Make up a short story about what might have happened in this house.",
-             "files": ["assets/sample-images/08.png"],
          }
-     ],
      [
          {
              "text": "Create a short story based on the order of the images.",
@@ -400,40 +558,33 @@ examples = [
      ],
      [
          {
-             "text": "Imagine and describe the creatures that might live in this world.",
-             "files": ["assets/sample-images/10.png"],
          }
      ],
      [
          {
-             "text": "Read the text written in the image.",
-             "files": ["assets/additional-examples/1.png"],
          }
-     ],
      [
          {
-             "text": "When was this ticket issued, and how much does it cost?",
-             "files": ["assets/additional-examples/2.png"],
          }
      ],
      [
          {
              "text": "Read the text in the image verbatim and write it out in Markdown format.",
              "files": ["assets/additional-examples/3.png"],
          }
      ],
-     [
-         {
-             "text": "Solve this integral.",
-             "files": ["assets/additional-examples/4.png"],
-         }
-     ],
-     [
-         {
-             "text": "Describe this image with a short caption.",
-             "files": ["assets/sample-images/01.png"],
-         }
-     ],
      [
          {
              "text": "What does this sign say?",
@@ -446,54 +597,160 @@ examples = [
              "files": ["assets/sample-images/03.png"],
          }
      ],
-     [
-         {
-             "text": "List every object visible in the image along with its color.",
-             "files": ["assets/sample-images/04.png"],
-         }
-     ],
-     [
-         {
-             "text": "Describe the mood of the scene.",
-             "files": ["assets/sample-images/05.png"],
-         }
-     ],
  ]



- demo = gr.ChatInterface(
-     fn=run,
-     type="messages",
-     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
-     # Allow .webp, .png, .jpg, .jpeg, .gif, .mp4, .csv, .txt, .pdf
-     textbox=gr.MultimodalTextbox(
-         file_types=[
-             ".webp", ".png", ".jpg", ".jpeg", ".gif",
-             ".mp4", ".csv", ".txt", ".pdf"
-         ],
-         file_count="multiple",
-         autofocus=True
-     ),
-     multimodal=True,
-     additional_inputs=[
-         gr.Textbox(
-             label="System Prompt",
-             value=(
-                 "You are a deeply thoughtful AI. Consider problems thoroughly and derive "
-                 "correct solutions through systematic reasoning. Please answer in korean."
              )
-         ),
-         gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
-     ],
-     stop_btn=False,
-     title="Vidraft-Gemma-3-27B",
-     examples=examples,
-     run_examples_on_click=False,
-     cache_examples=False,
-     css_paths="style.css",
-     delete_cache=(1800, 1800),
- )

  if __name__ == "__main__":
      demo.launch()
 
  from collections.abc import Iterator
  from threading import Thread

+ import requests  # <-- For SERPHouse web search
  import cv2
  import gradio as gr
  import spaces
 
  # PDF text extraction
  import PyPDF2

+ ##############################################################################
+ # SERPHouse API key for web search
+ ##############################################################################
+ SERPHOUSE_API_KEY = "V38CNn4HXpLtynJQyOeoUensTEYoFy8PBUxKpDqAW1pawT1vfJ2BWtPQ98h6"
+
+ ##############################################################################
+ # Simple function to call the SERPHouse Live endpoint
+ # https://api.serphouse.com/serp/live
+ ##############################################################################
+ def do_web_search(query: str) -> str:
+     """
+     Calls SERPHouse live endpoint with the given query (q).
+     Returns a simple text summary or error message.
+     """
+     try:
+         url = "https://api.serphouse.com/serp/live"
+         params = {
+             "q": query,
+             "domain": "google.com",
+             "lang": "en",
+             "device": "desktop",
+             "serp_type": "web",
+             "api_token": SERPHOUSE_API_KEY,
+         }
+         resp = requests.get(url, params=params, timeout=30)
+         resp.raise_for_status()  # Raise an exception for 4xx/5xx errors
+         data = resp.json()
+
+         # For demonstration, let's extract top 3 organic results:
+         results = data.get("results", {})
+         organic = results.get("results", {}).get("organic", [])
+         if not organic:
+             return "No web search results found."
+
+         summary_lines = []
+         for item in organic[:3]:
+             rank = item.get("position", "-")
+             title = item.get("title", "No Title")
+             link = item.get("link", "No Link")
+             snippet = item.get("snippet", "(No snippet)")
+             summary_lines.append(f"**Rank {rank}:** [{title}]({link})\n\n> {snippet}")
+
+         return "\n\n".join(summary_lines) if summary_lines else "No web search results found."
+     except Exception as e:
+         logger.error(f"Web search failed: {e}")
+         return f"Web search failed: {str(e)}"
+
+
+ MAX_CONTENT_CHARS = 4000  # show at most 4000 characters, to guard against very large files

  model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 
      """
      try:
          df = pd.read_csv(path)
+         # Limit the DataFrame size (when there are many rows/columns)
+         if df.shape[0] > 50 or df.shape[1] > 10:
+             df = df.iloc[:50, :10]
+
          df_str = df.to_string()
          if len(df_str) > MAX_CONTENT_CHARS:
              df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
 
      try:
          with open(pdf_path, "rb") as f:
              reader = PyPDF2.PdfReader(f)
+             # Process at most 5 pages
+             max_pages = min(5, len(reader.pages))
+             for page_num in range(max_pages):
+                 page = reader.pages[page_num]
                  page_text = page.extract_text() or ""
                  page_text = page_text.strip()
                  if page_text:
+                     # Also limit the text per page
+                     if len(page_text) > MAX_CONTENT_CHARS // max_pages:
+                         page_text = page_text[:MAX_CONTENT_CHARS // max_pages] + "...(truncated)"
+                     text_chunks.append(f"## Page {page_num+1}\n\n{page_text}\n")
+
+             if len(reader.pages) > max_pages:
+                 text_chunks.append(f"\n...(Showing {max_pages} of {len(reader.pages)} pages)...")
      except Exception as e:
          return f"Failed to read PDF ({os.path.basename(pdf_path)}): {str(e)}"

 
      for path in paths:
          if path.endswith(".mp4"):
              video_count += 1
+         elif re.search(r"\.(png|jpg|jpeg|gif|webp)$", path, re.IGNORECASE):
              image_count += 1
      return image_count, video_count

 
      for item in history:
          if item["role"] != "user" or isinstance(item["content"], str):
              continue
+         if isinstance(item["content"], list) and len(item["content"]) > 0:
+             file_path = item["content"][0]
+             if isinstance(file_path, str):
+                 if file_path.endswith(".mp4"):
+                     video_count += 1
+                 elif re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE):
+                     image_count += 1
      return image_count, video_count

 
      - If <image> tags are present, the tag count must match the number of actual images
      - CSV, TXT, PDF, etc. are not restricted here
      """
+     # Filter to image and video files only
      media_files = []
      for f in message["files"]:
          if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
              media_files.append(f)

 
      if video_count == 0 and image_count > MAX_NUM_IMAGES:
          gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
          return False
+
+     # Validate <image> tags (count only actual image files)
+     if "<image>" in message["text"]:
+         # Filter to image files only
+         image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
+         image_tag_count = message["text"].count("<image>")
+         if image_tag_count != len(image_files):
+             gr.Warning("The number of <image> tags in the text does not match the number of image files.")
+             return False

      return True

 
      fps = vidcap.get(cv2.CAP_PROP_FPS)
      total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

+     # Adjusted to extract fewer frames
+     frame_interval = max(int(fps), int(total_frames / 10))  # 1 frame per second, or at most 10 frames
      frames = []

      for i in range(0, total_frames, frame_interval):
245
  pil_image = Image.fromarray(image)
246
  timestamp = round(i / fps, 2)
247
  frames.append((pil_image, timestamp))
248
+
249
+ # ์ตœ๋Œ€ 5ํ”„๋ ˆ์ž„๋งŒ ์‚ฌ์šฉ
250
+ if len(frames) >= 5:
251
+ break
252
 
253
  vidcap.release()
254
  return frames
 
      parts = re.split(r"(<image>)", message["text"])
      content = []
      image_index = 0
+
+     # Filter to image files only
+     image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
+
      for part in parts:
+         if part == "<image>" and image_index < len(image_files):
+             content.append({"type": "image", "url": image_files[image_index]})
              image_index += 1
          elif part.strip():
              content.append({"type": "text", "text": part.strip()})
 
  ##################################################
  # PDF + CSV + TXT + images/video
  ##################################################
+ def is_image_file(file_path: str) -> bool:
+     """Check whether the file is an image."""
+     return bool(re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE))
+
+
+ def is_video_file(file_path: str) -> bool:
+     """Check whether the file is a video."""
+     return file_path.endswith(".mp4")
+
+
+ def is_document_file(file_path: str) -> bool:
+     """Check whether the file is a document (PDF, CSV, TXT)."""
+     return (file_path.lower().endswith(".pdf") or
+             file_path.lower().endswith(".csv") or
+             file_path.lower().endswith(".txt"))
+
+
  def process_new_user_message(message: dict) -> list[dict]:
      if not message["files"]:
          return [{"type": "text", "text": message["text"]}]

      # 1) Classify files
+     video_files = [f for f in message["files"] if is_video_file(f)]
+     image_files = [f for f in message["files"] if is_image_file(f)]
      csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
      txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
      pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
 
          return content_list

      # 7) Image handling
+     if "<image>" in message["text"] and image_files:
          # interleaved
+         interleaved_content = process_interleaved_images({"text": message["text"], "files": image_files})
+         # Drop the leading text entry of the original content_list and replace it with the interleaved content
+         if content_list[0]["type"] == "text":
+             content_list = content_list[1:]  # remove the original text
+         return interleaved_content + content_list  # interleaved + the remaining document-analysis content
      else:
          # Plain multiple images
          for img_path in image_files:
380
  content = item["content"]
381
  if isinstance(content, str):
382
  current_user_content.append({"type": "text", "text": content})
383
+ elif isinstance(content, list) and len(content) > 0:
384
+ file_path = content[0]
385
+ if is_image_file(file_path):
386
+ current_user_content.append({"type": "image", "url": file_path})
387
+ else:
388
+ # ๋น„์ด๋ฏธ์ง€ ํŒŒ์ผ์€ ํ…์ŠคํŠธ๋กœ ์ฒ˜๋ฆฌ
389
+ current_user_content.append({"type": "text", "text": f"[File: {os.path.basename(file_path)}]"})
390
+
391
+ # ๋งˆ์ง€๋ง‰ ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€๊ฐ€ ์ฒ˜๋ฆฌ๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ ์ถ”๊ฐ€
392
+ if current_user_content:
393
+ messages.append({"role": "user", "content": current_user_content})
394
+
395
  return messages
396
 
397
 
 
399
  # ๋ฉ”์ธ ์ถ”๋ก  ํ•จ์ˆ˜
400
  ##################################################
401
  @spaces.GPU(duration=120)
402
+ def run(
403
+ message: dict,
404
+ history: list[dict],
405
+ system_prompt: str = "",
406
+ max_new_tokens: int = 512,
407
+ use_web_search: bool = False,
408
+ web_search_query: str = "",
409
+ ) -> Iterator[str]:
410
+ """
411
+ The main inference function. Now extended with optional web_search arguments:
412
+ - use_web_search: bool
413
+ - web_search_query: str
414
+ If `use_web_search` is True, calls SERPHouse for the given `web_search_query`.
415
+ """
416
+ # Validate media constraints first
417
  if not validate_media_constraints(message, history):
418
  yield ""
419
  return
420
 
421
+ try:
422
+ # If user opted for "Web Search", do it here and yield a prefix message
423
+ if use_web_search and web_search_query.strip():
424
+ ws_result = do_web_search(web_search_query.strip())
425
+ yield f"**[Web Search Results for '{web_search_query.strip()}':]**\n\n{ws_result}\n\n---\n"
426
+
427
+ messages = []
428
+ if system_prompt:
429
+ messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
430
+ messages.extend(process_history(history))
431
+
432
+ # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ฒ˜๋ฆฌ
433
+ user_content = process_new_user_message(message)
434
+
435
+ # ํ† ํฐ ์ˆ˜๋ฅผ ์ค„์ด๊ธฐ ์œ„ํ•ด ๋„ˆ๋ฌด ๊ธด ํ…์ŠคํŠธ๋Š” ์ž˜๋ผ๋‚ด๊ธฐ
436
+ for item in user_content:
437
+ if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
438
+ item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."
439
+
440
+ messages.append({"role": "user", "content": user_content})
441
+
442
+ # ๋ชจ๋ธ ์ž…๋ ฅ ์ƒ์„ฑ ์ „ ์ตœ์ข… ํ™•์ธ
443
+ for msg in messages:
444
+ if msg["role"] != "user":
445
+ continue
446
+
447
+ filtered_content = []
448
+ for item in msg["content"]:
449
+ if item["type"] == "image":
450
+ if is_image_file(item["url"]):
451
+ filtered_content.append(item)
452
+ else:
453
+ # ์ด๋ฏธ์ง€ ํŒŒ์ผ์ด ์•„๋‹Œ ๊ฒฝ์šฐ ํ…์ŠคํŠธ๋กœ ๋ณ€ํ™˜
454
+ filtered_content.append({
455
+ "type": "text",
456
+ "text": f"[Non-image file: {os.path.basename(item['url'])}]"
457
+ })
458
+ else:
459
+ filtered_content.append(item)
460
+
461
+ msg["content"] = filtered_content
462
+
463
+ # ๋ชจ๋ธ ์ž…๋ ฅ ์ƒ์„ฑ
464
+ inputs = processor.apply_chat_template(
465
+ messages,
466
+ add_generation_prompt=True,
467
+ tokenize=True,
468
+ return_dict=True,
469
+ return_tensors="pt",
470
+ ).to(device=model.device, dtype=torch.bfloat16)
471
+
472
+ # ํ…์ŠคํŠธ ์ƒ์„ฑ ์ŠคํŠธ๋ฆฌ๋จธ ์„ค์ •
473
+ streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
474
+ gen_kwargs = dict(
475
+ inputs,
476
+ streamer=streamer,
477
+ max_new_tokens=max_new_tokens,
478
+ )
479
+
480
+ # ๋ณ„๋„ ์Šค๋ ˆ๋“œ์—์„œ ํ…์ŠคํŠธ ์ƒ์„ฑ
481
+ t = Thread(target=model.generate, kwargs=gen_kwargs)
482
+ t.start()
483
+
484
+ # ๊ฒฐ๊ณผ ์ŠคํŠธ๋ฆฌ๋ฐ
485
+ output = ""
486
+ for new_text in streamer:
487
+ output += new_text
488
+ yield output
489
+
490
+ except Exception as e:
491
+ logger.error(f"Error in run: {str(e)}")
492
+ yield f"์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
493
+
494
 
495
 
 
 
 
496
  ##################################################
497
  # ์˜ˆ์‹œ๋“ค (ํ•œ๊ธ€ํ™” ๋ฒ„์ „)
498
  ##################################################
 

      [
          {
+             "text": "Compare the contents of the two PDF files.",
              "files": ["assets/additional-examples/pdf.pdf"],
+             "files": [
+                 "assets/additional-examples/before.pdf",
+                 "assets/additional-examples/after.pdf",
+             ],
          }
      ],
      [
 
      ],
      [
          {
+             "text": "Describe the content of this video",
+             "files": ["assets/additional-examples/tmp.mp4"],
          }
+     ],
      [
          {
+             "text": "Describe the cover and read the text on it.",
+             "files": ["assets/additional-examples/maz.jpg"],
          }
+     ],
      [
          {
              "text": "I already have this supplement <image>, and I plan to buy this product <image> as well. Is there anything I should watch out for when taking them together?",
              "files": ["assets/additional-examples/pill1.png", "assets/additional-examples/pill2.png"],
          }
+     ],
      [
          {
+             "text": "Solve this integral.",
+             "files": ["assets/additional-examples/4.png"],
          }
+     ],
      [
          {
+             "text": "When was this ticket issued, and how much does it cost?",
+             "files": ["assets/additional-examples/2.png"],
          }
+     ],
      [
          {
              "text": "Create a short story based on the order of the images.",
 
      ],
      [
          {
+             "text": "Write a poem inspired by the visual elements of the images.",
+             "files": ["assets/sample-images/06-1.png", "assets/sample-images/06-2.png"],
          }
      ],
      [
          {
+             "text": "Write matplotlib code that draws the same bar chart.",
+             "files": ["assets/additional-examples/barchart.png"],
          }
+     ],
+
      [
          {
+             "text": "Imagine and describe the creatures that might live in this world.",
+             "files": ["assets/sample-images/08.png"],
          }
      ],
+
+
      [
          {
              "text": "Read the text in the image verbatim and write it out in Markdown format.",
              "files": ["assets/additional-examples/3.png"],
          }
      ],
+
+
      [
          {
              "text": "What does this sign say?",

              "files": ["assets/sample-images/03.png"],
          }
      ],
+
  ]


+
+
+ ##############################################################################
+ # Custom CSS similar to second example (colorful background, panel, etc.)
+ ##############################################################################
+ css = """
+ body {
+     background: linear-gradient(135deg, #667eea, #764ba2);
+     font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
+     color: #333;
+     margin: 0;
+     padding: 0;
+ }
+ .gradio-container {
+     background: rgba(255, 255, 255, 0.95);
+     border-radius: 15px;
+     padding: 30px 40px;
+     box-shadow: 0 8px 30px rgba(0, 0, 0, 0.3);
+     margin: 40px auto;
+     max-width: 1200px;
+ }
+ .gradio-container h1 {
+     color: #333;
+     text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.2);
+ }
+ .fillable {
+     width: 95% !important;
+     max-width: unset !important;
+ }
+ #examples_container {
+     margin: auto;
+     width: 90%;
+ }
+ #examples_row {
+     justify-content: center;
+ }
+ .sidebar {
+     background: rgba(255, 255, 255, 0.98);
+     border-radius: 10px;
+     padding: 20px;
+     box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
+ }
+ button, .btn {
+     background: linear-gradient(90deg, #ff8a00, #e52e71);
+     border: none;
+     color: #fff;
+     padding: 12px 24px;
+     text-transform: uppercase;
+     font-weight: bold;
+     letter-spacing: 1px;
+     border-radius: 5px;
+     cursor: pointer;
+     transition: transform 0.2s ease-in-out;
+ }
+ button:hover, .btn:hover {
+     transform: scale(1.05);
+ }
+ """
+
+ title_html = """
+ <h1 align="center" style="margin-bottom: 0.2em;"> 🤗 Vidraft-Gemma-3-27B </h1>
+ <p align="center" style="font-size:1.1em; color:#555;">
+   Multimodal Chat Interface + Optional Web Search
+ </p>
+ """
+
+ ##############################################################################
+ # Build a Blocks layout that includes:
+ #  - A left sidebar with "Web Search" controls
+ #  - The main ChatInterface in the center or right
+ ##############################################################################
+ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
+     gr.Markdown(title_html)
+
+     with gr.Row():
+         # Left Sidebar
+         with gr.Column(scale=3, variant="panel"):
+             gr.Markdown("### Menu / Options")
+             with gr.Row():
+                 web_search_checkbox = gr.Checkbox(
+                     label="Web Search",
+                     value=False,
+                     info="Check to enable a SERPHouse web search before the chat reply"
+                 )
+                 web_search_text = gr.Textbox(
+                     lines=1,
+                     label="Web Search Query",
+                     placeholder="Enter search keywords..."
+                 )
+
+             gr.Markdown("---")
+             gr.Markdown("#### System Prompt")
+             system_prompt_box = gr.Textbox(
+                 lines=3,
+                 value=(
+                     "You are a deeply thoughtful AI. Consider problems thoroughly and derive "
+                     "correct solutions through systematic reasoning. Please answer in korean."
+                 ),
+             )
+
+             max_tokens_slider = gr.Slider(
+                 label="Max New Tokens",
+                 minimum=100,
+                 maximum=8000,
+                 step=50,
+                 value=2000,
+             )
+
+             gr.Markdown("<br><br>")  # spacing
+
+         # Main ChatInterface to the right
+         with gr.Column(scale=7):
+             chat = gr.ChatInterface(
+                 fn=run,
+                 type="messages",
+                 chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
+                 textbox=gr.MultimodalTextbox(
+                     file_types=[
+                         ".webp", ".png", ".jpg", ".jpeg", ".gif",
+                         ".mp4", ".csv", ".txt", ".pdf"
+                     ],
+                     file_count="multiple",
+                     autofocus=True
+                 ),
+                 multimodal=True,
+                 additional_inputs=[
+                     system_prompt_box,
+                     max_tokens_slider,
+                     web_search_checkbox,
+                     web_search_text,
+                 ],
+                 stop_btn=False,
+                 title="Vidraft-Gemma-3-27B",
+                 examples=examples,
+                 run_examples_on_click=False,
+                 cache_examples=False,
+                 css_paths=None,
+                 delete_cache=(1800, 1800),
+             )
+
+     with gr.Row(elem_id="examples_row"):
+         with gr.Column(scale=12, elem_id="examples_container"):
+             gr.Markdown("### Example Inputs (click to load)")
+             # The fix: pass an empty list to avoid the "None" error, so we keep the code structure.
+             gr.Examples(
+                 examples=examples,
+                 inputs=[],  # Instead of None or chat.
+                 cache_examples=False
              )

  if __name__ == "__main__":
      demo.launch()
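
A minimal sketch (not part of the commit) of how the newly added do_web_search helper could be smoke-tested on its own. It assumes the file has been saved under an importable name such as app_backup.py and that the SERPHOUSE_API_KEY embedded above is still valid; the module name and the query string are illustrative only.

    # hypothetical standalone check, assuming app-backup.py was copied to app_backup.py
    from app_backup import do_web_search

    if __name__ == "__main__":
        # Prints a markdown-style summary of the top 3 organic results,
        # or an error string if the HTTP request fails.
        print(do_web_search("gemma 3 27b multimodal"))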