seawolf2357 committed on
Commit
f024201
·
verified ·
1 Parent(s): c3d078f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -54
app.py CHANGED
@@ -16,6 +16,8 @@ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIter
16
 
17
  # [PDF] PyPDF2 추가
18
  import PyPDF2
 
 
19
 
20
  model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
21
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
@@ -25,7 +27,46 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
25
 
26
  MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
 
 
29
  def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
30
  image_count = 0
31
  video_count = 0
@@ -36,7 +77,6 @@ def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
36
  image_count += 1
37
  return image_count, video_count
38
 
39
-
40
  def count_files_in_history(history: list[dict]) -> tuple[int, int]:
41
  image_count = 0
42
  video_count = 0
@@ -49,18 +89,22 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
49
  image_count += 1
50
  return image_count, video_count
51
 
52
-
 
 
53
  def validate_media_constraints(message: dict, history: list[dict]) -> bool:
54
  """
55
  이미지/비디오 개수와 혼합 여부 등을 검사하는 함수.
56
- PDF๋Š” ๊ฒ€์‚ฌ ๋กœ์ง์—์„œ ์ œ์™ธํ•˜์—ฌ ์—…๋กœ๋“œ๋งŒ ํ—ˆ์šฉ.
57
  """
58
- # [PDF] PDF 파일 제외 처리
59
  pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
60
- non_pdf_files = [f for f in message["files"] if not f.endswith(".pdf")]
 
 
61
 
62
- # 기존 로직은 non_pdf_files(= 이미지/비디오)에 대해서만 체크
63
- new_image_count, new_video_count = count_files_in_new_message(non_pdf_files)
64
  history_image_count, history_video_count = count_files_in_history(history)
65
  image_count = history_image_count + new_image_count
66
  video_count = history_video_count + new_video_count
@@ -75,25 +119,22 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
75
  if "<image>" in message["text"]:
76
  gr.Warning("Using <image> tags with video files is not supported.")
77
  return False
78
- # TODO: Add frame count validation for videos similar to image count limits # noqa: FIX002, TD002, TD003
79
 
80
  if video_count == 0 and image_count > MAX_NUM_IMAGES:
81
  gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
82
  return False
83
 
84
- # [PDF] PDF 갯수 제한(필요하다면)도 추가 가능
85
- # 일단 제한은 두지 않고 바로 True 반환
86
-
87
- # <image> 태그가 있을 경우, 이미지 개수와 매칭 검사
88
  if "<image>" in message["text"]:
89
- # new_image_count๋Š” pdf ์ œ์™ธ๋œ ์ด๋ฏธ์ง€ ์ˆ˜
90
  if message["text"].count("<image>") != new_image_count:
91
  gr.Warning("The number of <image> tags in the text does not match the number of images.")
92
  return False
93
 
94
  return True
95
 
96
-
 
 
97
  def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
98
  vidcap = cv2.VideoCapture(video_path)
99
  fps = vidcap.get(cv2.CAP_PROP_FPS)
@@ -114,7 +155,6 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
114
  vidcap.release()
115
  return frames
116
 
117
-
118
  def process_video(video_path: str) -> list[dict]:
119
  content = []
120
  frames = downsample_video(video_path)
@@ -127,7 +167,9 @@ def process_video(video_path: str) -> list[dict]:
127
  logger.debug(f"{content=}")
128
  return content
129
 
130
-
 
 
131
  def process_interleaved_images(message: dict) -> list[dict]:
132
  logger.debug(f"{message['files']=}")
133
  parts = re.split(r"(<image>)", message["text"])
@@ -148,40 +190,25 @@ def process_interleaved_images(message: dict) -> list[dict]:
148
  logger.debug(f"{content=}")
149
  return content
150
 
151
-
152
- # [PDF] PDF -> Markdown 변환 함수 추가
153
- def pdf_to_markdown(pdf_path: str) -> str:
154
- """
155
- PDF 파일을 텍스트로 추출 후, 간단한 Markdown 형태로 반환.
156
- """
157
- text_chunks = []
158
- with open(pdf_path, "rb") as f:
159
- reader = PyPDF2.PdfReader(f)
160
- for page_num, page in enumerate(reader.pages, start=1):
161
- page_text = page.extract_text()
162
- page_text = page_text.strip() if page_text else ""
163
- if page_text:
164
- # ํŽ˜์ด์ง€๋ณ„๋กœ ๊ฐ„๋‹จํ•œ ํ—ค๋”์™€ ๋ณธ๋ฌธ์„ Markdown์œผ๋กœ ํ•ฉ์นจ
165
- text_chunks.append(f"## Page {page_num}\n\n{page_text}\n")
166
- return "\n".join(text_chunks)
167
-
168
-
169
  def process_new_user_message(message: dict) -> list[dict]:
170
- """
171
- ์ƒˆ user message์—์„œ text, ํŒŒ์ผ(์ด๋ฏธ์ง€/๋น„๋””์˜ค/PDF)์„ ์ฒ˜๋ฆฌ.
172
- """
173
  if not message["files"]:
174
  return [{"type": "text", "text": message["text"]}]
175
 
176
- # [PDF] PDF 파일 목록
177
  pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
178
- # 이미지·비디오 목록
179
- other_files = [f for f in message["files"] if not f.endswith(".pdf")]
 
 
 
180
 
181
- # ์ผ๋‹จ ์‚ฌ์šฉ์ž์˜ text๋ฅผ ๊ฐ€์žฅ ๋จผ์ € ๋„ฃ๋Š”๋‹ค
182
  content_list = [{"type": "text", "text": message["text"]}]
183
 
184
- # PDF 변환 후 추가
185
  for pdf_path in pdf_files:
186
  pdf_markdown = pdf_to_markdown(pdf_path)
187
  if pdf_markdown.strip():
@@ -189,12 +216,14 @@ def process_new_user_message(message: dict) -> list[dict]:
189
  else:
190
  content_list.append({"type": "text", "text": "(PDF์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ)"})
191
 
 
 
 
 
192
 
193
- # 영상이 있는지 확인
194
  video_files = [f for f in other_files if f.endswith(".mp4")]
195
  if video_files:
196
- # ๋น„๋””์˜ค๋Š” ํ•œ ๊ฐœ๋งŒ ์ฒ˜๋ฆฌํ•œ๋‹ค๋Š” ์ „์ œ (validate_media_constraints์—์„œ ์ด๋ฏธ ๊ฒ€์‚ฌ)
197
- # 여러 개일 경우 첫 번째 것만 처리하거나, 경고 처리
198
  content_list += process_video(video_files[0])
199
  return content_list
200
 
@@ -209,7 +238,9 @@ def process_new_user_message(message: dict) -> list[dict]:
209
 
210
  return content_list
211
 
212
-
 
 
213
  def process_history(history: list[dict]) -> list[dict]:
214
  messages = []
215
  current_user_content: list[dict] = []
@@ -227,7 +258,9 @@ def process_history(history: list[dict]) -> list[dict]:
227
  current_user_content.append({"type": "image", "url": content[0]})
228
  return messages
229
 
230
-
 
 
231
  @spaces.GPU(duration=120)
232
  def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
233
  if not validate_media_constraints(message, history):
@@ -262,7 +295,9 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
262
  output += delta
263
  yield output
264
 
265
-
 
 
266
  examples = [
267
  [
268
  {
@@ -385,22 +420,34 @@ examples = [
385
  ],
386
  ]
387
 
388
-
389
-
390
- # [PDF] .pdf 허용
391
  demo = gr.ChatInterface(
392
  fn=run,
393
  type="messages",
394
  chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
395
  textbox=gr.MultimodalTextbox(
396
- file_types=["image", ".mp4", ".pdf"], # [PDF] 허용
397
  file_count="multiple",
398
  autofocus=True
399
  ),
400
  multimodal=True,
401
  additional_inputs=[
402
- gr.Textbox(label="System Prompt", value="ou are a deeply thoughtful AI. Consider problems thoroughly and derive correct solutions through systematic reasoning. Please answer in korean."),
403
- gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
 
 
 
 
 
 
 
 
 
 
 
 
404
  ],
405
  stop_btn=False,
406
  title="Gemma 3 27B IT",
 
16
 
17
  # [PDF] PyPDF2 추가
18
  import PyPDF2
19
+ # [CSV] Pandas 추가
20
+ import pandas as pd
21
 
22
  model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
23
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 
27
 
28
  MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
29
 
30
+ ###################################################################
31
+ # CSV๋ฅผ Markdown์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ์œ ํ‹ธ ํ•จ์ˆ˜
32
+ ###################################################################
33
def csv_to_markdown(csv_path: str, max_chars: "int | None" = None) -> str:
    """Render a CSV file as a Markdown snippet holding a fenced text table.

    The file is loaded with ``pandas.read_csv`` and rendered with
    ``DataFrame.to_string`` (so the row index is part of the text), then
    wrapped in a code fence under a bold header naming the file.

    Args:
        csv_path: Path of the CSV file to read.
        max_chars: Optional, non-negative cap on the length of the rendered
            table. ``None`` (the default) keeps the whole table, which matches
            the previous behaviour; otherwise a longer table is cut after
            ``max_chars`` characters and ``...(truncated)...`` is appended.
            Very large CSV files can produce very large prompts, so callers
            may want to set this.

    Returns:
        The Markdown snippet on success, or a single-line
        ``Failed to read CSV (<file name>): <error>`` message when reading or
        rendering the file raises.
    """
    file_name = os.path.basename(csv_path)

    # Only the read/render step is guarded: the caller appends whatever string
    # comes back to the chat content, so a bad upload is reported as text
    # instead of aborting the whole request.
    try:
        table_text = pd.read_csv(csv_path).to_string()
    except Exception as exc:  # noqa: BLE001 - deliberate best-effort boundary
        return f"Failed to read CSV ({file_name}): {exc}"

    if max_chars is not None and len(table_text) > max_chars:
        table_text = table_text[:max_chars] + "\n...(truncated)..."

    return f"**[CSV File: {file_name}]**\n\n```\n{table_text}\n```"
48
+
49
+ ###################################################################
50
+ # PDF -> Markdown 변환 함수 (기존)
51
+ ###################################################################
52
def pdf_to_markdown(pdf_path: str) -> str:
    """Extract the text of a PDF and return it as simple Markdown.

    Each page that yields non-blank text becomes a ``## Page N`` section
    (N is the 1-based page number); pages without extractable text are
    left out. Sections are joined with a newline, so a PDF with no
    extractable text gives an empty string.
    """
    sections = []
    with open(pdf_path, "rb") as pdf_file:
        pages = PyPDF2.PdfReader(pdf_file).pages
        for number, page in enumerate(pages, start=1):
            # extract_text() may hand back nothing; normalise to "" first.
            body = (page.extract_text() or "").strip()
            if not body:
                continue
            # Page header followed by the page body.
            sections.append(f"## Page {number}\n\n{body}\n")
    return "\n".join(sections)
66
 
67
+ ###################################################################
68
+ # 이미지/비디오 개수 카운트 (기존)
69
+ ###################################################################
70
  def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
71
  image_count = 0
72
  video_count = 0
 
77
  image_count += 1
78
  return image_count, video_count
79
 
 
80
  def count_files_in_history(history: list[dict]) -> tuple[int, int]:
81
  image_count = 0
82
  video_count = 0
 
89
  image_count += 1
90
  return image_count, video_count
91
 
92
+ ###################################################################
93
+ # 미디어(이미지/비디오) 제한 검사 + PDF/CSV 예외 (기존/수정)
94
+ ###################################################################
95
  def validate_media_constraints(message: dict, history: list[dict]) -> bool:
96
  """
97
  이미지/비디오 개수와 혼합 여부 등을 검사하는 함수.
98
+ PDF, CSV 등은 검사 로직에서 제외하여 업로드만 허용.
99
  """
100
+ # pdf, csv 파일 제외
101
  pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
102
+ csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
103
+ non_pdf_csv_files = [f for f in message["files"]
104
+ if not f.endswith(".pdf") and not f.lower().endswith(".csv")]
105
 
106
+ # 기존 로직은 이미지/비디오에 대해서만 체크
107
+ new_image_count, new_video_count = count_files_in_new_message(non_pdf_csv_files)
108
  history_image_count, history_video_count = count_files_in_history(history)
109
  image_count = history_image_count + new_image_count
110
  video_count = history_video_count + new_video_count
 
119
  if "<image>" in message["text"]:
120
  gr.Warning("Using <image> tags with video files is not supported.")
121
  return False
 
122
 
123
  if video_count == 0 and image_count > MAX_NUM_IMAGES:
124
  gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
125
  return False
126
 
127
+ # <image> 태그가 있을 경우, 이미지 수와 태그 수 일치
 
 
 
128
  if "<image>" in message["text"]:
 
129
  if message["text"].count("<image>") != new_image_count:
130
  gr.Warning("The number of <image> tags in the text does not match the number of images.")
131
  return False
132
 
133
  return True
134
 
135
+ ###################################################################
136
+ # 동영상 처리 (기존)
137
+ ###################################################################
138
  def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
139
  vidcap = cv2.VideoCapture(video_path)
140
  fps = vidcap.get(cv2.CAP_PROP_FPS)
 
155
  vidcap.release()
156
  return frames
157
 
 
158
  def process_video(video_path: str) -> list[dict]:
159
  content = []
160
  frames = downsample_video(video_path)
 
167
  logger.debug(f"{content=}")
168
  return content
169
 
170
+ ###################################################################
171
+ # <image> 태그 interleaved 이미지 처리 (기존)
172
+ ###################################################################
173
  def process_interleaved_images(message: dict) -> list[dict]:
174
  logger.debug(f"{message['files']=}")
175
  parts = re.split(r"(<image>)", message["text"])
 
190
  logger.debug(f"{content=}")
191
  return content
192
 
193
+ ###################################################################
194
+ # 새 user message 처리 (PDF + CSV + 이미지/비디오)
195
+ ###################################################################
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  def process_new_user_message(message: dict) -> list[dict]:
 
 
 
197
  if not message["files"]:
198
  return [{"type": "text", "text": message["text"]}]
199
 
200
+ # PDF 파일 목록
201
  pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
202
+ # CSV 파일 목록
203
+ csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
204
+ # 이미지/비디오 (기존)
205
+ other_files = [f for f in message["files"]
206
+ if not f.endswith(".pdf") and not f.lower().endswith(".csv")]
207
 
208
+ # ์ผ๋‹จ ์‚ฌ์šฉ์ž์˜ text๋ฅผ ๋จผ์ € ๋„ฃ๋Š”๋‹ค
209
  content_list = [{"type": "text", "text": message["text"]}]
210
 
211
+ # [PDF] 변환 후 추가
212
  for pdf_path in pdf_files:
213
  pdf_markdown = pdf_to_markdown(pdf_path)
214
  if pdf_markdown.strip():
 
216
  else:
217
  content_list.append({"type": "text", "text": "(PDF์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ)"})
218
 
219
+ # [CSV] 변환 후 추가
220
+ for cfile in csv_files:
221
+ csv_md = csv_to_markdown(cfile)
222
+ content_list.append({"type": "text", "text": csv_md})
223
 
224
+ # 영상 처리
225
  video_files = [f for f in other_files if f.endswith(".mp4")]
226
  if video_files:
 
 
227
  content_list += process_video(video_files[0])
228
  return content_list
229
 
 
238
 
239
  return content_list
240
 
241
+ ###################################################################
242
+ # ํžˆ์Šคํ† ๋ฆฌ -> LLM์šฉ ๋ฉ”์‹œ์ง€ ๋ณ€ํ™˜ (๊ธฐ์กด)
243
+ ###################################################################
244
  def process_history(history: list[dict]) -> list[dict]:
245
  messages = []
246
  current_user_content: list[dict] = []
 
258
  current_user_content.append({"type": "image", "url": content[0]})
259
  return messages
260
 
261
+ ###################################################################
262
+ # 메인 추론 함수 (기존)
263
+ ###################################################################
264
  @spaces.GPU(duration=120)
265
  def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
266
  if not validate_media_constraints(message, history):
 
295
  output += delta
296
  yield output
297
 
298
+ ###################################################################
299
+ # 예시들 (기존 그대로)
300
+ ###################################################################
301
  examples = [
302
  [
303
  {
 
420
  ],
421
  ]
422
 
423
+ ###################################################################
424
+ # PDF + CSV๋ฅผ ํ—ˆ์šฉํ•˜๋Š” Gradio ChatInterface
425
+ ###################################################################
426
  demo = gr.ChatInterface(
427
  fn=run,
428
  type="messages",
429
  chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
430
  textbox=gr.MultimodalTextbox(
431
+ file_types=["image", ".mp4", ".pdf", ".csv"], # pdf & csv 허용
432
  file_count="multiple",
433
  autofocus=True
434
  ),
435
  multimodal=True,
436
  additional_inputs=[
437
+ gr.Textbox(
438
+ label="System Prompt",
439
+ value=(
440
+ "You are a deeply thoughtful AI. Consider problems thoroughly and derive correct "
441
+ "solutions through systematic reasoning. Please answer in korean."
442
+ )
443
+ ),
444
+ gr.Slider(
445
+ label="Max New Tokens",
446
+ minimum=100,
447
+ maximum=8000,
448
+ step=50,
449
+ value=2000
450
+ ),
451
  ],
452
  stop_btn=False,
453
  title="Gemma 3 27B IT",