openfree committed on
Commit
9d13074
·
verified ·
1 Parent(s): 3c39492

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -48
app.py CHANGED
@@ -35,7 +35,7 @@ import PyPDF2
35
 
36
  warnings.filterwarnings('ignore')
37
 
38
- print("๐ŸŽฎ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” (Gemma3-R1984-4B GGUF Q4_K_M)...")
39
 
40
  ##############################################################################
41
  # ์ƒ์ˆ˜ ์ •์˜
@@ -50,7 +50,7 @@ SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
50
  ##############################################################################
51
  llm = None
52
  model_loaded = False
53
- model_name = "Gemma3-R1984-4B-Q4_K_M"
54
 
55
  ##############################################################################
56
  # ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
@@ -215,34 +215,73 @@ def image_to_base64_data_uri(image: Union[np.ndarray, Image.Image]) -> str:
215
  ##############################################################################
216
  def download_model_files():
217
  """Hugging Face Hub์—์„œ ๋ชจ๋ธ ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ"""
218
- model_repo = "VIDraft/Gemma-3-R1984-4B-GGUF"
219
-
220
- # ๋ฉ”์ธ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ
221
- model_filename = "Gemma-3-R1984-4B.Q4_K_M.gguf"
222
- logger.info(f"๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘: {model_filename}")
223
-
224
- model_path = hf_hub_download(
225
- repo_id=model_repo,
226
- filename=model_filename,
227
- resume_download=True,
228
- local_files_only=False
229
- )
230
-
231
- # Vision projection ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
232
- mmproj_filename = "Gemma-3-R1984-4B.mmproj-Q8_0.gguf"
233
- logger.info(f"Vision ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘: {mmproj_filename}")
234
-
235
- mmproj_path = hf_hub_download(
236
- repo_id=model_repo,
237
- filename=mmproj_filename,
238
- resume_download=True,
239
- local_files_only=False
240
- )
241
 
242
- logger.info(f"๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path}")
243
- logger.info(f"Vision ๊ฒฝ๋กœ: {mmproj_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- return model_path, mmproj_path
 
246
 
247
  @spaces.GPU(duration=120)
248
  def load_model():
@@ -253,7 +292,7 @@ def load_model():
253
  return True
254
 
255
  try:
256
- logger.info("Gemma3-R1984-4B GGUF Q4_K_M ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
257
  clear_cuda_cache()
258
 
259
  # ๋ชจ๋ธ ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
@@ -262,26 +301,40 @@ def load_model():
262
  # GPU ์‚ฌ์šฉ ๊ฐ€๋Šฅ ์—ฌ๋ถ€ ํ™•์ธ
263
  n_gpu_layers = -1 if torch.cuda.is_available() else 0
264
 
265
- # ์ฑ„ํŒ… ํ•ธ๋“ค๋Ÿฌ ์ƒ์„ฑ (๋น„์ „ ์ง€์›)
266
- chat_handler = Llava16ChatHandler(
267
- clip_model_path=mmproj_path,
268
- verbose=False
269
- )
 
 
 
 
 
 
 
270
 
271
  # ๋ชจ๋ธ ๋กœ๋“œ
272
- llm = Llama(
273
- model_path=model_path,
274
- chat_handler=chat_handler,
275
- n_ctx=4096, # ์ปจํ…์ŠคํŠธ ํฌ๊ธฐ
276
- n_gpu_layers=n_gpu_layers, # GPU ๋ ˆ์ด์–ด
277
- n_threads=8, # CPU ์Šค๋ ˆ๋“œ
278
- verbose=False,
279
- seed=42,
280
- logits_all=True, # ๋น„์ „ ๋ชจ๋ธ์— ํ•„์š”
281
- )
 
 
 
 
 
282
 
283
  model_loaded = True
284
- logger.info(f"โœ… {model_name} ๋กœ๋”ฉ ์™„๋ฃŒ!")
 
 
285
  return True
286
 
287
  except Exception as e:
@@ -342,6 +395,31 @@ def analyze_image_for_robot(
342
  return "โŒ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ"
343
 
344
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  # ์ด๋ฏธ์ง€๋ฅผ base64๋กœ ๋ณ€ํ™˜
346
  image_uri = image_to_base64_data_uri(image)
347
 
@@ -537,14 +615,14 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B GGUF)", css=css) a
537
  gr.HTML("""
538
  <div class="robot-header">
539
  <h1>๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ</h1>
540
- <h3>๐ŸŽฎ Gemma3-R1984-4B GGUF Q4_K_M + ๐Ÿ“ท ์‹ค์‹œ๊ฐ„ ์›น์บ  + ๐Ÿ” ์›น ๊ฒ€์ƒ‰</h3>
541
  <p>โšก ์–‘์žํ™” ๋ชจ๋ธ๋กœ ๋” ๋น ๋ฅด๊ณ  ํšจ์œจ์ ์ธ ๋กœ๋ด‡ ์ž‘์—… ๋ถ„์„!</p>
542
  </div>
543
  """)
544
 
545
  gr.HTML("""
546
  <div class="model-info">
547
- <strong>๋ชจ๋ธ:</strong> Gemma3-R1984-4B Q4_K_M (2.49GB) | <strong>๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ:</strong> ~3-4GB VRAM
548
  </div>
549
  """)
550
 
@@ -851,7 +929,7 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B GGUF)", css=css) a
851
  )
852
 
853
  if __name__ == "__main__":
854
- print("๐Ÿš€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์‹œ์ž‘ (Gemma3-R1984-4B GGUF Q4_K_M)...")
855
  demo.launch(
856
  server_name="0.0.0.0",
857
  server_port=7860,
 
35
 
36
  warnings.filterwarnings('ignore')
37
 
38
+ print("๐ŸŽฎ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” (Gemma3-4B GGUF Q4_K_M)...")
39
 
40
  ##############################################################################
41
  # ์ƒ์ˆ˜ ์ •์˜
 
50
  ##############################################################################
51
  llm = None
52
  model_loaded = False
53
+ model_name = "Gemma3-4B-GGUF-Q4_K_M"
54
 
55
  ##############################################################################
56
  # ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
 
215
  ##############################################################################
216
  def download_model_files():
217
  """Hugging Face Hub์—์„œ ๋ชจ๋ธ ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ"""
218
+ # ์—ฌ๋Ÿฌ ๊ฐ€๋Šฅํ•œ ์ €์žฅ์†Œ ์‹œ๋„
219
+ model_repos = [
220
+ # ์ฒซ ๋ฒˆ์งธ ์‹œ๋„: ์ผ๋ฐ˜์ ์ธ Gemma 3 4B GGUF
221
+ {
222
+ "repo": "Mungert/gemma-3-4b-it-gguf",
223
+ "model": "google_gemma-3-4b-it-q4_k_m.gguf",
224
+ "mmproj": "google_gemma-3-4b-it-mmproj-bf16.gguf"
225
+ },
226
+ # ๋‘ ๋ฒˆ์งธ ์‹œ๋„: LM Studio ๋ฒ„์ „
227
+ {
228
+ "repo": "lmstudio-community/gemma-3-4b-it-GGUF",
229
+ "model": "gemma-3-4b-it-Q4_K_M.gguf",
230
+ "mmproj": "gemma-3-4b-it-mmproj-f16.gguf"
231
+ },
232
+ # ์„ธ ๋ฒˆ์งธ ์‹œ๋„: unsloth ๋ฒ„์ „
233
+ {
234
+ "repo": "unsloth/gemma-3-4b-it-GGUF",
235
+ "model": "gemma-3-4b-it.Q4_K_M.gguf",
236
+ "mmproj": "gemma-3-4b-it.mmproj.gguf"
237
+ }
238
+ ]
 
 
239
 
240
+ for repo_info in model_repos:
241
+ try:
242
+ logger.info(f"์ €์žฅ์†Œ ์‹œ๋„: {repo_info['repo']}")
243
+
244
+ # ๋ฉ”์ธ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ
245
+ model_filename = repo_info["model"]
246
+ logger.info(f"๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘: {model_filename}")
247
+
248
+ model_path = hf_hub_download(
249
+ repo_id=repo_info["repo"],
250
+ filename=model_filename,
251
+ resume_download=True,
252
+ local_files_only=False
253
+ )
254
+
255
+ # Vision projection ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
256
+ mmproj_filename = repo_info["mmproj"]
257
+ logger.info(f"Vision ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘: {mmproj_filename}")
258
+
259
+ try:
260
+ mmproj_path = hf_hub_download(
261
+ repo_id=repo_info["repo"],
262
+ filename=mmproj_filename,
263
+ resume_download=True,
264
+ local_files_only=False
265
+ )
266
+ except:
267
+ # mmproj ํŒŒ์ผ์ด ์—†์„ ์ˆ˜๋„ ์žˆ์Œ
268
+ logger.warning(f"Vision ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {mmproj_filename}")
269
+ logger.warning("ํ…์ŠคํŠธ ์ „์šฉ ๋ชจ๋“œ๋กœ ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค.")
270
+ mmproj_path = None
271
+
272
+ logger.info(f"โœ… ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")
273
+ logger.info(f"๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path}")
274
+ if mmproj_path:
275
+ logger.info(f"Vision ๊ฒฝ๋กœ: {mmproj_path}")
276
+
277
+ return model_path, mmproj_path
278
+
279
+ except Exception as e:
280
+ logger.error(f"์ €์žฅ์†Œ {repo_info['repo']} ์‹œ๋„ ์‹คํŒจ: {e}")
281
+ continue
282
 
283
+ # ๋ชจ๋“  ์‹œ๋„๊ฐ€ ์‹คํŒจํ•œ ๊ฒฝ์šฐ
284
+ raise Exception("์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ GGUF ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ์„ ํ™•์ธํ•˜์„ธ์š”.")
285
 
286
  @spaces.GPU(duration=120)
287
  def load_model():
 
292
  return True
293
 
294
  try:
295
+ logger.info("Gemma3-4B GGUF Q4_K_M ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
296
  clear_cuda_cache()
297
 
298
  # ๋ชจ๋ธ ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
 
301
  # GPU ์‚ฌ์šฉ ๊ฐ€๋Šฅ ์—ฌ๋ถ€ ํ™•์ธ
302
  n_gpu_layers = -1 if torch.cuda.is_available() else 0
303
 
304
+ # ์ฑ„ํŒ… ํ•ธ๋“ค๋Ÿฌ ์ƒ์„ฑ (๋น„์ „ ์ง€์› - mmproj๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ๋งŒ)
305
+ chat_handler = None
306
+ if mmproj_path:
307
+ try:
308
+ chat_handler = Llava16ChatHandler(
309
+ clip_model_path=mmproj_path,
310
+ verbose=False
311
+ )
312
+ logger.info("โœ… Vision ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต")
313
+ except Exception as e:
314
+ logger.warning(f"Vision ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ, ํ…์ŠคํŠธ ์ „์šฉ ๋ชจ๋“œ๋กœ ์ „ํ™˜: {e}")
315
+ chat_handler = None
316
 
317
  # ๋ชจ๋ธ ๋กœ๋“œ
318
+ llm_params = {
319
+ "model_path": model_path,
320
+ "n_ctx": 4096, # ์ปจํ…์ŠคํŠธ ํฌ๊ธฐ
321
+ "n_gpu_layers": n_gpu_layers, # GPU ๋ ˆ์ด์–ด
322
+ "n_threads": 8, # CPU ์Šค๋ ˆ๋“œ
323
+ "verbose": False,
324
+ "seed": 42,
325
+ }
326
+
327
+ # chat_handler๊ฐ€ ์žˆ์œผ๋ฉด ์ถ”๊ฐ€
328
+ if chat_handler:
329
+ llm_params["chat_handler"] = chat_handler
330
+ llm_params["logits_all"] = True # ๋น„์ „ ๋ชจ๋ธ์— ํ•„์š”
331
+
332
+ llm = Llama(**llm_params)
333
 
334
  model_loaded = True
335
+ logger.info(f"โœ… Gemma3-4B ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
336
+ if not chat_handler:
337
+ logger.warning("โš ๏ธ ํ…์ŠคํŠธ ์ „์šฉ ๋ชจ๋“œ๋กœ ์‹คํ–‰ ์ค‘์ž…๋‹ˆ๋‹ค. ์ด๋ฏธ์ง€ ๋ถ„์„์ด ์ œํ•œ๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")
338
  return True
339
 
340
  except Exception as e:
 
395
  return "โŒ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ"
396
 
397
  try:
398
+ # Vision ๋ชจ๋ธ์ด ์—†๋Š” ๊ฒฝ์šฐ ๊ฒฝ๊ณ 
399
+ if not hasattr(llm, 'chat_handler') or llm.chat_handler is None:
400
+ logger.warning("Vision ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ๋ถ„์„๋งŒ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.")
401
+
402
+ # ํ…์ŠคํŠธ ์ „์šฉ ๋ถ„์„
403
+ system_prompt = f"""๋‹น์‹ ์€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์‹œ๋ฎฌ๋ ˆ์ดํ„ฐ์ž…๋‹ˆ๋‹ค.
404
+ ์‹ค์ œ ์ด๋ฏธ์ง€๋ฅผ ๋ณผ ์ˆ˜๋Š” ์—†์ง€๋งŒ, ์‚ฌ์šฉ์ž์˜ ์„ค๋ช…์„ ๋ฐ”ํƒ•์œผ๋กœ ๋กœ๋ด‡ ์ž‘์—…์„ ๊ณ„ํšํ•˜๊ณ  ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.
405
+ ํƒœ์Šคํฌ ์œ ํ˜•: {task_type}"""
406
+
407
+ messages = [
408
+ {"role": "system", "content": system_prompt},
409
+ {"role": "user", "content": f"[์ด๋ฏธ์ง€ ๋ถ„์„ ์š”์ฒญ] {prompt}"}
410
+ ]
411
+
412
+ response = llm.create_chat_completion(
413
+ messages=messages,
414
+ max_tokens=max_new_tokens,
415
+ temperature=0.7,
416
+ top_p=0.9,
417
+ stream=False
418
+ )
419
+
420
+ result = response['choices'][0]['message']['content'].strip()
421
+ return f"โš ๏ธ ํ…์ŠคํŠธ ์ „์šฉ ๋ชจ๋“œ\n\n{result}"
422
+
423
  # ์ด๋ฏธ์ง€๋ฅผ base64๋กœ ๋ณ€ํ™˜
424
  image_uri = image_to_base64_data_uri(image)
425
 
 
615
  gr.HTML("""
616
  <div class="robot-header">
617
  <h1>๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ</h1>
618
+ <h3>๐ŸŽฎ Gemma3-4B GGUF Q4_K_M + ๐Ÿ“ท ์‹ค์‹œ๊ฐ„ ์›น์บ  + ๐Ÿ” ์›น ๊ฒ€์ƒ‰</h3>
619
  <p>โšก ์–‘์žํ™” ๋ชจ๋ธ๋กœ ๋” ๋น ๋ฅด๊ณ  ํšจ์œจ์ ์ธ ๋กœ๋ด‡ ์ž‘์—… ๋ถ„์„!</p>
620
  </div>
621
  """)
622
 
623
  gr.HTML("""
624
  <div class="model-info">
625
+ <strong>๋ชจ๋ธ:</strong> Gemma3-4B Q4_K_M (2.5GB) | <strong>๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ:</strong> ~3-4GB VRAM
626
  </div>
627
  """)
628
 
 
929
  )
930
 
931
  if __name__ == "__main__":
932
+ print("๐Ÿš€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์‹œ์ž‘ (Gemma3-4B GGUF Q4_K_M)...")
933
  demo.launch(
934
  server_name="0.0.0.0",
935
  server_port=7860,