tienanh2003 committed
Commit df641a4 · verified · 1 Parent(s): 140f03e

Update app.py

Files changed (1): app.py (+54 -50)
app.py CHANGED
@@ -28,24 +28,24 @@ if device.type == 'cuda':
     torch.cuda.set_per_process_memory_fraction(GPU_MEMORY_FRACTION, device=0)
 
 # --- 2. Load model ---
-from transformers import AutoProcessor, BitsAndBytesConfig
-from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
-
-bnb = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16
-)
-
-processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
-model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID,
-    quantization_config=bnb,
-    device_map="auto",
-    trust_remote_code=True
-).eval()
-processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
+# from transformers import AutoProcessor, BitsAndBytesConfig
+# from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
+
+# bnb = BitsAndBytesConfig(
+#     load_in_4bit=True,
+#     bnb_4bit_use_double_quant=True,
+#     bnb_4bit_quant_type="nf4",
+#     bnb_4bit_compute_dtype=torch.float16
+# )
+
+# processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+# model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+#     MODEL_ID,
+#     quantization_config=bnb,
+#     device_map="auto",
+#     trust_remote_code=True
+# ).eval()
+# processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
 
 # --- 8. File handler ---
 import traceback
@@ -112,41 +112,45 @@ def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=gr.Progress
         traceback.print_exc()
         return "error", f"[ERROR] handle_file unexpected: {e}"
 
+# def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
+#     if img.mode != "RGB":
+#         img = img.convert("RGB")
+#     prompt_text = prompt.strip()
+
+#     messages = [{
+#         "role": "user",
+#         "content": [
+#             {"type": "image", "image": img},
+#             {"type": "text", "text": prompt_text}
+#         ]
+#     }]
+
+#     text_prompt = processor.apply_chat_template(
+#         messages, tokenize=False, add_generation_prompt=True
+#     )
+
+#     inputs = processor(
+#         text=[text_prompt], images=[img], return_tensors="pt", padding=True
+#     ).to(device)
+
+#     with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(device.type == 'cuda')):
+#         gen = model.generate(
+#             **inputs,
+#             max_new_tokens=max_new_tokens,
+#             do_sample=False,
+#             eos_token_id=processor.tokenizer.eos_token_id
+#         )
+
+#     trimmed = [o[len(i):] for i, o in zip(inputs['input_ids'], gen)]
+#     result = processor.tokenizer.batch_decode(
+#         trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
+#     )[0].strip()
+
+#     return result
 def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
     if img.mode != "RGB":
         img = img.convert("RGB")
-    prompt_text = prompt.strip()
-
-    messages = [{
-        "role": "user",
-        "content": [
-            {"type": "image", "image": img},
-            {"type": "text", "text": prompt_text}
-        ]
-    }]
-
-    text_prompt = processor.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-
-    inputs = processor(
-        text=[text_prompt], images=[img], return_tensors="pt", padding=True
-    ).to(device)
-
-    with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(device.type == 'cuda')):
-        gen = model.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            do_sample=False,
-            eos_token_id=processor.tokenizer.eos_token_id
-        )
-
-    trimmed = [o[len(i):] for i, o in zip(inputs['input_ids'], gen)]
-    result = processor.tokenizer.batch_decode(
-        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
-    )[0].strip()
-
-    return result
+    return f"[DEBUG] Ảnh nhận: size={img.size}, prompt='{prompt[:30]}...'"
 
 # --- 9. Prompt templates & JSON export ---
 prompt_templates = {
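
For context, the code paths this commit comments out can be exercised on their own with the sketch below. It is a minimal reconstruction assembled from the removed lines above, not the full app.py: the MODEL_ID value, the device selection, and the sample.jpg test image are placeholder assumptions, and the snippet expects the transformers, accelerate, and bitsandbytes packages plus a CUDA GPU for the 4-bit NF4 quantization to take effect.

# Minimal, self-contained sketch of the 4-bit Qwen2.5-VL pipeline that this commit
# disables. MODEL_ID and the test image path below are placeholders.
import torch
from PIL import Image
from transformers import AutoProcessor, BitsAndBytesConfig
from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration

MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"  # placeholder; app.py defines its own MODEL_ID
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NF4 double quantization with fp16 compute keeps the model inside a small VRAM budget.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    quantization_config=bnb,
    device_map="auto",
    trust_remote_code=True,
).eval()
# Reuse the EOS token for padding so generate() does not complain about a missing pad token.
processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id

def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
    # Single-turn chat with one image and one text segment; decode only the new tokens.
    if img.mode != "RGB":
        img = img.convert("RGB")
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": prompt.strip()},
        ],
    }]
    text_prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[text_prompt], images=[img], return_tensors="pt", padding=True
    ).to(device)
    with torch.inference_mode():
        gen = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=processor.tokenizer.eos_token_id,
        )
    # Strip the prompt tokens from each sequence before decoding.
    trimmed = [out[len(inp):] for inp, out in zip(inputs["input_ids"], gen)]
    return processor.tokenizer.batch_decode(
        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0].strip()

if __name__ == "__main__":
    # Hypothetical local check; "sample.jpg" is a placeholder image path.
    print(run_inference(Image.open("sample.jpg"), "Describe this image."))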