Update app.py
app.py CHANGED
@@ -28,24 +28,24 @@ if device.type == 'cuda':
     torch.cuda.set_per_process_memory_fraction(GPU_MEMORY_FRACTION, device=0)
 
 # --- 2. Load model ---
-from transformers import AutoProcessor, BitsAndBytesConfig
-from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
-
-bnb = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16
-)
-
-processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
-model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID,
-    quantization_config=bnb,
-    device_map="auto",
-    trust_remote_code=True
-).eval()
-processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
+# from transformers import AutoProcessor, BitsAndBytesConfig
+# from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
+
+# bnb = BitsAndBytesConfig(
+#     load_in_4bit=True,
+#     bnb_4bit_use_double_quant=True,
+#     bnb_4bit_quant_type="nf4",
+#     bnb_4bit_compute_dtype=torch.float16
+# )
+
+# processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+# model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+#     MODEL_ID,
+#     quantization_config=bnb,
+#     device_map="auto",
+#     trust_remote_code=True
+# ).eval()
+# processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
 
 # --- 8. File handler ---
 import traceback
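Note: the block commented out in the hunk above is a standard 4-bit (NF4) bitsandbytes load of a Qwen2.5-VL checkpoint. The following is a minimal, self-contained sketch of that loading path for anyone restoring it later; the MODEL_ID value here is only a placeholder, since app.py defines its own MODEL_ID and GPU settings.

import torch
from transformers import AutoProcessor, BitsAndBytesConfig
from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration

MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"  # placeholder; use the Space's own MODEL_ID

# NF4 4-bit quantization with double quantization keeps the VLM within a small GPU budget.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    quantization_config=bnb,
    device_map="auto",
    trust_remote_code=True,
).eval()

# Qwen tokenizers ship without a pad token; reuse EOS so padded batches work.
processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id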
@@ -112,41 +112,45 @@ def handle_file(file, prompt, extra_prompt, max_new_tokens, progress=gr.Progress
         traceback.print_exc()
         return "error", f"[ERROR] handle_file unexpected: {e}"
 
+# def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
+#     if img.mode != "RGB":
+#         img = img.convert("RGB")
+#     prompt_text = prompt.strip()
+
+#     messages = [{
+#         "role": "user",
+#         "content": [
+#             {"type": "image", "image": img},
+#             {"type": "text", "text": prompt_text}
+#         ]
+#     }]
+
+#     text_prompt = processor.apply_chat_template(
+#         messages, tokenize=False, add_generation_prompt=True
+#     )
+
+#     inputs = processor(
+#         text=[text_prompt], images=[img], return_tensors="pt", padding=True
+#     ).to(device)
+
+#     with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(device.type == 'cuda')):
+#         gen = model.generate(
+#             **inputs,
+#             max_new_tokens=max_new_tokens,
+#             do_sample=False,
+#             eos_token_id=processor.tokenizer.eos_token_id
+#         )
+
+#     trimmed = [o[len(i):] for i, o in zip(inputs['input_ids'], gen)]
+#     result = processor.tokenizer.batch_decode(
+#         trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
+#     )[0].strip()
+
+#     return result
 def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
     if img.mode != "RGB":
         img = img.convert("RGB")
-    prompt_text = prompt.strip()
-
-    messages = [{
-        "role": "user",
-        "content": [
-            {"type": "image", "image": img},
-            {"type": "text", "text": prompt_text}
-        ]
-    }]
-
-    text_prompt = processor.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-
-    inputs = processor(
-        text=[text_prompt], images=[img], return_tensors="pt", padding=True
-    ).to(device)
-
-    with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(device.type == 'cuda')):
-        gen = model.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            do_sample=False,
-            eos_token_id=processor.tokenizer.eos_token_id
-        )
-
-    trimmed = [o[len(i):] for i, o in zip(inputs['input_ids'], gen)]
-    result = processor.tokenizer.batch_decode(
-        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
-    )[0].strip()
-
-    return result
+    return f"[DEBUG] Received image: size={img.size}, prompt='{prompt[:30]}...'"
 
 # --- 9. Prompt templates & JSON export ---
 prompt_templates = {
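For reference, the run_inference that the debug stub replaces follows the usual Qwen2.5-VL chat-template flow: build one user message containing the image and the text prompt, apply the chat template, run greedy generation, then decode only the newly generated tokens. Below is a sketch of that flow as a standalone function, assuming processor, model, and device exist as created by the loading block this commit also comments out.

import torch
from PIL import Image

def run_inference(img: Image.Image, prompt: str = "", max_new_tokens: int = 512) -> str:
    # Assumes globals `processor`, `model`, `device` from the (currently disabled) loading block.
    if img.mode != "RGB":
        img = img.convert("RGB")
    prompt_text = prompt.strip()

    # One user turn containing the image plus the text prompt.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": prompt_text},
        ],
    }]

    text_prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[text_prompt], images=[img], return_tensors="pt", padding=True
    ).to(device)

    # Greedy decoding; autocast is only enabled when running on CUDA.
    with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
        gen = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=processor.tokenizer.eos_token_id,
        )

    # Strip the prompt tokens so only the model's answer is decoded.
    trimmed = [out[len(inp):] for inp, out in zip(inputs["input_ids"], gen)]
    return processor.tokenizer.batch_decode(
        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0].strip()

The committed stub instead only echoes the image size and the first 30 characters of the prompt, which suggests the model path is temporarily disabled while the file-handling flow is being debugged.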