Spaces:

sunbv56
/

demo-qwen2.5-vl-vqa-vibook

Sleeping

App Files Community

sunbv56 committed on Jun 20

Commit

aa2e87f

verified ·

1 Parent(s): 8305d80

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -52

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py (Phiên bản cuối cùng đã sửa lỗi và cảnh báo)
 import gradio as gr
 import torch
@@ -7,17 +7,16 @@ from transformers import AutoModelForImageTextToText, AutoProcessor
 from gradio.events import SelectData
 import warnings
 import os
-from urllib.request import urlretrieve
 warnings.filterwarnings("ignore", category=UserWarning, message="Overriding torch_dtype=None")
 # --- 1. Tải Model và Processor ---
-MODEL_ID = "sunbv56/qwen2.5-vl-vqa-vibook"
 print(f"🚀 Đang tải model '{MODEL_ID}' và processor...")
 try:
     dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16
     model = AutoModelForImageTextToText.from_pretrained(MODEL_ID, torch_dtype=dtype, device_map="auto", trust_remote_code=True)
-    # SỬA LỖI 3: Thêm use_fast=True để tắt cảnh báo
     processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=True)
     model.eval()
     print(f"✅ Model và processor đã được tải thành công!")
@@ -32,19 +31,12 @@ def process_vqa(image: Image.Image, question: str):
     messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": question}]}]
     prompt_text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     model_inputs = processor(text=[prompt_text], images=[image], return_tensors="pt").to(model.device)
-    generated_ids = model.generate(
-        **model_inputs,
-        max_new_tokens=1024,
-        do_sample=False,
-        eos_token_id=processor.tokenizer.eos_token_id,
-        pad_token_id=processor.tokenizer.pad_token_id
-    )
     generated_ids = generated_ids[:, model_inputs['input_ids'].shape[1]:]
     response = processor.tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
     return response
 # --- 3. Logic Chatbot ---
-# Hàm dành cho việc người dùng tự nhập câu hỏi
 def manual_chat_responder(user_question: str, chat_history: list, uploaded_image: Image.Image):
     if uploaded_image is None:
         gr.Warning("Vui lòng tải ảnh lên trước để đặt câu hỏi về nó.")
@@ -52,30 +44,12 @@ def manual_chat_responder(user_question: str, chat_history: list, uploaded_image
     if not user_question or not user_question.strip():
         gr.Warning("Vui lòng nhập một câu hỏi.")
         return "", chat_history
-    # SỬA LỖI 2: Sử dụng định dạng `messages` mới
     chat_history.append({"role": "user", "content": user_question})
     yield "", chat_history
     bot_response = process_vqa(uploaded_image, user_question)
     chat_history.append({"role": "assistant", "content": bot_response})
     yield "", chat_history
-# Hàm dành riêng cho việc xử lý khi nhấn vào ví dụ
-def run_example(example_list: list, evt: SelectData):
-    selected_example = example_list[evt.index]
-    image_path, question = selected_example
-    gr.Info(f"Đang chạy ví dụ: \"{question}\"")
-    image = Image.open(image_path).convert("RGB")
-    # SỬA LỖI 2: Bắt đầu cuộc trò chuyện với định dạng `messages` mới
-    chat_history = [{"role": "user", "content": question}]
-    bot_response = process_vqa(image, question)
-    chat_history.append({"role": "assistant", "content": bot_response})
-    return image, question, chat_history
 def clear_chat():
     return []
@@ -89,33 +63,42 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), ti
         ["./assets/book_example_2.jpg", "tác giả và tên của cuốn sách là gì?"],
     ]
     with gr.Row(equal_height=False):
         with gr.Column(scale=1, min_width=350):
             gr.Markdown("### Bảng điều khiển")
             image_input = gr.Image(type="pil", label="Tải ảnh lên", sources=["upload", "clipboard"])
             gr.Markdown("---")
             gr.Markdown("### Ví dụ (Nhấn để chạy)")
-            example_dataset = gr.Dataset(
-                components=[gr.Image(visible=False), gr.Textbox(visible=False)],
-                samples=example_list,
-                label="Ví dụ",
-                type="index"
-            )
         with gr.Column(scale=2):
-            # SỬA LỖI 2: Thêm type="messages" và khởi tạo giá trị
-            chatbot = gr.Chatbot(
-                label="Cuộc trò chuyện",
-                bubble_full_width=False,
-                height=600,
-                avatar_images=(None, "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"),
-                type="messages",
-                value=[]
-            )
             question_input = gr.Textbox(label="Hoặc nhập câu hỏi về ảnh đã tải lên", placeholder="Nhập câu hỏi và nhấn Enter...", container=False, scale=7)
     # --- 5. Xử lý Sự kiện ---
     question_input.submit(fn=manual_chat_responder, inputs=[question_input, chatbot, image_input], outputs=[question_input, chatbot])
-    example_dataset.select(fn=run_example, inputs=[example_dataset], outputs=[image_input, question_input, chatbot], show_progress="full")
     image_input.upload(fn=clear_chat, inputs=None, outputs=[chatbot])
     image_input.clear(fn=clear_chat, inputs=None, outputs=[chatbot])
@@ -126,18 +109,23 @@ if __name__ == "__main__":
         os.makedirs(ASSETS_DIR)
         print("Đã tạo thư mục 'assets' cho các hình ảnh ví dụ.")
-    # SỬA LỖI 1: Thêm định nghĩa EXAMPLE_FILES bị thiếu
     EXAMPLE_FILES = {
-        "book_example_1.jpg": "https://huggingface.co/spaces/sunbv56/demo-qwen2.5-vl-vqa-vibook/resolve/main/assets/book_example_1.jpg",
-        "book_example_2.jpg": "https://huggingface.co/spaces/sunbv56/demo-qwen2.5-vl-vqa-vibook/resolve/main/assets/book_example_2.jpg"
     }
     for filename, url in EXAMPLE_FILES.items():
         filepath = os.path.join(ASSETS_DIR, filename)
         if not os.path.exists(filepath):
             print(f"Đang tải xuống hình ảnh ví dụ: {filename}...")
-            # Sửa lỗi logic tải file
-            urlretrieve(url, filepath)
-            print("...Đã xong.")
     demo.launch(debug=True)

+# app.py (Phiên bản cuối cùng đã sửa lỗi TypeError và các cảnh báo)
 import gradio as gr
 import torch
 from gradio.events import SelectData
 import warnings
 import os
+import requests
 warnings.filterwarnings("ignore", category=UserWarning, message="Overriding torch_dtype=None")
 # --- 1. Tải Model và Processor ---
+MODEL_ID = "sunbv56/qwen2.5-vl-vqa-vibook-lora-merged"
 print(f"🚀 Đang tải model '{MODEL_ID}' và processor...")
 try:
     dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16
     model = AutoModelForImageTextToText.from_pretrained(MODEL_ID, torch_dtype=dtype, device_map="auto", trust_remote_code=True)
     processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=True)
     model.eval()
     print(f"✅ Model và processor đã được tải thành công!")
     messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": question}]}]
     prompt_text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     model_inputs = processor(text=[prompt_text], images=[image], return_tensors="pt").to(model.device)
+    generated_ids = model.generate(**model_inputs, max_new_tokens=1024, do_sample=False, eos_token_id=processor.tokenizer.eos_token_id, pad_token_id=processor.tokenizer.pad_token_id)
     generated_ids = generated_ids[:, model_inputs['input_ids'].shape[1]:]
     response = processor.tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
     return response
 # --- 3. Logic Chatbot ---
 def manual_chat_responder(user_question: str, chat_history: list, uploaded_image: Image.Image):
     if uploaded_image is None:
         gr.Warning("Vui lòng tải ảnh lên trước để đặt câu hỏi về nó.")
     if not user_question or not user_question.strip():
         gr.Warning("Vui lòng nhập một câu hỏi.")
         return "", chat_history
     chat_history.append({"role": "user", "content": user_question})
     yield "", chat_history
     bot_response = process_vqa(uploaded_image, user_question)
     chat_history.append({"role": "assistant", "content": bot_response})
     yield "", chat_history
 def clear_chat():
     return []
         ["./assets/book_example_2.jpg", "tác giả và tên của cuốn sách là gì?"],
     ]
+    # SỬA LỖI: Di chuyển hàm xử lý ví dụ vào bên trong khối `with`
+    # để nó có thể truy cập `example_list` từ phạm vi bên ngoài.
+    def run_example(evt: SelectData):
+        # `example_list` giờ đây là biến list chính xác từ phạm vi ngoài
+        selected_example = example_list[evt.index]
+        image_path, question = selected_example
+        gr.Info(f"Đang chạy ví dụ: \"{question}\"")
+        image = Image.open(image_path).convert("RGB")
+        chat_history = [{"role": "user", "content": question}]
+        bot_response = process_vqa(image, question)
+        chat_history.append({"role": "assistant", "content": bot_response})
+        return image, question, chat_history
     with gr.Row(equal_height=False):
         with gr.Column(scale=1, min_width=350):
             gr.Markdown("### Bảng điều khiển")
             image_input = gr.Image(type="pil", label="Tải ảnh lên", sources=["upload", "clipboard"])
             gr.Markdown("---")
             gr.Markdown("### Ví dụ (Nhấn để chạy)")
+            example_dataset = gr.Dataset(components=[gr.Image(visible=False), gr.Textbox(visible=False)], samples=example_list, label="Ví dụ", type="index")
         with gr.Column(scale=2):
+            # Sửa cảnh báo: Xóa `bubble_full_width` đã lỗi thời
+            chatbot = gr.Chatbot(label="Cuộc trò chuyện", height=600, avatar_images=(None, "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"), type="messages", value=[])
             question_input = gr.Textbox(label="Hoặc nhập câu hỏi về ảnh đã tải lên", placeholder="Nhập câu hỏi và nhấn Enter...", container=False, scale=7)
     # --- 5. Xử lý Sự kiện ---
     question_input.submit(fn=manual_chat_responder, inputs=[question_input, chatbot, image_input], outputs=[question_input, chatbot])
+    # SỬA LỖI: Loại bỏ `inputs` để hàm chỉ nhận `evt`
+    example_dataset.select(
+        fn=run_example,
+        inputs=None,
+        outputs=[image_input, question_input, chatbot],
+        show_progress="full"
+    )
     image_input.upload(fn=clear_chat, inputs=None, outputs=[chatbot])
     image_input.clear(fn=clear_chat, inputs=None, outputs=[chatbot])
         os.makedirs(ASSETS_DIR)
         print("Đã tạo thư mục 'assets' cho các hình ảnh ví dụ.")
     EXAMPLE_FILES = {
+        "book_example_1.jpg": "https://cdn0.fahasa.com/media/catalog/product/d/i/dieu-ky-dieu-cua-tiem-tap-hoa-namiya---tai-ban-2020.jpg",
+        "book_example_2.jpg": "https://cdn0.fahasa.com/media/catalog/product/d/r/dr.-stone_bia_tap-26.jpg"
     }
+    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
     for filename, url in EXAMPLE_FILES.items():
         filepath = os.path.join(ASSETS_DIR, filename)
         if not os.path.exists(filepath):
             print(f"Đang tải xuống hình ảnh ví dụ: {filename}...")
+            try:
+                response = requests.get(url, headers=headers, timeout=10)
+                response.raise_for_status()
+                with open(filepath, 'wb') as f:
+                    f.write(response.content)
+                print("...Đã xong.")
+            except requests.exceptions.RequestException as e:
+                print(f" Lỗi khi tải {filename}: {e}")
     demo.launch(debug=True)