Spaces:

deddoggo
/

chatbot_demo

Paused

App Files Community

deddoggo committed on Jun 29

Commit

f998afd

1 Parent(s): 268c26f

update

Browse files

Files changed (1) hide show

rag_pipeline.py +15 -8

rag_pipeline.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # file: rag_pipeline.py
 import torch
 import json
 import faiss
@@ -20,11 +21,11 @@ def initialize_components(data_path):
     """
     print("--- Bắt đầu khởi tạo các thành phần ---")
-    # 1. Tải LLM và Tokenizer từ Unsloth
     print("1. Tải mô hình LLM (Unsloth)...")
     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name="unsloth/Llama-3.2-11B-Vision-Instruct-unsloth-bnb-4bit",
-        max_seq_length=2048,
         dtype=None,
         load_in_4bit=True,
     )
@@ -81,15 +82,15 @@ def initialize_components(data_path):
 def generate_response(query, components):
     """
-    Tạo câu trả lời cho một query bằng cách sử dụng các thành phần đã được khởi tạo.
     """
-    print("--- Bắt đầu quy trình RAG cho query mới ---")
     # Unpack các thành phần
     llm_model = components["llm_model"]
     tokenizer = components["tokenizer"]
-    # 1. Truy xuất ngữ cảnh
     retrieved_results = search_relevant_laws(
         query_text=query,
         embedding_model=components["embedding_model"],
@@ -112,7 +113,7 @@ def generate_response(query, components):
             context_parts.append(f"{header}\n{text}")
         context = "\n\n---\n\n".join(context_parts)
-    # 3. Xây dựng Prompt và tạo câu trả lời
     prompt = f"""Dưới đây là một số thông tin trích dẫn từ văn bản luật giao thông đường bộ Việt Nam.
 Hãy SỬ DỤNG CÁC THÔNG TIN NÀY để trả lời câu hỏi một cách chính xác và đầy đủ.
 Nếu câu hỏi đưa ra nhiều đáp án thì chọn 1 đáp án đúng nhất.
@@ -126,7 +127,13 @@ Nếu câu hỏi đưa ra nhiều đáp án thì chọn 1 đáp án đúng nhấ
 ### Trả lời:"""
     print("--- Bắt đầu tạo câu trả lời từ LLM ---")
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
     generation_config = dict(
         max_new_tokens=256,
@@ -145,4 +152,4 @@ Nếu câu hỏi đưa ra nhiều đáp án thì chọn 1 đáp án đúng nhấ
     response_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
     print("--- Tạo câu trả lời hoàn tất ---")
-    return response_text

 # file: rag_pipeline.py
 import torch
 import json
 import faiss
     """
     print("--- Bắt đầu khởi tạo các thành phần ---")
+    # 1. Tải LLM và Tokenizer (Processor) từ Unsloth
     print("1. Tải mô hình LLM (Unsloth)...")
     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name="unsloth/Llama-3.2-11B-Vision-Instruct-unsloth-bnb-4bit",
+        max_seq_length=4096, # Có thể tăng cho các mô hình mới
         dtype=None,
         load_in_4bit=True,
     )
 def generate_response(query, components):
     """
+    Tạo câu trả lời cho một query (single-turn) bằng cách sử dụng các thành phần đã được khởi tạo.
     """
+    print("--- Bắt đầu quy trình RAG (Single-turn) cho query mới ---")
     # Unpack các thành phần
     llm_model = components["llm_model"]
     tokenizer = components["tokenizer"]
+    # 1. Truy xuất ngữ cảnh trực tiếp từ câu hỏi của người dùng
     retrieved_results = search_relevant_laws(
         query_text=query,
         embedding_model=components["embedding_model"],
             context_parts.append(f"{header}\n{text}")
         context = "\n\n---\n\n".join(context_parts)
+    # 3. Xây dựng Prompt đơn giản (không có lịch sử trò chuyện)
     prompt = f"""Dưới đây là một số thông tin trích dẫn từ văn bản luật giao thông đường bộ Việt Nam.
 Hãy SỬ DỤNG CÁC THÔNG TIN NÀY để trả lời câu hỏi một cách chính xác và đầy đủ.
 Nếu câu hỏi đưa ra nhiều đáp án thì chọn 1 đáp án đúng nhất.
 ### Trả lời:"""
     print("--- Bắt đầu tạo câu trả lời từ LLM ---")
+    # SỬA LỖI CHO VISION MODEL: Sử dụng API tường minh
+    inputs = tokenizer(
+        text=prompt,
+        images=None,
+        return_tensors="pt"
+    ).to("cuda" if torch.cuda.is_available() else "cpu")
     generation_config = dict(
         max_new_tokens=256,
     response_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
     print("--- Tạo câu trả lời hoàn tất ---")
+    return response_text