Pudding48 committed
Commit b7371e3 · verified · 1 Parent(s): 3202c16

Update qabot.py

Files changed (1)
  1. qabot.py +31 -40
qabot.py CHANGED
@@ -5,70 +5,61 @@ from langchain.chains import RetrievalQA
 from langchain_community.embeddings import GPT4AllEmbeddings
 from langchain_community.vectorstores import FAISS
 
-# from huggingface_hub import hf_hub_download
+from huggingface_hub import hf_hub_download
 # !pip install llama-cpp-python
 
-from llama_cpp import Llama
+# from llama_cpp import Llama
 
-model_file = Llama.from_pretrained(
-    repo_id="Pudding48/TinyLLamaTest",
-    filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
-)
-
-# model_file = hf_hub_download(
-#     repo_id="Pudding48/TinyLlamaTest",  # Replace with your model repo
-#     filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
-#     cache_dir="model"  # Will be created in the Space's environment
+# model_file = Llama.from_pretrained(
+#     repo_id="Pudding48/TinyLLamaTest",
+#     filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
 # )
 
-# Configuration
-# model_file = "model/tinyllama-1.1b-chat-v1.0.Q8_0.gguf"
+model_file = hf_hub_download(
+    repo_id="Pudding48/TinyLlamaTest",  # 🟢 This must be a model repo, not a Space
+    filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
+    cache_dir="model"
+)
+
+# Vector store location
 vector_dp_path = "vectorstores/db_faiss"
 
-# Load LLM
+# Load LLM with CTransformers
 def load_llm(model_file):
-    llm = CTransformers(
+    return CTransformers(
         model=model_file,
         model_type="llama",
         temperature=0.01,
         config={'gpu_layers': 0},
-        max_new_tokens=128,
+        max_new_tokens=128,
         context_length=512
     )
-    return llm
 
-# Create prompt template
+# Create the prompt
 def creat_prompt(template):
-    prompt = PromptTemplate(template=template, input_variables=["context", "question"])
-    return prompt
+    return PromptTemplate(template=template, input_variables=["context", "question"])
 
-# Create pipeline chain (replacing LLMChain)
+# Create QA pipeline
 def create_qa_chain(prompt, llm, db):
-    llm_chain = RetrievalQA.from_chain_type(
-        llm=llm,
-        chain_type="stuff",
-        retriever=db.as_retriever(search_kwargs={"k": 1}),
-        return_source_documents=False,
-        chain_type_kwargs={'prompt': prompt}
+    return RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=db.as_retriever(search_kwargs={"k": 1}),
+        return_source_documents=False,
+        chain_type_kwargs={'prompt': prompt}
     )
-    return llm_chain
 
+# Load vector DB
 def read_vector_db():
-    embedding_model = GPT4AllEmbeddings(model_file="model/all-minilm-l6-v2-q4_0.gguf")
-    db = FAISS.load_local(vector_dp_path, embedding_model, allow_dangerous_deserialization=True)
-    return db
+    embedding_model = GPT4AllEmbeddings(model_file="model/all-minilm-l6-v2-q4_0.gguf")
+    return FAISS.load_local(vector_dp_path, embedding_model, allow_dangerous_deserialization=True)
 
+# Build everything
 db = read_vector_db()
 llm = load_llm(model_file)
-# Prompt template
+
 template = """<|im_start|>system\nSử dụng thông tin sau đây để trả lời câu hỏi. Nếu bạn không biết câu trả lời, hãy nói không biết, đừng cố tạo ra câu trả lời\n
-{context}<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant"""
+{context}<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant"""
 
-# Initialize the components
 prompt = creat_prompt(template)
-llm_chain = create_qa_chain(prompt, llm, db)
-
-# Test-run the chain
-question = "Khoa công nghệ thông tin thành lập năm nào ?"
-response = llm_chain.invoke({"query": question})
-print(response)
+llm_chain = create_qa_chain(prompt, llm, db)
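
Note that FAISS.load_local can only deserialize an index that was built with the same embedding model, so vectorstores/db_faiss must already exist before qabot.py runs. A minimal sketch of how such an index could be built, assuming a hypothetical data/faq.txt source document and the same GPT4All embedding file (neither is part of this commit):

from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load and chunk the source text (data/faq.txt is a hypothetical placeholder).
docs = TextLoader("data/faq.txt", encoding="utf-8").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50).split_documents(docs)

# Embed with the same model that read_vector_db() uses, then persist the index
# to the folder qabot.py expects.
embedding_model = GPT4AllEmbeddings(model_file="model/all-minilm-l6-v2-q4_0.gguf")
db = FAISS.from_documents(chunks, embedding_model)
db.save_local("vectorstores/db_faiss")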
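
The commit also removes the inline test-run at the bottom of the file. A quick smoke test could still be run from a separate script; this is a sketch assuming qabot.py is importable and its module-level download and FAISS load succeed:

# Importing qabot triggers its module-level setup (hf_hub_download, FAISS load).
from qabot import llm_chain

# Sample question reused from the removed test block ("What year was the
# Faculty of Information Technology founded?").
question = "Khoa công nghệ thông tin thành lập năm nào ?"
response = llm_chain.invoke({"query": question})  # RetrievalQA expects the "query" key
print(response["result"])  # the answer text lives under "result"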