from langchain_community.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS

from huggingface_hub import hf_hub_download
# Alternative: pull the GGUF through llama-cpp-python instead of hf_hub_download
# !pip install llama-cpp-python
# from llama_cpp import Llama
#
# model_file = Llama.from_pretrained(
#     repo_id="Pudding48/TinyLLamaTest",
#     filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
# )

model_file = hf_hub_download(
    repo_id="Pudding48/TinyLlamaTest",  # 🟢 This must be a model repo, not a Space
    filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
    cache_dir="model"
)

# Vector store location
vector_db_path = "vectorstores/db_faiss"

# Build (or rebuild) the FAISS index from the source text before loading it
from prepare_vector_dp import create_db_from_text
create_db_from_text()
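
# For reference, a minimal sketch of what prepare_vector_dp.create_db_from_text
# might look like. The actual module is not included here; the source file path
# and splitter settings below are assumptions.
#
# from langchain_community.document_loaders import TextLoader
# from langchain.text_splitter import CharacterTextSplitter
#
# def create_db_from_text():
#     documents = TextLoader("data/source.txt", encoding="utf-8").load()  # hypothetical path
#     chunks = CharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(documents)
#     db = FAISS.from_documents(chunks, GPT4AllEmbeddings())
#     db.save_local(vector_db_path)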

# Load the GGUF model with CTransformers (CPU only; ctransformers generation
# options belong in the config dict)
def load_llm(model_file):
    return CTransformers(
        model=model_file,
        model_type="llama",
        config={
            'gpu_layers': 0,
            'temperature': 0.01,
            'max_new_tokens': 128,
            'context_length': 512,
        }
    )

# Create the prompt
def create_prompt(template):
    return PromptTemplate(template=template, input_variables=["context", "question"])

# Create QA pipeline
def create_qa_chain(prompt, llm, db):
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 1}),
        return_source_documents=False,
        chain_type_kwargs={'prompt': prompt}
    )

# Load the vector DB; the embedding model must match the one used when the
# index was built in prepare_vector_dp
def read_vector_db():
    embedding_model = GPT4AllEmbeddings()
    return FAISS.load_local(vector_db_path, embedding_model, allow_dangerous_deserialization=True)

# Build everything
db = read_vector_db()
llm = load_llm(model_file)

template = """<|im_start|>system\nUse the following information to answer the question. If you do not know the answer, just say you do not know; do not try to make one up.\n
{context}<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant"""

prompt = create_prompt(template)
llm_chain = create_qa_chain(prompt, llm, db)
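
# Minimal usage sketch: RetrievalQA expects its input under the "query" key and
# returns the answer under "result". The question below is only a placeholder.
question = "What is this document about?"
response = llm_chain.invoke({"query": question})
print(response["result"])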