from langchain_community.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence
from langchain.chains import RetrievalQA
#from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from huggingface_hub import hf_hub_download
# !pip install llama-cpp-python
# from llama_cpp import Llama
# model_file = Llama.from_pretrained(
#     repo_id="Pudding48/TinyLLamaTest",
#     filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
# )
import os
cache_path = "/home/user/app/hf_cache"
os.makedirs(cache_path, exist_ok=True)
model_file = hf_hub_download(
    repo_id="Pudding48/TinyLlamaTest",
    filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
    cache_dir=cache_path
)
# from transformers import AutoTokenizer, AutoModelForCausalLM
# tokenizer = AutoTokenizer.from_pretrained("vinai/PhoGPT-4B", trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained("vinai/PhoGPT-4B", trust_remote_code=True)
# Vector store location
vector_dp_path = "/home/user/app/vectorstores/db_faiss"
request_model = "sentence-transformers/distiluse-base-multilingual-cased-v1"  # multilingual embedding model used for the FAISS index
from prepare_vector_dp import create_dp_from_files
#create_db_from_text()
create_dp_from_files(request_model)
# Load LLM with CTransformers
def load_llm(model_file):
    return CTransformers(
        model=model_file,
        model_type="llama",
        temperature=0.01,
        config={'gpu_layers': 0},
        max_new_tokens=128,
        context_length=512
    )
# Create the prompt
def create_prompt(template):
    return PromptTemplate(template=template, input_variables=["context", "question"])
# Create QA pipeline
def create_qa_chain(prompt, llm, db):
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 1}),
        return_source_documents=False,
        chain_type_kwargs={'prompt': prompt}
    )
# Load vector DB
def read_vector_db():
    embedding_model = HuggingFaceEmbeddings(model_name=request_model)
    return FAISS.load_local(vector_dp_path, embedding_model, allow_dangerous_deserialization=True)
# Build everything
db = read_vector_db()
llm = load_llm(model_file)
# ChatML-style prompt (Vietnamese). English translation: "Use the following information
# to answer the question. If you do not know the answer, say you do not know; do not
# try to make up an answer."
template = """<|im_start|>system\nSử dụng thông tin sau đây để trả lời câu hỏi. Nếu bạn không biết câu trả lời, hãy nói không biết, đừng cố tạo ra câu trả lời\n
{context}<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant"""
prompt = create_prompt(template)
llm_chain = create_qa_chain(prompt, llm, db)
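# Example usage (a minimal sketch, not part of the original file): RetrievalQA is a
# LangChain Chain, so it can be invoked with its input under the "query" key and
# returns the answer under "result". The sample question below is only illustrative.
if __name__ == "__main__":
    sample_question = "Học phí một học kỳ là bao nhiêu?"  # hypothetical test question
    response = llm_chain.invoke({"query": sample_question})
    print(response["result"])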