Update app.py
Browse files
app.py
CHANGED
@@ -90,6 +90,8 @@ vectordb = FAISS.load_local(CFG.Output_folder + '/faiss_index_ml_papers', embedd
|
|
90 |
def build_model(model_repo = CFG.model_name):
    """Load the tokenizer and causal-LM weights for *model_repo*.

    Parameters
    ----------
    model_repo : str
        Hugging Face repo id; defaults to the project-wide ``CFG.model_name``.

    Returns
    -------
    tuple
        ``(tokenizer, model)`` ready to be wired into a generation pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_repo)
    # bfloat16 + FlashAttention-2 keep memory low; both assume a recent GPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_repo,
        attn_implementation="flash_attention_2",
        torch_dtype=torch.bfloat16,
    )
    return tokenizer, model
|
95 |
|
@@ -112,8 +114,6 @@ terminators = [
|
|
112 |
# device = torch.device("cpu")
# print("Using CPU")

# Move the model onto the previously selected device before building the pipeline.
model = model.to(device)

# Text-generation pipeline, parameterized entirely from the project-wide CFG.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tok,
    eos_token_id=terminators,
    do_sample=True,
    max_new_tokens=CFG.max_new_tokens,
    temperature=CFG.temperature,
    top_p=CFG.top_p,
    repetition_penalty=CFG.repetition_penalty,
)

# LangChain wrapper so the HF pipeline can be used as an LLM in chains.
llm = HuggingFacePipeline(pipeline=pipe)
|
|
|
90 |
def build_model(model_repo = CFG.model_name):
    """Load the tokenizer and causal-LM weights for *model_repo* and place the
    model on the best available device.

    Parameters
    ----------
    model_repo : str
        Hugging Face repo id; defaults to the project-wide ``CFG.model_name``.

    Returns
    -------
    tuple
        ``(tokenizer, model)`` with the model already moved to its device.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_repo)
    # bfloat16 + FlashAttention-2 keep memory low; FA2 itself requires CUDA,
    # so a CPU run may still need a different attn_implementation — TODO confirm.
    model = AutoModelForCausalLM.from_pretrained(
        model_repo,
        attn_implementation="flash_attention_2",
        torch_dtype=torch.bfloat16,
    )
    # Fix: the original hard-coded torch.device("cuda"), which raises on
    # CPU-only hosts. Prefer the GPU but fall back to CPU (the file's
    # commented-out CPU lines show CPU was an intended mode).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    return tokenizer, model
|
97 |
|
|
|
114 |
# device = torch.device("cpu")
# print("Using CPU")

# Text-generation pipeline, parameterized entirely from the project-wide CFG.
# (Device placement now happens inside build_model, so no .to() call here.)
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tok,
    eos_token_id=terminators,
    do_sample=True,
    max_new_tokens=CFG.max_new_tokens,
    temperature=CFG.temperature,
    top_p=CFG.top_p,
    repetition_penalty=CFG.repetition_penalty,
)

# LangChain wrapper so the HF pipeline can be used as an LLM in chains.
llm = HuggingFacePipeline(pipeline=pipe)
|