justinj92 committed on
Commit
54a2331
·
verified ·
1 Parent(s): d2f3905

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -90,6 +90,8 @@ vectordb = FAISS.load_local(CFG.Output_folder + '/faiss_index_ml_papers', embedd
90
  def build_model(model_repo = CFG.model_name):
91
  tokenizer = AutoTokenizer.from_pretrained(model_repo)
92
  model = AutoModelForCausalLM.from_pretrained(model_repo, attn_implementation="flash_attention_2", torch_dtype=torch.bfloat16)
 
 
93
 
94
  return tokenizer, model
95
 
@@ -112,8 +114,6 @@ terminators = [
112
  # device = torch.device("cpu")
113
  # print("Using CPU")
114
 
115
- model = model.to(device)
116
-
117
  pipe = pipeline(task="text-generation", model=model, tokenizer=tok, eos_token_id=terminators, do_sample=True, max_new_tokens=CFG.max_new_tokens, temperature=CFG.temperature, top_p=CFG.top_p, repetition_penalty=CFG.repetition_penalty)
118
 
119
  llm = HuggingFacePipeline(pipeline = pipe)
 
90
  def build_model(model_repo = CFG.model_name):
91
  tokenizer = AutoTokenizer.from_pretrained(model_repo)
92
  model = AutoModelForCausalLM.from_pretrained(model_repo, attn_implementation="flash_attention_2", torch_dtype=torch.bfloat16)
93
+ device = torch.device("cuda")
94
+ model = model.to(device)
95
 
96
  return tokenizer, model
97
 
 
114
  # device = torch.device("cpu")
115
  # print("Using CPU")
116
 
 
 
117
  pipe = pipeline(task="text-generation", model=model, tokenizer=tok, eos_token_id=terminators, do_sample=True, max_new_tokens=CFG.max_new_tokens, temperature=CFG.temperature, top_p=CFG.top_p, repetition_penalty=CFG.repetition_penalty)
118
 
119
  llm = HuggingFacePipeline(pipeline = pipe)