Update app.py
Browse files
app.py
CHANGED
@@ -90,6 +90,8 @@ vectordb = FAISS.load_local(CFG.Output_folder + '/faiss_index_ml_papers', embedd
|
|
90 |
def build_model(model_repo = CFG.model_name):
    """Load the tokenizer and causal-LM weights for *model_repo*.

    Parameters
    ----------
    model_repo : str
        Hugging Face repo id; defaults to the project-wide ``CFG.model_name``.

    Returns
    -------
    tuple
        ``(tokenizer, model)`` ready to be wired into a generation pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_repo)
    # bfloat16 + FlashAttention-2 keep memory low; both assume a recent GPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_repo,
        attn_implementation="flash_attention_2",
        torch_dtype=torch.bfloat16,
    )
    return tokenizer, model
|
95 |
|
@@ -112,8 +114,6 @@ terminators = [
|
|
112 |
# device = torch.device("cpu")
# print("Using CPU")

# Move the model onto the previously selected device before building the pipeline.
model = model.to(device)

# Text-generation pipeline, parameterized entirely from the project-wide CFG.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tok,
    eos_token_id=terminators,
    do_sample=True,
    max_new_tokens=CFG.max_new_tokens,
    temperature=CFG.temperature,
    top_p=CFG.top_p,
    repetition_penalty=CFG.repetition_penalty,
)

# LangChain wrapper so the HF pipeline can be used as an LLM in chains.
llm = HuggingFacePipeline(pipeline=pipe)
|
|
|
90 |
def build_model(model_repo = CFG.model_name):
    """Load the tokenizer and causal-LM weights for *model_repo* and place the
    model on the best available device.

    Parameters
    ----------
    model_repo : str
        Hugging Face repo id; defaults to the project-wide ``CFG.model_name``.

    Returns
    -------
    tuple
        ``(tokenizer, model)`` with the model already moved to its device.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_repo)
    # bfloat16 + FlashAttention-2 keep memory low; FA2 itself requires CUDA,
    # so a CPU run may still need a different attn_implementation — TODO confirm.
    model = AutoModelForCausalLM.from_pretrained(
        model_repo,
        attn_implementation="flash_attention_2",
        torch_dtype=torch.bfloat16,
    )
    # Fix: the original hard-coded torch.device("cuda"), which raises on
    # CPU-only hosts. Prefer the GPU but fall back to CPU (the file's
    # commented-out CPU lines show CPU was an intended mode).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    return tokenizer, model
|
97 |
|
|
|
114 |
# device = torch.device("cpu")
# print("Using CPU")

# Text-generation pipeline, parameterized entirely from the project-wide CFG.
# (Device placement now happens inside build_model, so no .to() call here.)
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tok,
    eos_token_id=terminators,
    do_sample=True,
    max_new_tokens=CFG.max_new_tokens,
    temperature=CFG.temperature,
    top_p=CFG.top_p,
    repetition_penalty=CFG.repetition_penalty,
)

# LangChain wrapper so the HF pipeline can be used as an LLM in chains.
llm = HuggingFacePipeline(pipeline=pipe)
|