david-thrower committed on
Commit
ed7e400
·
verified ·
1 Parent(s): 3e4dc8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -10,11 +10,10 @@ print("Loading tokenizer & model…")
10
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
11
  # model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16).to(DEVICE)
12
 
13
- model = AutoModelForCausalLM.from_pretrained(
14
- MODEL_ID,
15
- # load_in_8bit=True, # or try load_in_4bit=True
16
- # device_map=DEVICE
17
- )
18
 
19
  #########
20
 
@@ -68,7 +67,7 @@ def chat_fn(history, enable_thinking, temperature, top_p, top_k, repetition_pena
68
  add_generation_prompt=True,
69
  # xml_tools=TOOLS
70
  )
71
- inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
72
  with torch.inference_mode():
73
  streamer = model.generate(
74
  **inputs,
 
10
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
11
  # model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16).to(DEVICE)
12
 
13
+ model =\
14
+ AutoModelForCausalLM\
15
+ .from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)\
16
+ .to(DEVICE)
 
17
 
18
  #########
19
 
 
67
  add_generation_prompt=True,
68
  # xml_tools=TOOLS
69
  )
70
+ inputs = tokenizer(text, return_tensors="pt")
71
  with torch.inference_mode():
72
  streamer = model.generate(
73
  **inputs,