Bils committed on
Commit
8bda130
·
verified ·
1 Parent(s): c9a2029

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -48,12 +48,17 @@ class ModelManager:
48
 
49
  def get_llama_pipeline(self, model_id, token):
50
  if model_id not in self.llama_pipelines:
51
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
 
 
 
 
52
  model = AutoModelForCausalLM.from_pretrained(
53
  model_id,
54
  use_auth_token=token,
55
  torch_dtype=torch.float16,
56
- device_map="auto"
 
57
  )
58
  self.llama_pipelines[model_id] = pipeline(
59
  "text-generation",
 
48
 
49
  def get_llama_pipeline(self, model_id, token):
50
  if model_id not in self.llama_pipelines:
51
+ tokenizer = AutoTokenizer.from_pretrained(
52
+ model_id,
53
+ use_auth_token=token,
54
+ legacy=False # Important for compatibility
55
+ )
56
  model = AutoModelForCausalLM.from_pretrained(
57
  model_id,
58
  use_auth_token=token,
59
  torch_dtype=torch.float16,
60
+ device_map="auto",
61
+ low_cpu_mem_usage=True # Reduces memory pressure
62
  )
63
  self.llama_pipelines[model_id] = pipeline(
64
  "text-generation",