Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -48,12 +48,17 @@ class ModelManager:
|
|
48 |
|
49 |
def get_llama_pipeline(self, model_id, token):
|
50 |
if model_id not in self.llama_pipelines:
|
51 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
|
|
|
|
|
|
|
|
52 |
model = AutoModelForCausalLM.from_pretrained(
|
53 |
model_id,
|
54 |
use_auth_token=token,
|
55 |
torch_dtype=torch.float16,
|
56 |
-
device_map="auto"
|
|
|
57 |
)
|
58 |
self.llama_pipelines[model_id] = pipeline(
|
59 |
"text-generation",
|
|
|
48 |
|
49 |
def get_llama_pipeline(self, model_id, token):
|
50 |
if model_id not in self.llama_pipelines:
|
51 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
52 |
+
model_id,
|
53 |
+
use_auth_token=token,
|
54 |
+
legacy=False # Important for compatibility
|
55 |
+
)
|
56 |
model = AutoModelForCausalLM.from_pretrained(
|
57 |
model_id,
|
58 |
use_auth_token=token,
|
59 |
torch_dtype=torch.float16,
|
60 |
+
device_map="auto",
|
61 |
+
low_cpu_mem_usage=True # Reduces memory pressure
|
62 |
)
|
63 |
self.llama_pipelines[model_id] = pipeline(
|
64 |
"text-generation",
|