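Update app.py: pass low_cpu_mem_usage=True when loading the BitNet model and remove the "Fallback to LLaMA tokenizer" comment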
Browse files
app.py
CHANGED
@@ -21,14 +21,14 @@ logger = logging.getLogger(__name__)
|
|
21 |
# Initialize BitNet model and tokenizer
|
22 |
try:
|
23 |
model_name = "1bitLLM/bitnet_b1_58-3B"
|
24 |
-
# Fallback to LLaMA tokenizer
|
25 |
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", cache_dir="/app/cache")
|
26 |
model = AutoModelForCausalLM.from_pretrained(
|
27 |
model_name,
|
28 |
torch_dtype=torch.float32,
|
29 |
device_map="cpu",
|
|
|
30 |
cache_dir="/app/cache",
|
31 |
-
trust_remote_code=True
|
32 |
)
|
33 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
|
34 |
except Exception as e:
|
|
|
21 |
# Initialize BitNet model and tokenizer
|
22 |
try:
|
23 |
model_name = "1bitLLM/bitnet_b1_58-3B"
|
|
|
24 |
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", cache_dir="/app/cache")
|
25 |
model = AutoModelForCausalLM.from_pretrained(
|
26 |
model_name,
|
27 |
torch_dtype=torch.float32,
|
28 |
device_map="cpu",
|
29 |
+
low_cpu_mem_usage=True,
|
30 |
cache_dir="/app/cache",
|
31 |
+
trust_remote_code=True
|
32 |
)
|
33 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
|
34 |
except Exception as e:
|