Spaces:
Runtime error
Runtime error
Update api.py
Browse files
api.py
CHANGED
@@ -4,22 +4,24 @@ import os
|
|
4 |
from huggingface_hub import login
|
5 |
from flask import Flask, request, jsonify
|
6 |
|
7 |
-
#
|
8 |
login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
|
9 |
|
10 |
-
API_TOKEN = os.getenv("HF_API_TOKEN")
|
11 |
|
12 |
-
#
|
13 |
model_name = "cerebras/btlm-3b-8k-chat"
|
|
|
14 |
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
|
15 |
os.environ['HF_HOME'] = '/tmp/cache'
|
16 |
|
17 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
18 |
model = AutoModelForCausalLM.from_pretrained(
|
19 |
model_name,
|
20 |
torch_dtype=torch_dtype,
|
21 |
device_map="auto",
|
22 |
-
trust_remote_code=True
|
|
|
23 |
)
|
24 |
|
25 |
generator = pipeline(
|
@@ -81,4 +83,4 @@ def chat_completions():
|
|
81 |
})
|
82 |
|
83 |
if __name__ == "__main__":
|
84 |
-
app.run(host="0.0.0.0", port=8080)
|
|
|
4 |
from huggingface_hub import login
|
5 |
from flask import Flask, request, jsonify
|
6 |
|
7 |
+
# Login with Hugging Face token
|
8 |
login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
|
9 |
|
10 |
+
API_TOKEN = os.getenv("HF_API_TOKEN")
|
11 |
|
12 |
+
# Model and loading config
|
13 |
model_name = "cerebras/btlm-3b-8k-chat"
|
14 |
+
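revision = "main" # Branch to load; "main" is a moving ref, so replace it with a commit hash to actually pin the model revision (recommended with trust_remote_code=True)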
|
15 |
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
|
16 |
os.environ['HF_HOME'] = '/tmp/cache'
|
17 |
|
18 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, revision=revision)
|
19 |
model = AutoModelForCausalLM.from_pretrained(
|
20 |
model_name,
|
21 |
torch_dtype=torch_dtype,
|
22 |
device_map="auto",
|
23 |
+
trust_remote_code=True,
|
24 |
+
revision=revision
|
25 |
)
|
26 |
|
27 |
generator = pipeline(
|
|
|
83 |
})
|
84 |
|
85 |
if __name__ == "__main__":
|
86 |
+
app.run(host="0.0.0.0", port=8080)
|