Euryeth committed on
Commit
85b0ca0
·
verified ·
1 Parent(s): a5e8a2b

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +8 -6
api.py CHANGED
@@ -4,22 +4,24 @@ import os
4
  from huggingface_hub import login
5
  from flask import Flask, request, jsonify
6
 
7
- # Authenticate with Hugging Face token from Secrets
8
  login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
9
 
10
- API_TOKEN = os.getenv("HF_API_TOKEN") # Set this token in your Space Secrets
11
 
12
- # Setup
13
  model_name = "cerebras/btlm-3b-8k-chat"
 
14
  torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
15
  os.environ['HF_HOME'] = '/tmp/cache'
16
 
17
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
18
  model = AutoModelForCausalLM.from_pretrained(
19
  model_name,
20
  torch_dtype=torch_dtype,
21
  device_map="auto",
22
- trust_remote_code=True
 
23
  )
24
 
25
  generator = pipeline(
@@ -81,4 +83,4 @@ def chat_completions():
81
  })
82
 
83
  if __name__ == "__main__":
84
- app.run(host="0.0.0.0", port=8080)
 
4
  from huggingface_hub import login
5
  from flask import Flask, request, jsonify
6
 
7
+ # Login with Hugging Face token
8
  login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
9
 
10
+ API_TOKEN = os.getenv("HF_API_TOKEN")
11
 
12
+ # Model and loading config
13
  model_name = "cerebras/btlm-3b-8k-chat"
14
+ revision = "main" # Tracks the "main" branch (not a fixed pin); use a commit SHA to pin a specific model revision
15
  torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
16
  os.environ['HF_HOME'] = '/tmp/cache'
17
 
18
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, revision=revision)
19
  model = AutoModelForCausalLM.from_pretrained(
20
  model_name,
21
  torch_dtype=torch_dtype,
22
  device_map="auto",
23
+ trust_remote_code=True,
24
+ revision=revision
25
  )
26
 
27
  generator = pipeline(
 
83
  })
84
 
85
  if __name__ == "__main__":
86
+ app.run(host="0.0.0.0", port=8080)