DesiredName committed on
Commit
e9ad359
·
verified ·
1 Parent(s): 4545ff6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -18
app.py CHANGED
@@ -1,13 +1,11 @@
1
  from fastapi import FastAPI
2
  import uvicorn
3
- from awq import AutoAWQForCausalLM
4
- from transformers import AutoTokenizer
5
 
6
- model_name = "TheBloke/Guanaco-7B-Uncensored-AWQ"
7
 
8
- model = AutoAWQForCausalLM.from_quantized(model_name, fuse_layers=True,
9
- trust_remote_code=False, safetensors=True)
10
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=False)
11
  tokenizer.pad_token = tokenizer.eos_token
12
 
13
  app = FastAPI()
@@ -18,20 +16,12 @@ def greet_json():
18
 
19
  @app.get("/message")
20
  async def message(input: str):
21
- prompt=f'''### Human: {input}
22
- ### Assistant:
23
-
24
- '''
25
-
26
- inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).input_ids.cpu()
27
 
28
  output = model.generate(
29
- inputs,
30
- do_sample=True,
31
- temperature=0.7,
32
- top_p=0.95,
33
- top_k=40,
34
- max_new_tokens=512
35
  )
36
 
37
  response = tokenizer.decode(output[0], skip_special_tokens=True)
 
1
from fastapi import FastAPI
import uvicorn
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face Hub repo of the (unquantized, HF-format) checkpoint.
model_name = "TheBloke/Wizard-Vicuna-13B-Uncensored-HF"

# BUG FIX: `AutoModel` has no `from_quantized` method (that API belongs to
# AutoAWQ's AutoAWQForCausalLM), and `AutoModel` instances lack a language-
# modeling head, so `.generate()` would fail downstream. A plain HF causal-LM
# checkpoint is loaded with AutoModelForCausalLM.from_pretrained.
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# LLaMA-family tokenizers ship without a pad token; reuse EOS so that
# tokenizer(..., padding=True) works in the request handler.
tokenizer.pad_token = tokenizer.eos_token

app = FastAPI()
 
16
 
17
  @app.get("/message")
18
  async def message(input: str):
19
+ inputs = tokenizer(input, return_tensors="pt", padding=True, truncation=True)
 
 
 
 
 
20
 
21
  output = model.generate(
22
+ input_ids=inputs["input_ids"],
23
+ attention_mask=inputs["attention_mask"],
24
+ max_new_tokens=100,
 
 
 
25
  )
26
 
27
  response = tokenizer.decode(output[0], skip_special_tokens=True)