DesiredName committed on
Commit
9797205
·
verified ·
1 Parent(s): fc7e416

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -10
app.py CHANGED
@@ -1,11 +1,13 @@
1
  from fastapi import FastAPI
2
  import uvicorn
3
- from transformers import AutoTokenizer, AutoModel
 
4
 
5
  model_name = "TheBloke/Guanaco-7B-Uncensored-AWQ"
6
 
7
- model = AutoModel.from_pretrained(model_name)
8
- tokenizer = AutoTokenizer.from_pretrained(model_name)
 
9
 
10
  app = FastAPI()
11
 
@@ -15,15 +17,20 @@ def greet_json():
15
 
16
  @app.get("/message")
17
  async def message(input: str):
18
- prompt = "### Human: " + input + "\n### Assistant:"
19
- inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
 
 
 
 
20
 
21
  output = model.generate(
22
- input_ids=inputs["input_ids"],
23
- attention_mask=inputs["attention_mask"], # Pass attention_mask!
24
- max_new_tokens=100,
25
- temperature=0.0, # Disables randomness
26
- do_sample=False # Greedy decoding
 
27
  )
28
 
29
  response = tokenizer.decode(output[0], skip_special_tokens=True)
 
1
  from fastapi import FastAPI
2
  import uvicorn
3
+ from awq import AutoAWQForCausalLM
4
+ from transformers import AutoTokenizer
5
 
6
model_name = "TheBloke/Guanaco-7B-Uncensored-AWQ"

# Load the AWQ-quantized checkpoint. BUG FIX: the original referenced an
# undefined variable `model_name_or_path` (NameError at import time); the
# identifier actually assigned above is `model_name`.
# fuse_layers=True fuses attention/MLP layers for faster AWQ inference;
# safetensors=True loads the .safetensors weights shipped by TheBloke repos.
model = AutoAWQForCausalLM.from_quantized(model_name, fuse_layers=True,
                                          trust_remote_code=False, safetensors=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=False)
11
 
12
  app = FastAPI()
13
 
 
17
 
18
  @app.get("/message")
19
  async def message(input: str):
20
+ prompt=f'''### Human: {input}
21
+ ### Assistant:
22
+
23
+ '''
24
+
25
+ inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).input_ids.cpu()
26
 
27
  output = model.generate(
28
+ inputs,
29
+ do_sample=True,
30
+ temperature=0.7,
31
+ top_p=0.95,
32
+ top_k=40,
33
+ max_new_tokens=512
34
  )
35
 
36
  response = tokenizer.decode(output[0], skip_special_tokens=True)