DesiredName committed
Commit 97917f4 · verified · 1 Parent(s): 7c4e143

Update app.py

Files changed (1)
  1. app.py +16 -8
app.py CHANGED
@@ -1,12 +1,11 @@
 from fastapi import FastAPI
 import uvicorn
-from llama_cpp import Llama
+from transformers import AutoTokenizer, AutoModelForCausalLM  # causal-LM class, needed for .generate()
 
-llm = Llama(
-    model_path="Wizard-Vicuna-13B-Uncensored.Q4_K_M.gguf",  # Path to your .gguf file
-    n_ctx=2048,  # Context length
-    n_threads=8  # CPU threads (adjust for your hardware)
-)
+model_name = "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ"
+
+model = AutoModelForCausalLM.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 app = FastAPI()
 
@@ -16,8 +15,17 @@ def greet_json():
 
 @app.get("/message")
 async def message(input: str):
-    output = llm.create_completion(input, max_tokens=100)
-    response = output["choices"][0]["text"]
+    inputs = tokenizer(input, return_tensors="pt", padding=True, truncation=True)
+
+    output = model.generate(
+        input_ids=inputs["input_ids"],
+        attention_mask=inputs["attention_mask"],  # Pass the attention mask explicitly
+        max_new_tokens=100,
+        temperature=0.0,  # Disables randomness (redundant with do_sample=False)
+        do_sample=False   # Greedy decoding
+    )
+
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
 
     return response
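
One caveat on the new from_pretrained call: TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ is a GPTQ-quantized checkpoint, which transformers can only load with a quantization backend installed and which is intended to run on a GPU. A minimal sketch of a device-aware load, assuming the accelerate package plus a GPTQ backend (e.g. optimum with auto-gptq) are available; this is an illustration, not part of the commit:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ"

    # device_map="auto" lets accelerate place the quantized weights on the
    # available GPU(s); without it the load defaults to CPU.
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

With the model on a GPU, the tokenized inputs must be moved to the same device before generate(), e.g. inputs = {k: v.to(model.device) for k, v in inputs.items()}.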
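
For completeness, a quick sketch of exercising the updated /message route once the app is up (assumptions not fixed by this commit: the app is served by uvicorn as app:app on port 8000, and the requests package is installed on the client side):

    import requests

    # "input" matches the query parameter declared in the route signature.
    r = requests.get(
        "http://localhost:8000/message",
        params={"input": "Tell me a short story."},
    )
    print(r.json())  # the decoded completion string

Because the handler returns a plain string, FastAPI serializes it as a JSON string, so r.json() yields the generated text directly.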