DesiredName committed
Commit 97917f4 · verified · 1 Parent(s): 7c4e143

Update app.py

Files changed (1)
  1. app.py +16 -8
app.py CHANGED
@@ -1,12 +1,11 @@
 from fastapi import FastAPI
 import uvicorn
-from llama_cpp import Llama
+from transformers import AutoTokenizer, AutoModelForCausalLM  # causal-LM class, needed for .generate()
 
-llm = Llama(
-    model_path="Wizard-Vicuna-13B-Uncensored.Q4_K_M.gguf",  # Path to your .gguf file
-    n_ctx=2048,  # Context length
-    n_threads=8  # CPU threads (adjust for your hardware)
-)
+model_name = "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ"
+
+model = AutoModelForCausalLM.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 app = FastAPI()
 
@@ -16,8 +15,17 @@ def greet_json():
 
 @app.get("/message")
 async def message(input: str):
-    output = llm.create_completion(input, max_tokens=100)
-    response = output["choices"][0]["text"]
+    inputs = tokenizer(input, return_tensors="pt", padding=True, truncation=True)
+
+    output = model.generate(
+        input_ids=inputs["input_ids"],
+        attention_mask=inputs["attention_mask"],  # Pass the attention mask explicitly
+        max_new_tokens=100,
+        temperature=0.0,  # Disables randomness (redundant with do_sample=False)
+        do_sample=False   # Greedy decoding
+    )
+
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
 
     return response
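
One caveat on the new from_pretrained call: TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ is a GPTQ-quantized checkpoint, which transformers can only load with a quantization backend installed and which is intended to run on a GPU. A minimal sketch of a device-aware load, assuming the accelerate package plus a GPTQ backend (e.g. optimum with auto-gptq) are available; this is an illustration, not part of the commit:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ"

    # device_map="auto" lets accelerate place the quantized weights on the
    # available GPU(s); without it the load defaults to CPU.
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

With the model on a GPU, the tokenized inputs must be moved to the same device before generate(), e.g. inputs = {k: v.to(model.device) for k, v in inputs.items()}.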
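
For completeness, a quick sketch of exercising the updated /message route once the app is up (assumptions not fixed by this commit: the app is served by uvicorn as app:app on port 8000, and the requests package is installed on the client side):

    import requests

    # "input" matches the query parameter declared in the route signature.
    r = requests.get(
        "http://localhost:8000/message",
        params={"input": "Tell me a short story."},
    )
    print(r.json())  # the decoded completion string

Because the handler returns a plain string, FastAPI serializes it as a JSON string, so r.json() yields the generated text directly.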