DesiredName committed on
Commit
8a5a310
·
verified ·
1 Parent(s): 1f2a2bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -14
app.py CHANGED
@@ -1,23 +1,16 @@
1
  from fastapi import FastAPI
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
  import uvicorn
4
 
5
- bnb_config = BitsAndBytesConfig(
6
- load_in_4bit=True, # Enable 4-bit quantization
7
- bnb_4bit_quant_type="nf4", # Use normalized float 4
8
- bnb_4bit_compute_dtype="float16", # Faster computations
9
- bnb_4bit_use_double_quant=True # Extra compression
10
- )
11
 
12
  model = AutoModelForCausalLM.from_pretrained(
13
- "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ",
14
- quantization_config=bnb_config,
15
- device_map="auto", # Auto-distribute across CPU/GPU
16
- trust_remote_code=True # Required for Qwen!
17
  )
18
 
19
  tokenizer = AutoTokenizer.from_pretrained(
20
- "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ",
21
  trust_remote_code=True
22
  )
23
 
@@ -30,8 +23,18 @@ def greet_json():
30
  @app.get("/message")
31
  async def message(input: str):
32
  inputs = tokenizer(input, return_tensors="pt", padding=True, truncation=True)
33
- output = model.generate(**inputs, max_length=50, temperature=0.3)
34
- return tokenizer.decode(output[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
35
 
36
  if __name__ == "__main__":
37
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  from fastapi import FastAPI
2
+ from transformers import AutoModel, AutoTokenizer
3
  import uvicorn
4
 
5
+ model_name = "TheBloke/Wizard-Vicuna-13B-Uncensored-GGUF"
 
 
 
 
 
6
 
7
  model = AutoModelForCausalLM.from_pretrained(
8
+ model_name,
9
+ trust_remote_code=True
 
 
10
  )
11
 
12
  tokenizer = AutoTokenizer.from_pretrained(
13
+ model_name,
14
  trust_remote_code=True
15
  )
16
 
 
23
@app.get("/message")
async def message(input: str):
    """Generate a greedy-decoded completion for the `input` query parameter.

    Returns the decoded text (prompt included) as a plain string.
    NOTE: `input` shadows the builtin, but it is the public query-parameter
    name seen by API clients, so it is kept for backward compatibility.
    """
    # Tokenize the prompt; padding/truncation keep the tensors well-formed.
    inputs = tokenizer(input, return_tensors="pt", padding=True, truncation=True)

    # Greedy decoding. FIX: dropped `temperature=0.0` — with do_sample=False
    # transformers ignores it and emits a warning on every request.
    output = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],  # avoids pad-token warnings
        max_new_tokens=100,
        do_sample=False,
    )

    # Decode the whole sequence (prompt + completion) back to text.
    return tokenizer.decode(output[0], skip_special_tokens=True)
38
 
39
def _serve() -> None:
    # Bind all interfaces on 7860 — the port Hugging Face Spaces expects.
    uvicorn.run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    # Only start the server when executed as a script, not on import.
    _serve()