Spaces:

DesiredName
/

test

Build error

File size: 1,209 Bytes

39cc8a5
51e3565
823c760
a5b1f33
51e3565
 
 
 
 
 
 
 
afc73c8
51e3565
 
 
 
 
 
afc73c8
51e3565
 
39cc8a5
 
 
 
 
 
a5b1f33
c6cb00e
65f1222
 
9481fa2
823c760
 
73fcf85

from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import uvicorn

# Hugging Face Hub repository that supplies both the model weights and the
# tokenizer. Defined once so the two loads below cannot drift apart.
MODEL_ID = "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ"

# bitsandbytes 4-bit quantization settings handed to the model loader.
# NOTE(review): the repository name ends in "-GPTQ", which suggests the
# checkpoint is already GPTQ-quantized. Confirm that pairing it with a
# bitsandbytes config is intended; if not, either drop `quantization_config`
# or point MODEL_ID at an unquantized checkpoint.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Enable 4-bit quantization
    bnb_4bit_quant_type="nf4",  # Use normalized float 4
    bnb_4bit_compute_dtype="float16",  # Faster computations
    bnb_4bit_use_double_quant=True,  # Extra compression
)

# Loaded at import time, so importing this module downloads/loads the model.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",  # Auto-distribute across CPU/GPU
    # Permits Python code shipped inside the Hub repository to be executed.
    # NOTE(review): confirm this checkpoint actually needs it; if it does not,
    # removing the flag avoids running remote code.
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
)

# Application object on which the route handlers in this file are registered.
app = FastAPI()

@app.get("/")
def greet_json():
    """Serve the root route with a fixed greeting payload."""
    greeting = {"Hello": "World!"}
    return greeting

@app.get("/message")
async def message(input: str):
    """Generate a short completion for a prompt with the module-level model.

    Args:
        input: Prompt text, read from the ``input`` query parameter. The name
            shadows the builtin ``input`` but is part of the public
            query-string interface, so it is kept unchanged.

    Returns:
        The first sequence returned by ``model.generate``, decoded to text
        with special tokens removed.
    """
    # NOTE(review): generation is a blocking call inside an ``async def``
    # handler, so other requests wait while it runs. Consider a plain ``def``
    # handler or offloading to a worker thread if concurrency matters.

    # A single prompt needs no padding, and asking for it raises when the
    # tokenizer defines no pad token, so only truncation is requested.
    inputs = tokenizer(input, return_tensors="pt", truncation=True)

    # The model is loaded with device_map="auto" and may live on an
    # accelerator; the encoded tensors must be on the same device.
    inputs = inputs.to(model.device)

    output = model.generate(
        **inputs,
        # Budget applies to newly generated tokens only. `max_length` also
        # counted the prompt, leaving nothing to generate for long prompts.
        max_new_tokens=50,
        # `temperature` only takes effect when sampling is enabled.
        do_sample=True,
        temperature=0.3,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

if __name__ == "__main__":
    # Run as a script: serve the app on all interfaces, port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )