# llama3-math-tr / app.py
# Source: Hugging Face Space by VolkanSimsir ("Upload 3 files", commit 9e7e702, 708 bytes).
# NOTE: the original paste included raw/history/blame page chrome; preserved here as a comment.
import torch

from fastapi import FastAPI
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
# FastAPI application serving a Turkish math-tuned LLaMA-3-8B model.
app = FastAPI()

# 4-bit NF4 quantization so the 8B model fits in limited GPU memory;
# compute in fp16 to match the model dtype below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

MODEL_ID = "VolkanSimsir/LLaMA-3-8B-GRPO-math-tr"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
)

# BUG FIX: the /generate endpoint calls `pipe(text)`, but no pipeline was
# ever created (and the `pipeline` import was unused), so every request
# raised NameError. Load the matching tokenizer and build the pipeline here.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
@app.get("/")
def home():
    """Root endpoint; returns a static greeting so callers can check liveness."""
    payload = {"message": "Hello World"}
    return payload
@app.get("/generate")
def generate(text: str):
    """Run the text-generation pipeline on *text*.

    Returns the first generated sequence under the "result" key.
    """
    generations = pipe(text)
    first = generations[0]
    return {"result": first["generated_text"]}