File size: 708 Bytes
f9c313f
 
9e7e702
 
f9c313f
 
 
9e7e702
 
 
 
 
 
 
 
 
 
 
 
f9c313f
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from fastapi import FastAPI
from transformers import pipeline
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Hugging Face Hub repository id, used for both the model weights and the
# tokenizer so the two cannot drift apart.
MODEL_ID = "VolkanSimsir/LLaMA-3-8B-GRPO-math-tr"

app = FastAPI()

# Quantization settings: 4-bit weights using the "nf4" quant type, with
# float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Loaded once at import time; device placement is delegated to the library
# through device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
)

# The /generate handler calls `pipe`, so it has to exist at module level;
# without it every request fails with NameError. Because the model is passed
# as an already-instantiated object, the tokenizer is named explicitly by
# repo id rather than left for the pipeline to infer.
pipe = pipeline("text-generation", model=model, tokenizer=MODEL_ID)

@app.get("/")
def home():
    """Handle GET / by returning a fixed greeting payload."""
    greeting = {"message": "Hello World"}
    return greeting

@app.get("/generate")
def generate(text: str):
    """Handle GET /generate for the prompt given in the `text` parameter.

    Calls the module-level `pipe` with the prompt and returns the
    `generated_text` field of the first result under the "result" key.
    """
    generations = pipe(text)
    first_generation = generations[0]
    return {"result": first_generation["generated_text"]}