"""Minimal FastAPI service exposing a Qwen causal-LM text-generation endpoint."""

from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer
import uvicorn

MODEL_NAME = "Qwen/Qwen3-0.6B"

# Loaded once at import time; both objects are reused across requests.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

app = FastAPI()


@app.get("/")
def greet_json():
    """Trivial health-check / landing route."""
    return {"Hello": "World!"}


@app.get("/message")
def message(input: str):  # `input` shadows the builtin, but it is the public query-param name
    """Generate a short continuation of *input* with the Qwen model.

    Declared as a plain ``def`` (not ``async``) so FastAPI runs the blocking
    ``model.generate`` call in its threadpool instead of stalling the event loop.

    Returns:
        The newly generated text only — the prompt is not echoed back.
    """
    inputs = tokenizer(input, return_tensors="pt", padding=True, truncation=True)
    output = model.generate(
        **inputs,
        # max_length counted the prompt tokens too, so long prompts produced
        # little or no output; max_new_tokens bounds only the generated part.
        max_new_tokens=50,
        # temperature is silently ignored under greedy decoding — sampling
        # must be enabled for it to take effect.
        do_sample=True,
        temperature=0.3,
    )
    # Slice off the prompt tokens so only the continuation is decoded.
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)