from fastapi import FastAPI, Request, Form from fastapi.responses import HTMLResponse from transformers import GPT2LMHeadModel, GPT2Config import torch app = FastAPI() quantized_model_path = "gpt3_mini_quantized_2x_16bits.pth" config = GPT2Config.from_pretrained("Deniskin/gpt3_medium") # Load the config to initialize the model architecture quantized_model = GPT2LMHeadModel(config=config) # Initialize the model # Set the model to evaluation mode model.eval() # Function to generate text using the model def generate_text(prompt): input_ids = tokenizer.encode(prompt, return_tensors="pt") output = model.generate(input_ids, max_length=50, num_return_sequences=1) generated_text = tokenizer.decode(output[0], skip_special_tokens=True) return generated_text @app.get("/", response_class=HTMLResponse) async def home(request: Request): html_content = """