"""Flask service exposing a RoBERTa sequence-classification checkpoint as a
code readability scorer.

Routes:
    /         -- echoes the request URL.
    /predict  -- accepts JSON ``{"code": "<source text>"}`` and returns a score.
"""

from flask import Flask, request, jsonify
import torch
from transformers import (
    RobertaTokenizer,
    RobertaForSequenceClassification,
    RobertaConfig,
)
import os

app = Flask(__name__)


def load_model():
    """Build the classifier from the saved checkpoint and return it in eval mode.

    The checkpoint file ``codebert_readability_scorer.pth`` is expected to hold
    a ``'config'`` entry (passed to ``RobertaConfig.from_dict``) and a
    ``'model_state_dict'`` entry (passed to ``load_state_dict``).

    Returns:
        A ``RobertaForSequenceClassification`` instance with weights loaded,
        mapped to CPU and switched to evaluation mode.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise when the file is
        missing or does not match the config (propagated to the caller).
    """
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # that come from a trusted source.
    checkpoint = torch.load(
        "codebert_readability_scorer.pth",
        map_location=torch.device('cpu'),
    )
    config = RobertaConfig.from_dict(checkpoint['config'])

    # Instantiate from config only, then fill in the saved weights.
    model = RobertaForSequenceClassification(config)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model


# Pre-bind both names so a failed load leaves them defined (as None) instead
# of causing a NameError inside the request handler later on.
tokenizer = None
model = None
try:
    tokenizer = RobertaTokenizer.from_pretrained("./tokenizer")
    model = load_model()
    print("Model and tokenizer loaded successfully!")
except Exception as e:
    # Best-effort startup: keep the app importable and report the failure;
    # /predict answers 503 while the components are unavailable.
    print(f"Error loading model: {str(e)}")


@app.route("/")
def home():
    """Return the URL of the incoming request as the response body."""
    return request.url


# POST is the natural verb for a JSON body; GET is kept so existing callers
# that relied on the default method keep working.
@app.route("/predict", methods=["GET", "POST"])
def predict():
    """Score the readability of the code supplied in the JSON request body.

    Expects a JSON object with a string ``"code"`` field.

    Returns:
        200 with ``readability_score`` (sigmoid of the model logit, rounded to
        4 decimals) and ``processed_code`` (the input, cut to its first 500
        characters plus ``"..."`` when longer).
        400 when the body is not a JSON object with a string ``"code"``.
        503 when the model or tokenizer failed to load at startup.
        500 when tokenization or inference raises.
    """
    if tokenizer is None or model is None:
        return jsonify({"error": "Model is not loaded"}), 503

    # force=True ignores the Content-Type header; silent=True yields None
    # instead of raising on unparsable bodies.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict) or "code" not in data:
        return jsonify({"error": f"Missing 'code' parameter. data: {data}"}), 400

    code = data["code"]
    if not isinstance(code, str):
        return jsonify({"error": "'code' must be a string"}), 400

    try:
        inputs = tokenizer(
            code,
            truncation=True,
            padding='max_length',
            max_length=512,
            return_tensors='pt',
        )

        with torch.no_grad():
            outputs = model(**inputs)

        # .item() requires a single-element tensor, i.e. one logit for the
        # one input -- assumes the checkpoint config has num_labels == 1.
        score = torch.sigmoid(outputs.logits).item()
    except Exception as e:
        return jsonify({"error": str(e)}), 500

    return jsonify({
        "readability_score": round(score, 4),
        "processed_code": (code[:500] + "...") if len(code) > 500 else code,
    })


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)