from flask import Flask, request, jsonify
import torch
import torch.serialization
from transformers import (
    RobertaConfig,
    RobertaTokenizer,
    RobertaForSequenceClassification,
)

# Initialize Flask app
app = Flask(__name__)

# Load the base tokenizer and the trained model once at startup
tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
torch.serialization.add_safe_globals([RobertaForSequenceClassification])
model = torch.load("model.pth", map_location=torch.device("cpu"), weights_only=False)

# Ensure the model is in evaluation mode
model.eval()


@app.route("/")
def home():
    return request.url


@app.route("/predict", methods=["POST"])
def predict():
    code = request.get_json()["code"]
    print("Received code:", code)

    # Load saved weights and config for the vulnerability scorer.
    # Note: this rebuilds the model on every request and shadows the
    # globals loaded above; caching it at startup would be cheaper.
    checkpoint = torch.load(
        "codebert_vulnerability_scorer.pth", map_location=torch.device("cpu")
    )
    config = RobertaConfig.from_dict(checkpoint["config"])

    # Rebuild the model with the correct architecture
    model = RobertaForSequenceClassification(config)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    # Load the tokenizer saved alongside the checkpoint
    tokenizer = RobertaTokenizer.from_pretrained("./tokenizer")

    # Prepare input
    inputs = tokenizer(
        code,
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt",
    )

    # Make prediction
    with torch.no_grad():
        outputs = model(**inputs)
        score = torch.sigmoid(outputs.logits).item()

    return jsonify({"score": score})


# Run the Flask app
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
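
# Example request (illustrative sketch only; assumes the server is running locally on
# port 7860 and that model.pth, codebert_vulnerability_scorer.pth, and ./tokenizer
# exist on disk -- the payload below is a made-up snippet, not from the project):
#
#   curl -X POST http://localhost:7860/predict \
#        -H "Content-Type: application/json" \
#        -d '{"code": "char buf[8]; strcpy(buf, user_input);"}'
#
# Expected response shape: {"score": 0.87}, where score is the sigmoid of the model logit.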