"""FastAPI service that scores text as human-written (0) or AI-written (100).

Two sequence-classification checkpoints are loaded from local directories at
import time and exposed through one POST endpoint each.
"""
import re

import nltk
import spacy
import torch
from fastapi import FastAPI, HTTPException
from nltk.corpus import stopwords
from pydantic import BaseModel
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Only hit the network when the stopword corpus is missing locally; an
# unconditional download on every start-up slows boot and needs connectivity
# even when the data is already installed.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))

# Resolved once: the available hardware does not change between requests.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def clean_text(text: str) -> str:
    """Return *text* lowercased, with punctuation and English stopwords removed.

    Tokens are split on whitespace and re-joined with single spaces, so runs
    of whitespace collapse as well.
    """
    lowered = text.lower()
    # Drop everything that is neither a word character nor whitespace.
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    kept = [word for word in without_punctuation.split() if word not in stop_words]
    return ' '.join(kept)


# Load RoBERTa model and tokenizer; the model is moved to DEVICE once here
# instead of on every request.
roberta_model = AutoModelForSequenceClassification.from_pretrained("./roberta-base")
roberta_model.to(DEVICE)
roberta_tokenizer = AutoTokenizer.from_pretrained("./roberta-base")

# Load BERT model and tokenizer
bert_model = AutoModelForSequenceClassification.from_pretrained("./bert-base-uncased")
bert_model.to(DEVICE)
bert_tokenizer = AutoTokenizer.from_pretrained("./bert-base-uncased")

app = FastAPI()


class TextData(BaseModel):
    """Request body for the prediction endpoints."""

    # Raw text to classify; it is cleaned with clean_text() before inference.
    text: str


def predict_text(model, tokenizer, text):
    """Classify *text* with *model* and return 0 (human) or 100 (AI).

    Args:
        model: Sequence-classification model; class index 1 is treated as AI.
        tokenizer: Tokenizer matching *model*.
        text: Raw input text; it is passed through clean_text() first.

    Returns:
        100 if the arg-max class is 1, otherwise 0.
    """
    cleaned = clean_text(text)

    # Preprocess the text: a single sequence, truncated/padded to 128 tokens.
    encoded = tokenizer(
        cleaned,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors='pt',
    )

    # Send the inputs to wherever the model's weights live, so the helper
    # works for any model regardless of where the caller placed it.
    target_device = next(model.parameters()).device
    encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    probabilities = torch.softmax(logits, dim=-1)
    # .item() relies on the batch holding exactly one sequence.
    predicted_class = torch.argmax(probabilities, dim=-1).item()

    return 100 if predicted_class == 1 else 0


# Endpoint to predict with RoBERTa
@app.post("/predict_copyleaks_V1")
def predict_roberta(data: TextData):
    """Score ``data.text`` with the RoBERTa model and echo the text back."""
    predicted_value = predict_text(roberta_model, roberta_tokenizer, data.text)
    return {"text": data.text, "Score": predicted_value}


# Endpoint to predict with BERT
@app.post("/predict_copyleaks_V2")
def predict_bert(data: TextData):
    """Score ``data.text`` with the BERT model and echo the text back."""
    predicted_value = predict_text(bert_model, bert_tokenizer, data.text)
    return {"text": data.text, "Score": predicted_value}