# copyllm / app.py
# GautamGaur's picture
# Update app.py
# 9dc3474 verified
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import nltk
from nltk.corpus import stopwords
import re
import spacy
# Make sure the NLTK stop-word corpus is available, then cache the English
# list as a set for O(1) membership tests in clean_text.
# nltk.download() contacts the NLTK index server every time it is called, so
# only invoke it when the corpus cannot be found locally; this keeps start-up
# fast and lets the service boot without network access once the data exists.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
def clean_text(text):
    """Normalize raw text before it is handed to a tokenizer.

    Steps, in order:
      1. lowercase the whole string;
      2. delete every character that is neither a word character nor
         whitespace (i.e. punctuation and symbols);
      3. split on whitespace and drop tokens found in the module-level
         ``stop_words`` set;
      4. re-join the surviving tokens with single spaces.

    Because punctuation is stripped before the stop-word filter runs,
    contractions are compared without their apostrophe.

    Args:
        text: The input string.

    Returns:
        The cleaned string (possibly empty).
    """
    lowered = text.lower()
    without_punct = re.sub(r'[^\w\s]', '', lowered)
    kept_tokens = (token for token in without_punct.split() if token not in stop_words)
    return ' '.join(kept_tokens)
# Local checkpoint directories for the two classifiers.
_ROBERTA_DIR = "./roberta-base"
_BERT_DIR = "./bert-base-uncased"

# Load RoBERTa model and tokenizer
roberta_model = AutoModelForSequenceClassification.from_pretrained(_ROBERTA_DIR)
roberta_tokenizer = AutoTokenizer.from_pretrained(_ROBERTA_DIR)

# Load BERT model and tokenizer
bert_model = AutoModelForSequenceClassification.from_pretrained(_BERT_DIR)
bert_tokenizer = AutoTokenizer.from_pretrained(_BERT_DIR)

# Application object that the route decorators in this module register on.
app = FastAPI()
# Request body schema accepted by both prediction endpoints.
class TextData(BaseModel):
    # Raw text to classify; predict_text runs it through clean_text before
    # tokenization, so callers send the unprocessed string.
    text: str
# Helper function to make predictions and convert to 0 (human) or 100 (AI)
def predict_text(model, tokenizer, text):
    """Classify ``text`` with the given model and return a hard 0/100 score.

    The text is normalized with ``clean_text``, tokenized to a fixed length
    of 128 tokens (longer inputs are truncated, shorter ones padded), and
    run through ``model`` without gradient tracking. The model is moved to
    the GPU when CUDA is available, otherwise it stays on the CPU.

    Args:
        model: A sequence-classification model whose output exposes
            ``.logits``.
        tokenizer: The tokenizer matching ``model``.
        text: Raw input string.

    Returns:
        ``100`` when the highest-probability class index is 1 (treated as
        "AI" by this service), otherwise ``0`` ("human"). This is a hard
        decision, not a probability.
    """
    cleaned = clean_text(text)

    # Encode as a single-row batch of PyTorch tensors.
    encoded = tokenizer(
        cleaned,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors='pt',
    )

    # Run on GPU when present; model and inputs must share a device.
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(target_device)
    batch = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        result = model(**batch)

    class_probs = torch.softmax(result.logits, dim=-1)
    winner = torch.argmax(class_probs, dim=-1).item()

    # Return 0 for human, 100 for AI
    if winner == 1:
        return 100
    return 0
# Endpoint to predict with RoBERTa
# Responds with the submitted text echoed back under "text" and the 0/100
# verdict from predict_text under "Score".
@app.post("/predict_copyleaks_V1")
def predict_roberta(data: TextData):
    submitted = data.text
    score = predict_text(roberta_model, roberta_tokenizer, submitted)
    response = {"text": data.text}
    response["Score"] = score
    return response
# Endpoint to predict with BERT
# Responds with the submitted text echoed back under "text" and the 0/100
# verdict from predict_text under "Score".
@app.post("/predict_copyleaks_V2")
def predict_bert(data: TextData):
    submitted = data.text
    score = predict_text(bert_model, bert_tokenizer, submitted)
    response = {"text": data.text}
    response["Score"] = score
    return response