from transformers import AutoModelForSequenceClassification, AutoTokenizer
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import nltk
from nltk.corpus import stopwords
import re
import spacy

# Fetch the NLTK stop-word corpus only when it is not already installed.
# An unconditional nltk.download() goes out to the NLTK download server on
# every import just to check the package status, which slows start-up and
# prints errors when the host is offline.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# English stop words held in a set so clean_text() gets O(1) membership tests.
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Normalise raw text before it is handed to a tokenizer.

    The input is lower-cased, every character that is neither a word
    character nor whitespace is deleted, and any whitespace-separated token
    found in the module-level ``stop_words`` set is dropped. The surviving
    tokens are joined back together with single spaces.
    """
    # NOTE(review): punctuation is stripped *before* the stop-word filter, so
    # any stop-word entry that contains an apostrophe can never match here.
    # Confirm this is the same preprocessing the models were trained with.
    depunctuated = re.sub(r'[^\w\s]', '', text.lower())
    kept_tokens = (token for token in depunctuated.split() if token not in stop_words)
    return ' '.join(kept_tokens)

# Local directories holding the saved checkpoints (relative to the working
# directory the service is started from).
_ROBERTA_DIR = "./roberta-base"
_BERT_DIR = "./bert-base-uncased"

# RoBERTa classifier and its tokenizer, loaded once at import time.
roberta_model = AutoModelForSequenceClassification.from_pretrained(_ROBERTA_DIR)
roberta_tokenizer = AutoTokenizer.from_pretrained(_ROBERTA_DIR)

# BERT classifier and its tokenizer, loaded once at import time.
bert_model = AutoModelForSequenceClassification.from_pretrained(_BERT_DIR)
bert_tokenizer = AutoTokenizer.from_pretrained(_BERT_DIR)

# FastAPI application instance on which the @app.post routes in this module
# are registered.
app = FastAPI()

class TextData(BaseModel):
    """Request body accepted by the prediction endpoints in this module."""
    # Raw text to classify; the endpoints pass it to predict_text() and echo
    # it back unchanged in the response.
    text: str

def predict_text(model, tokenizer, text: str) -> int:
    """Classify *text* with *model* and map the result to a 0/100 score.

    The text is first normalised with ``clean_text`` and then tokenised to a
    fixed length of 128 tokens: longer inputs are truncated and shorter ones
    are padded. The model is moved to the GPU when CUDA is available,
    otherwise to the CPU, and is run without gradient tracking.

    Args:
        model: Classification model. It is called with the tokenizer output
            as keyword arguments and must return an object with ``.logits``.
        tokenizer: Tokenizer matching *model*; called with the cleaned text.
        text: Raw input text for a single document.

    Returns:
        ``100`` when the highest-scoring class index is 1 (treated as AI),
        otherwise ``0`` (treated as human).
    """
    # NOTE(review): model.eval() is never called here, so this relies on the
    # caller passing a model that is already in evaluation mode — confirm.
    encoded = tokenizer(
        clean_text(text),
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors='pt',
    )

    # Model and inputs must live on the same device before the forward pass.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax is monotonic, so the arg-max of the raw logits already gives the
    # predicted class; no probabilities are needed for a hard 0/100 decision.
    # .item() assumes a single input sequence, which is all this helper builds.
    predicted_class = logits.argmax(dim=-1).item()
    return 100 if predicted_class == 1 else 0

# RoBERTa-backed scoring route.
@app.post("/predict_copyleaks_V1")
def predict_roberta(data: TextData):
    """Score the submitted text with the RoBERTa classifier.

    Returns a dict that echoes the submitted text under ``"text"`` and carries
    the value produced by ``predict_text`` under ``"Score"``.
    """
    submitted = data.text
    score = predict_text(roberta_model, roberta_tokenizer, submitted)
    return {"text": submitted, "Score": score}

# BERT-backed scoring route.
@app.post("/predict_copyleaks_V2")
def predict_bert(data: TextData):
    """Score the submitted text with the BERT classifier.

    Returns a dict that echoes the submitted text under ``"text"`` and carries
    the value produced by ``predict_text`` under ``"Score"``.
    """
    submitted = data.text
    score = predict_text(bert_model, bert_tokenizer, submitted)
    return {"text": submitted, "Score": score}