from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import nltk
from nltk.corpus import stopwords
import re

# Download the NLTK stopword list (skipped if it is already cached)
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = ' '.join(word for word in text.split() if word not in stop_words)  # Remove stopwords
    return text
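# For example (a sketch; the exact output depends on the NLTK stopword list):
#   clean_text("The quick brown fox, it jumped!")  ->  "quick brown fox jumped"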
# Load RoBERTa model and tokenizer from a local checkpoint directory
roberta_model = AutoModelForSequenceClassification.from_pretrained("./roberta-base")
roberta_tokenizer = AutoTokenizer.from_pretrained("./roberta-base")

# Load BERT model and tokenizer from a local checkpoint directory
bert_model = AutoModelForSequenceClassification.from_pretrained("./bert-base-uncased")
bert_tokenizer = AutoTokenizer.from_pretrained("./bert-base-uncased")
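# Note: "./roberta-base" and "./bert-base-uncased" are assumed to hold
# fine-tuned sequence-classification checkpoints. As a sketch, the same calls
# also accept Hub model IDs, e.g.:
#   AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
# (an un-fine-tuned base model would produce essentially random predictions).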
app = FastAPI()

class TextData(BaseModel):
    text: str
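# Expected request body, e.g.: {"text": "Some text to classify"}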
# Helper function to make a prediction and map it to 0 (human) or 100 (AI)
def predict_text(model, tokenizer, text):
    # Preprocess the text
    text = clean_text(text)
    inputs = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='pt')

    # Move model and inputs to the same device (GPU if available, else CPU)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Get model predictions without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities and take the most likely class
    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=-1)
    predicted_class = torch.argmax(probabilities, dim=-1).item()

    # Return 0 for human, 100 for AI
    return 100 if predicted_class == 1 else 0
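# Example usage (a sketch, assuming the models above loaded successfully):
#   predict_text(roberta_model, roberta_tokenizer, "Sample input text")
#   -> 0 (human) or 100 (AI)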
# Endpoint to predict with RoBERTa (the route path here is an assumption)
@app.post("/predict/roberta")
def predict_roberta(data: TextData):
    predicted_value = predict_text(roberta_model, roberta_tokenizer, data.text)
    return {"text": data.text, "Score": predicted_value}
# Endpoint to predict with BERT (the route path here is an assumption)
@app.post("/predict/bert")
def predict_bert(data: TextData):
    predicted_value = predict_text(bert_model, bert_tokenizer, data.text)
    return {"text": data.text, "Score": predicted_value}
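# Minimal way to serve the app locally (a sketch; Hugging Face Spaces
# conventionally uses port 7860, but the port is an assumption here):
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request against the assumed route path:
#   curl -X POST http://localhost:7860/predict/roberta \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Some text to classify"}'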