Spaces:

GautamGaur
/

copyllm

Sleeping

App Files Files Community

copyllm / app.py

GautamGaur

Update app.py

9dc3474 verified 10 months ago

raw

history blame contribute delete

2.42 kB

	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	import torch
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import nltk
	from nltk.corpus import stopwords
	import re
	import spacy

	# Make sure the NLTK stopword corpus is present, then build the English
	# stopword set once at import time; clean_text() tests membership against it.
	nltk.download("stopwords")
	stop_words = set(stopwords.words("english"))

	def clean_text(text):
	    """Normalize ``text`` before it is handed to a tokenizer.

	    Steps, in order: lowercase the input, delete every character that is
	    neither a word character nor whitespace, then drop whitespace-separated
	    tokens found in the module-level ``stop_words`` set.

	    Returns the surviving tokens joined by single spaces (an empty string
	    when nothing survives).
	    """
	    lowered = text.lower()
	    # Punctuation is removed, not replaced by a space, so "don't" -> "dont".
	    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
	    kept_tokens = [
	        token for token in without_punctuation.split()
	        if token not in stop_words
	    ]
	    return ' '.join(kept_tokens)

	# Local checkpoint directories for the two sequence classifiers.
	_ROBERTA_DIR = "./roberta-base"
	_BERT_DIR = "./bert-base-uncased"

	# RoBERTa classifier and its matching tokenizer.
	roberta_model = AutoModelForSequenceClassification.from_pretrained(_ROBERTA_DIR)
	roberta_tokenizer = AutoTokenizer.from_pretrained(_ROBERTA_DIR)

	# BERT classifier and its matching tokenizer.
	bert_model = AutoModelForSequenceClassification.from_pretrained(_BERT_DIR)
	bert_tokenizer = AutoTokenizer.from_pretrained(_BERT_DIR)

	# FastAPI application instance that the route decorators attach to.
	app = FastAPI()

	class TextData(BaseModel):
	    # Request body for the prediction endpoints: the raw text to classify.
	    # (Kept as a comment rather than a docstring so the generated schema
	    # is unchanged.)
	    text: str

	# Helper function to make predictions and convert to 0 (human) or 100 (AI)
	def predict_text(model, tokenizer, text):
	    """Classify ``text`` with ``model`` and map the result to a 0/100 score.

	    The text is first normalized with ``clean_text``, then tokenized
	    (truncated and padded to 128 tokens, PyTorch tensors). The model and
	    the encoded inputs are moved to CUDA when it is available, otherwise
	    they stay on the CPU, and inference runs with gradients disabled.

	    Returns 100 when the predicted class index is 1 (AI) and 0 otherwise
	    (human).
	    """
	    cleaned = clean_text(text)
	    encoded = tokenizer(
	        cleaned,
	        truncation=True,
	        padding='max_length',
	        max_length=128,
	        return_tensors='pt',
	    )

	    # Model and inputs must live on the same device before the forward pass.
	    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
	    model.to(device)
	    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

	    # Inference only: no autograd bookkeeping needed.
	    with torch.no_grad():
	        logits = model(**encoded).logits

	    # Pick the most probable class from the softmax distribution.
	    class_probabilities = torch.softmax(logits, dim=-1)
	    winning_class = torch.argmax(class_probabilities, dim=-1).item()

	    if winning_class == 1:
	        return 100
	    return 0

	# Endpoint to predict with RoBERTa
	@app.post("/predict_copyleaks_V1")
	def predict_roberta(data: TextData):
	    # Score the submitted text with the RoBERTa classifier and echo the
	    # original (uncleaned) text back alongside the 0/100 score.
	    # Comment, not docstring: a docstring would alter the published API docs.
	    submitted = data.text
	    score = predict_text(roberta_model, roberta_tokenizer, submitted)
	    return {"text": submitted, "Score": score}

	# Endpoint to predict with BERT
	@app.post("/predict_copyleaks_V2")
	def predict_bert(data: TextData):
	    # Score the submitted text with the BERT classifier and echo the
	    # original (uncleaned) text back alongside the 0/100 score.
	    # Comment, not docstring: a docstring would alter the published API docs.
	    submitted = data.text
	    score = predict_text(bert_model, bert_tokenizer, submitted)
	    return {"text": submitted, "Score": score}