GautamGaur committed on
Commit 9dc3474 · verified · 1 Parent(s): d49c2f7

Update app.py

Files changed (1)
  1. app.py +67 -67
app.py CHANGED
@@ -1,67 +1,67 @@
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- import torch
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
- import nltk
- from nltk.corpus import stopwords
- import re
- import spacy
-
- nltk.download('stopwords')
- stop_words = set(stopwords.words('english'))
-
- def clean_text(text):
-     text = text.lower()  # Convert to lowercase
-     text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
-     text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
-     return text
-
- roberta_model = AutoModelForSequenceClassification.from_pretrained("roberta-base")
- roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
-
- # Load BERT model and tokenizer
- bert_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
- bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-
- app = FastAPI()
-
- class TextData(BaseModel):
-     text: str
-
- # Helper function to make predictions and convert to 0 (human) or 100 (AI)
- def predict_text(model, tokenizer, text):
-     text = clean_text(text)
-     # Preprocess the text
-     inputs = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='pt')
-
-     # Move to the correct device (GPU/CPU)
-     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-     model.to(device)
-     inputs = {k: v.to(device) for k, v in inputs.items()}
-
-     # Get model predictions
-     with torch.no_grad():
-         outputs = model(**inputs)
-
-     # Convert logits to probabilities
-     logits = outputs.logits
-     probabilities = torch.softmax(logits, dim=-1)
-     predicted_class = torch.argmax(probabilities, dim=-1).item()
-     # ai_prob = probabilities[0][1].item() * 100
-     # print(ai_prob)
-     # Return 0 for human, 100 for AI
-     return 100 if predicted_class == 1 else 0
-
- # Endpoint to predict with RoBERTa
- @app.post("/predict_copyleaks_V1")
- def predict_roberta(data: TextData):
-     predicted_value = predict_text(roberta_model, roberta_tokenizer, data.text)
-     return {"text": data.text, "Score": predicted_value}
-
- # Endpoint to predict with BERT
- @app.post("/predict_copyleaks_V2")
- def predict_bert(data: TextData):
-     predicted_value = predict_text(bert_model, bert_tokenizer, data.text)
-     return {"text": data.text, "Score": predicted_value}
-
 
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ import nltk
+ from nltk.corpus import stopwords
+ import re
+ import spacy
+
+ nltk.download('stopwords')
+ stop_words = set(stopwords.words('english'))
+
+ def clean_text(text):
+     text = text.lower()  # Convert to lowercase
+     text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
+     text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
+     return text
+
+ roberta_model = AutoModelForSequenceClassification.from_pretrained("./roberta-base")
+ roberta_tokenizer = AutoTokenizer.from_pretrained("./roberta-base")
+
+ # Load BERT model and tokenizer
+ bert_model = AutoModelForSequenceClassification.from_pretrained("./bert-base-uncased")
+ bert_tokenizer = AutoTokenizer.from_pretrained("./bert-base-uncased")
+
+ app = FastAPI()
+
+ class TextData(BaseModel):
+     text: str
+
+ # Helper function to make predictions and convert to 0 (human) or 100 (AI)
+ def predict_text(model, tokenizer, text):
+     text = clean_text(text)
+     # Preprocess the text
+     inputs = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='pt')
+
+     # Move to the correct device (GPU/CPU)
+     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+     model.to(device)
+     inputs = {k: v.to(device) for k, v in inputs.items()}
+
+     # Get model predictions
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     # Convert logits to probabilities
+     logits = outputs.logits
+     probabilities = torch.softmax(logits, dim=-1)
+     predicted_class = torch.argmax(probabilities, dim=-1).item()
+     # ai_prob = probabilities[0][1].item() * 100
+     # print(ai_prob)
+     # Return 0 for human, 100 for AI
+     return 100 if predicted_class == 1 else 0
+
+ # Endpoint to predict with RoBERTa
+ @app.post("/predict_copyleaks_V1")
+ def predict_roberta(data: TextData):
+     predicted_value = predict_text(roberta_model, roberta_tokenizer, data.text)
+     return {"text": data.text, "Score": predicted_value}
+
+ # Endpoint to predict with BERT
+ @app.post("/predict_copyleaks_V2")
+ def predict_bert(data: TextData):
+     predicted_value = predict_text(bert_model, bert_tokenizer, data.text)
+     return {"text": data.text, "Score": predicted_value}
+
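
For quick verification, a minimal client sketch for exercising the two endpoints once this version of app.py is serving. The uvicorn invocation, host, and port below are assumptions for illustration, not part of the commit.

# Client sketch (assumes the app is served with, e.g., `uvicorn app:app --port 8000`;
# the entrypoint, host, and port are assumptions).
import requests

payload = {"text": "Sample passage to score."}

# RoBERTa-backed endpoint
r1 = requests.post("http://localhost:8000/predict_copyleaks_V1", json=payload)
print(r1.json())  # {"text": ..., "Score": 0 or 100}; 0 means human, 100 means AI

# BERT-backed endpoint
r2 = requests.post("http://localhost:8000/predict_copyleaks_V2", json=payload)
print(r2.json())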