GautamGaur committed on
Commit 9dc3474 · verified · 1 Parent(s): d49c2f7

Update app.py

Files changed (1)
  1. app.py +67 -67
app.py CHANGED
@@ -1,67 +1,67 @@
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- import torch
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
- import nltk
- from nltk.corpus import stopwords
- import re
- import spacy
-
- nltk.download('stopwords')
- stop_words = set(stopwords.words('english'))
-
- def clean_text(text):
-     text = text.lower()  # Convert to lowercase
-     text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
-     text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
-     return text
-
- roberta_model = AutoModelForSequenceClassification.from_pretrained("roberta-base")
- roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
-
- # Load BERT model and tokenizer
- bert_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
- bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-
- app = FastAPI()
-
- class TextData(BaseModel):
-     text: str
-
- # Helper function to make predictions and convert to 0 (human) or 100 (AI)
- def predict_text(model, tokenizer, text):
-     text = clean_text(text)
-     # Preprocess the text
-     inputs = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='pt')
-
-     # Move to the correct device (GPU/CPU)
-     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-     model.to(device)
-     inputs = {k: v.to(device) for k, v in inputs.items()}
-
-     # Get model predictions
-     with torch.no_grad():
-         outputs = model(**inputs)
-
-     # Convert logits to probabilities
-     logits = outputs.logits
-     probabilities = torch.softmax(logits, dim=-1)
-     predicted_class = torch.argmax(probabilities, dim=-1).item()
-     # ai_prob = probabilities[0][1].item() * 100
-     # print(ai_prob)
-     # Return 0 for human, 100 for AI
-     return 100 if predicted_class == 1 else 0
-
- # Endpoint to predict with RoBERTa
- @app.post("/predict_copyleaks_V1")
- def predict_roberta(data: TextData):
-     predicted_value = predict_text(roberta_model, roberta_tokenizer, data.text)
-     return {"text": data.text, "Score": predicted_value}
-
- # Endpoint to predict with BERT
- @app.post("/predict_copyleaks_V2")
- def predict_bert(data: TextData):
-     predicted_value = predict_text(bert_model, bert_tokenizer, data.text)
-     return {"text": data.text, "Score": predicted_value}
-
 
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ import nltk
+ from nltk.corpus import stopwords
+ import re
+ import spacy
+
+ nltk.download('stopwords')
+ stop_words = set(stopwords.words('english'))
+
+ def clean_text(text):
+     text = text.lower()  # Convert to lowercase
+     text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
+     text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
+     return text
+
+ roberta_model = AutoModelForSequenceClassification.from_pretrained("./roberta-base")
+ roberta_tokenizer = AutoTokenizer.from_pretrained("./roberta-base")
+
+ # Load BERT model and tokenizer
+ bert_model = AutoModelForSequenceClassification.from_pretrained("./bert-base-uncased")
+ bert_tokenizer = AutoTokenizer.from_pretrained("./bert-base-uncased")
+
+ app = FastAPI()
+
+ class TextData(BaseModel):
+     text: str
+
+ # Helper function to make predictions and convert to 0 (human) or 100 (AI)
+ def predict_text(model, tokenizer, text):
+     text = clean_text(text)
+     # Preprocess the text
+     inputs = tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors='pt')
+
+     # Move to the correct device (GPU/CPU)
+     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+     model.to(device)
+     inputs = {k: v.to(device) for k, v in inputs.items()}
+
+     # Get model predictions
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     # Convert logits to probabilities
+     logits = outputs.logits
+     probabilities = torch.softmax(logits, dim=-1)
+     predicted_class = torch.argmax(probabilities, dim=-1).item()
+     # ai_prob = probabilities[0][1].item() * 100
+     # print(ai_prob)
+     # Return 0 for human, 100 for AI
+     return 100 if predicted_class == 1 else 0
+
+ # Endpoint to predict with RoBERTa
+ @app.post("/predict_copyleaks_V1")
+ def predict_roberta(data: TextData):
+     predicted_value = predict_text(roberta_model, roberta_tokenizer, data.text)
+     return {"text": data.text, "Score": predicted_value}
+
+ # Endpoint to predict with BERT
+ @app.post("/predict_copyleaks_V2")
+ def predict_bert(data: TextData):
+     predicted_value = predict_text(bert_model, bert_tokenizer, data.text)
+     return {"text": data.text, "Score": predicted_value}
+
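
For quick verification, a minimal client sketch for exercising the two endpoints once this version of app.py is serving. The uvicorn invocation, host, and port below are assumptions for illustration, not part of the commit.

# Client sketch (assumes the app is served with, e.g., `uvicorn app:app --port 8000`;
# the entrypoint, host, and port are assumptions).
import requests

payload = {"text": "Sample passage to score."}

# RoBERTa-backed endpoint
r1 = requests.post("http://localhost:8000/predict_copyleaks_V1", json=payload)
print(r1.json())  # {"text": ..., "Score": 0 or 100}; 0 means human, 100 means AI

# BERT-backed endpoint
r2 = requests.post("http://localhost:8000/predict_copyleaks_V2", json=payload)
print(r2.json())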