import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import spacy
import subprocess
import sys
import nltk
from nltk.corpus import wordnet

from gensim import downloader as api

# Download the WordNet data needed for synonym lookup.
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load the spaCy English pipeline, downloading the model first if it is missing.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")

# Load pre-trained GloVe word vectors through gensim's downloader.
word_vectors = api.load("glove-wiki-gigaword-50")

# Run inference on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Sequence-classification model backing the "AI Detection" button;
# the probability of class index 1 is reported as the AI-generated score.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)

def detect_ai_generated(text):
    """Classify the input text and report the probability assigned to class index 1."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=1)
        ai_probability = probabilities[0][1].item()
    # Scale the 0-1 probability to a percentage for display.
    return f"AI-Generated Content Probability: {ai_probability * 100:.2f}%"

def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset matching the word and part of speech."""
    synsets = wordnet.synsets(word, pos=pos)
    if synsets:
        lemmas = synsets[0].lemmas()
        # Multi-word lemmas use underscores; replace them with spaces for readable output.
        return [lemma.name().replace('_', ' ') for lemma in lemmas]
    return []

# Map spaCy coarse part-of-speech tags to the WordNet constants used for lookup.
POS_MAP = {
    "NOUN": wordnet.NOUN,
    "VERB": wordnet.VERB,
    "ADJ": wordnet.ADJ,
    "ADV": wordnet.ADV,
}


def paraphrase_with_spacy_nltk(text):
    """Replace content words with their first WordNet synonym, keeping other tokens as-is."""
    doc = nlp(text)
    paraphrased_words = []

    for token in doc:
        pos = POS_MAP.get(token.pos_)
        synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []

        # Substitute the first synonym only when it differs from the original word.
        if synonyms and synonyms[0] != token.text.lower():
            paraphrased_words.append(synonyms[0])
        else:
            paraphrased_words.append(token.text)

    # Joining on single spaces normalizes the original whitespace and punctuation spacing.
    return ' '.join(paraphrased_words)

# Gradio UI: input textbox and action buttons on the left, shared output box on the right.
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Wire each button to its handler; both write to the same output textbox.
    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)

interface.launch(debug=False)
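# Usage sketch (assumption: the script is saved as app.py; the filename is not given above):
#   python app.py
# starts a local Gradio server; by default the interface is served at http://127.0.0.1:7860.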