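"""Gradio demo: AI-content detection plus WordNet-based paraphrasing.

Pipeline: spaCy POS tagging -> WordNet synonym substitution -> recapitalization
-> GingerIt grammar correction. A DistilBERT classifier supplies the
(placeholder) AI-detection score.
"""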
import subprocess
import sys

import gradio as gr
import nltk
import spacy
import torch
from gensim import downloader as api
from gingerit.gingerit import GingerIt
from nltk.corpus import wordnet
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# WordNet (plus the multilingual extension) backs the synonym lookups below.
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load the small English spaCy model, downloading it on first use if missing.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the running interpreter so the model installs into this environment.
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")

# NOTE: these GloVe vectors are loaded but never used below; remove this line
# (and the gensim import) unless embedding-based synonym ranking is planned.
word_vectors = api.load("glove-wiki-gigaword-50")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: this checkpoint is an SST-2 *sentiment* classifier pressed into service
# as a detector; its "positive" logit is not a true AI-likelihood signal, so a
# purpose-trained detector should replace it for any serious use.
tokenizer_ai = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model_ai = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)

parser = GingerIt()

def detect_ai_generated(text):
    inputs = tokenizer_ai(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model_ai(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    # Class index 1 is treated as "AI-generated" (see the model note above).
    ai_probability = probabilities[0][1].item()
    # The softmax value lies in [0, 1]; scale it before formatting as a percent.
    return f"AI-Generated Content Probability: {ai_probability * 100:.2f}%"

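# Example: detect_ai_generated("The quick brown fox jumps over the lazy dog.")
# returns a string like "AI-Generated Content Probability: 97.12%"; the figure
# is illustrative and, per the note above, not a trustworthy detection signal.
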
def get_synonyms_nltk(word, pos):
    """Return lemma names from the first WordNet synset for (word, pos)."""
    synsets = wordnet.synsets(word, pos=pos)
    if synsets:
        lemmas = synsets[0].lemmas()
        # WordNet joins multiword lemmas with underscores ("domestic_dog");
        # convert them back to spaces for readable output.
        return [lemma.name().replace('_', ' ') for lemma in lemmas]
    return []

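# Example: get_synonyms_nltk("dog", wordnet.NOUN) typically yields
# ['dog', 'domestic dog', 'Canis familiaris'] from the first noun synset.
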
def capitalize_sentences_and_nouns(text):
    """Capitalize the first word of each sentence and every proper noun."""
    doc = nlp(text)
    corrected_sentences = []

    for sent in doc.sents:
        parts = []
        for token in sent:
            word = token.text
            if token.i == sent.start or token.pos_ == "PROPN":
                # Uppercase only the first character so acronyms survive intact
                # (str.capitalize() would turn "NASA" into "Nasa").
                word = word[0].upper() + word[1:]
            # token.whitespace_ preserves the original spacing, so punctuation
            # is not padded with stray spaces when the text is rejoined.
            parts.append(word + token.whitespace_)
        corrected_sentences.append(''.join(parts))

    return ''.join(corrected_sentences)

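# Example: capitalize_sentences_and_nouns("i met sarah in paris. she left.")
# should yield "I met Sarah in Paris. She left.", provided spaCy tags "sarah"
# and "paris" as proper nouns (the small model can miss lowercased names).
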
# Map spaCy coarse POS tags onto the matching WordNet POS constants.
POS_MAP = {
    "NOUN": wordnet.NOUN,
    "VERB": wordnet.VERB,
    "ADJ": wordnet.ADJ,
    "ADV": wordnet.ADV,
}


def paraphrase_with_spacy_nltk(text):
    """Swap content words for their first WordNet synonym, then recapitalize."""
    doc = nlp(text)
    paraphrased_words = []

    for token in doc:
        pos = POS_MAP.get(token.pos_)
        synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []

        # Substitute only when the top-ranked synonym differs from the word.
        if synonyms and synonyms[0] != token.text.lower():
            paraphrased_words.append(synonyms[0])
        else:
            paraphrased_words.append(token.text)

    paraphrased_sentence = ' '.join(paraphrased_words)

    # Joining on spaces discards casing, so restore sentence-initial and
    # proper-noun capitalization before returning.
    return capitalize_sentences_and_nouns(paraphrased_sentence)

def correct_grammar(text):
    # GingerIt posts the text to Ginger's public web endpoint, so this call
    # needs network access and can fail if the service is unreachable.
    try:
        return parser.parse(text)['result']
    except Exception:
        # Fall back to the uncorrected text rather than crashing the app.
        return text

def paraphrase_and_correct(text):
    # Step 1: synonym substitution (this already recapitalizes internally, so
    # a separate capitalization pass here would be redundant).
    paraphrased_text = paraphrase_with_spacy_nltk(text)

    # Step 2: grammar-correct the paraphrased text.
    return correct_grammar(paraphrased_text)

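# Example: paraphrase_and_correct("The dog runs quickly to the park.") might
# return "The domestic dog runs rapidly to the park."; the exact output depends
# on WordNet's synset ordering and on Ginger's corrections.
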
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase & Correct")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)

if __name__ == "__main__":
    # Pass share=True to launch() for a temporary public URL when needed.
    interface.launch(debug=False)