# Hugging Face Space: per-sentence GPT-2 perplexity AI-text detector.
# (The Space's status page showed "Runtime error" at capture time —
# see the gr.Interface note below for the likely cause.)
# --- Imports and model setup ----------------------------------------------
from flask import Flask, request
import gradio as gr
import os
import re

# NOTE(review): this Flask app is created but never used below — Gradio
# serves its own HTTP server via demo.launch(). Presumably leftover
# scaffolding; confirm before removing.
app = Flask(__name__)

import torch
from torch import cuda
from tqdm import tqdm
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

# Score on GPU when available, otherwise fall back to CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

# Pretrained GPT-2 (base) language model + fast tokenizer used to
# compute text perplexity; downloaded from the Hugging Face hub.
model_id = "gpt2"
modelgpt2 = GPT2LMHeadModel.from_pretrained(model_id).to(device)
tokenizergpt2 = GPT2TokenizerFast.from_pretrained(model_id)
def text_to_sentences(text):
    """Split *text* into a list of sentences.

    Newlines are flattened to spaces, then the text is split at a
    sentence terminator (. ? !) that is followed by whitespace and an
    upper-case letter.  The fixed-width lookbehind requires the character
    two places before the terminator to be non-upper-case, which keeps
    abbreviations like "U.S." from triggering a split.

    Args:
        text: Arbitrary input text (may contain newlines).

    Returns:
        list[str]: the sentences; for empty input, [""].
    """
    clean_text = text.replace('\n', ' ')
    # Bug fix: the original pattern [.?] omitted '!', so exclamatory
    # sentences were never split from the following sentence.
    return re.split(r'(?<=[^A-Z].[.?!]) +(?=[A-Z])', clean_text)
def calculatePerplexity(text):
    """Compute the GPT-2 perplexity of *text* using a strided sliding window.

    Follows the standard Hugging Face strided-evaluation recipe: the token
    sequence is scored in windows of up to ``n_positions`` tokens advanced
    by ``stride``; tokens already scored in a previous window are masked
    with label -100 so each token contributes to the loss exactly once.

    Args:
        text: The text to score (a single sentence in this app).

    Returns:
        float: ``exp(total NLL / token count)``.  ``inf`` for input that
        tokenizes to zero tokens (no probability mass to measure).
    """
    encodings = tokenizergpt2("\n\n".join([text]), return_tensors="pt")
    max_length = modelgpt2.config.n_positions
    stride = 512
    seq_len = encodings.input_ids.size(1)

    # Bug fix: for empty input the loop below never runs, which left
    # `end_loc` unbound and raised UnboundLocalError at the final exp.
    if seq_len == 0:
        return float("inf")

    nlls = []
    prev_end_loc = 0
    for begin_loc in range(0, seq_len, stride):
        end_loc = min(begin_loc + max_length, seq_len)
        # Number of *new* tokens in this window; may be < stride on the last pass.
        trg_len = end_loc - prev_end_loc
        input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
        target_ids = input_ids.clone()
        # Mask tokens already scored in a previous window (-100 = ignored by loss).
        target_ids[:, :-trg_len] = -100
        with torch.no_grad():
            outputs = modelgpt2(input_ids, labels=target_ids)
            # outputs.loss is the mean NLL over trg_len tokens; rescale to a sum
            # so windows of different lengths are weighted correctly.
            neg_log_likelihood = outputs.loss * trg_len
        nlls.append(neg_log_likelihood)
        prev_end_loc = end_loc
        if end_loc == seq_len:
            break
    # Total NLL divided by total token count, exponentiated = perplexity.
    ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
    return ppl.item()
def calculatePerplexities(text, threshold=25):
    """Score every sentence of *text* and label it "AI" or "Human".

    Splits the text into sentences, computes the GPT-2 perplexity of each,
    and labels a sentence "AI" when its perplexity falls below *threshold*
    (low perplexity = text the model finds highly predictable, a common
    heuristic for machine-generated prose), otherwise "Human".

    Args:
        text: The input text to analyse.
        threshold: Perplexity cut-off for the "AI" label (default 25,
            the value previously hard-coded).

    Returns:
        list[dict]: one dict per sentence with keys
        "sentence", "perplexity", and "label".
    """
    results = []
    for sentence in text_to_sentences(text):
        perplexity = calculatePerplexity(sentence)
        label = "AI" if perplexity < threshold else "Human"
        results.append({
            "sentence": sentence,
            "perplexity": perplexity,
            "label": label,
        })
    return results
# Gradio UI: paste text into the box, receive per-sentence perplexities
# and AI/Human labels as JSON.
# Bug fix: dropped `interpretation="default"` — that keyword was removed
# in Gradio 4.x and raises TypeError at construction time, which matches
# the Space's "Runtime error" status.
demo = gr.Interface(
    fn=calculatePerplexities,
    inputs=gr.Textbox(placeholder="Copy and paste here...", label="Content Box"),
    outputs=gr.JSON(),
)
demo.launch(show_api=False)