import gradio as gr
import random
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Models included within the interface
models = ["bert-base-uncased", "roberta-base"]

# Datasets included within the interface
datasets = ["No Dataset Finetuning",
            "vedantgaur/GPTOutputs-MWP - AI Data Only",
            "vedantgaur/GPTOutputs-MWP - Human Data Only",
            "vedantgaur/GPTOutputs-MWP - Both AI and Human Data",
            "dmitva/human_ai_generated_text - Both AI and Human Data"]

# Mapping of user-selected model and dataset to actual model name on Hugging Face
model_mapping = {
    ("bert-base-uncased", "No Dataset Finetuning"): "bert-base-uncased",
    ("bert-base-uncased", "vedantgaur/GPTOutputs-MWP - AI Data Only"): "SkwarczynskiP/bert-base-uncased-finetuned-vedantgaur-AI-generated",
    ("bert-base-uncased", "vedantgaur/GPTOutputs-MWP - Human Data Only"): "SkwarczynskiP/bert-base-uncased-finetuned-vedantgaur-human-generated",
    ("bert-base-uncased", "vedantgaur/GPTOutputs-MWP - Both AI and Human Data"): "SkwarczynskiP/bert-base-uncased-finetuned-vedantgaur-AI-and-human-generated",
    ("bert-base-uncased", "dmitva/human_ai_generated_text - Both AI and Human Data"): "SkwarczynskiP/bert-base-uncased-finetuned-dmitva-AI-and-human-generated",
    ("roberta-base", "No Dataset Finetuning"): "roberta-base",
    ("roberta-base", "vedantgaur/GPTOutputs-MWP - AI Data Only"): "SkwarczynskiP/roberta-base-finetuned-vedantgaur-AI-generated",
    ("roberta-base", "vedantgaur/GPTOutputs-MWP - Human Data Only"): "SkwarczynskiP/roberta-base-finetuned-vedantgaur-human-generated",
    ("roberta-base", "vedantgaur/GPTOutputs-MWP - Both AI and Human Data"): "SkwarczynskiP/roberta-base-finetuned-vedantgaur-AI-and-human-generated",
    ("roberta-base", "dmitva/human_ai_generated_text - Both AI and Human Data"): "SkwarczynskiP/roberta-base-finetuned-dmitva-AI-and-human-generated"
}

# Example text included within the interface
exampleText = [
    ["ex1"],
    ["ex2"],
    ["ex3"],
    ["ex4"]
]

# Example models included within the interface
exampleModels = ["bert-base-uncased", "roberta-base"]

# Example datasets included within the interface
exampleDatasets = ["No Dataset Finetuning",
                   "vedantgaur/GPTOutputs-MWP - AI Data Only",
                   "vedantgaur/GPTOutputs-MWP - Human Data Only",
                   "vedantgaur/GPTOutputs-MWP - Both AI and Human Data",
                   "dmitva/human_ai_generated_text - Both AI and Human Data"]

# Each example must match the input components: two dropdown strings and one plain string for the textbox
examples = [[random.choice(exampleModels), random.choice(exampleDatasets), random.choice(exampleText)[0]]
            for _ in exampleText]


def detect_ai_generated_text(model: str, dataset: str, text: str) -> str:
    # Look up the fine-tuned model on Hugging Face for the selected model/dataset pair
    finetuned_model = model_mapping.get((model, dataset))

    # Load the tokenizer and the specific fine-tuned model
    tokenizer = AutoTokenizer.from_pretrained(finetuned_model)
    classifier_model = AutoModelForSequenceClassification.from_pretrained(finetuned_model)

    # Classify the input text with the fine-tuned model
    classifier = pipeline('text-classification', model=classifier_model, tokenizer=tokenizer)
    result = classifier(text)

    # LABEL_1 corresponds to AI-generated text, LABEL_0 to human-written text
    label = "AI-generated" if result[0]['label'] == 'LABEL_1' else "Human-written"
    score = result[0]['score']

    return f"{label} with confidence {score * 100:.2f}%"
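
# A quick sanity-check sketch of calling the classifier directly, outside the Gradio UI; the
# input text below is a made-up sample (not taken from the datasets above), so the label and
# confidence will depend on the selected model:
# print(detect_ai_generated_text("bert-base-uncased", "No Dataset Finetuning",
#                                "The sum of two consecutive integers is 41. Find the integers."))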


interface = gr.Interface(
    fn=detect_ai_generated_text,
    inputs=[
        gr.Dropdown(choices=models, label="Model"),
        gr.Dropdown(choices=datasets, label="Dataset"),
        gr.Textbox(lines=5, label="Input Text")
    ],
    outputs=gr.Textbox(label="Output"),
    examples=examples,
    title="AI Generated Text Detection"
)

if __name__ == "__main__":
    interface.launch()