File size: 1,530 Bytes
e142a10
9a51750
e142a10
 
9a51750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7776d6e
 
 
9a51750
2e52b31
9a51750
 
e142a10
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification


# Base model names offered in the "Model" dropdown.
# NOTE(review): "model2" looks like a placeholder — confirm the real model id.
models = [
    "bert-base-uncased",
    "model2",
]

# Dataset names offered in the "Dataset" dropdown.
# NOTE(review): "dataset2" looks like a placeholder — confirm the real dataset id.
datasets = [
    "vedantgaur/GPTOutputs-MWP",
    "dataset2",
]

# Lookup table: (selected model, selected dataset) -> name of the fine-tuned
# checkpoint to load for that combination.
# NOTE(review): the "finetuned_model*" values look like placeholders rather
# than published checkpoints — confirm before exposing them to users.
model_mapping = {
    ("bert-base-uncased", "vedantgaur/GPTOutputs-MWP"):
        "SkwarczynskiP/bert-base-uncased-finetuned-vedantgaur",
    ("bert-base-uncased", "dataset2"):
        "finetuned_model1_on_dataset2",
    ("model2", "vedantgaur/GPTOutputs-MWP"):
        "finetuned_model2_on_dataset1",
    ("model2", "dataset2"):
        "finetuned_model2_on_dataset2",
}

# Ready-to-use classification pipelines keyed by fine-tuned checkpoint name,
# so each checkpoint is loaded once instead of on every request.
_classifier_cache = {}


def _load_classifier(finetuned_model):
    """Return the text-classification pipeline for *finetuned_model*, loading it on first use."""
    classifier = _classifier_cache.get(finetuned_model)
    if classifier is None:
        tokenizer = AutoTokenizer.from_pretrained(finetuned_model)
        sequence_model = AutoModelForSequenceClassification.from_pretrained(finetuned_model)
        classifier = pipeline('text-classification', model=sequence_model, tokenizer=tokenizer)
        _classifier_cache[finetuned_model] = classifier
    return classifier


def detect_ai_generated_text(model: str, dataset: str, text: str) -> str:
    """Classify *text* as AI-generated or not using the selected fine-tuned model.

    Args:
        model: Base model name chosen in the UI; first half of the
            ``model_mapping`` key.
        dataset: Dataset name chosen in the UI; second half of the
            ``model_mapping`` key.
        text: The text to classify.

    Returns:
        ``"AI-generated"`` when the top prediction's label is ``'LABEL_1'``,
        otherwise ``"Not AI-generated"``.

    Raises:
        gr.Error: If the (model, dataset) pair has no entry in
            ``model_mapping`` (including when a dropdown is left unselected),
            or if *text* is empty or whitespace-only.
    """
    # Resolve the selection to a fine-tuned checkpoint; fail with a readable
    # message instead of passing None to from_pretrained().
    finetuned_model = model_mapping.get((model, dataset))
    if finetuned_model is None:
        raise gr.Error(
            f"No fine-tuned model is available for model {model!r} and dataset {dataset!r}. "
            "Please select both a model and a dataset."
        )

    if not text or not text.strip():
        raise gr.Error("Please enter some text to classify.")

    classifier = _load_classifier(finetuned_model)

    # truncation=True keeps inputs longer than the tokenizer's maximum length
    # from failing inside the model.
    result = classifier(text, truncation=True)

    # NOTE(review): assumes the checkpoint uses the default LABEL_0/LABEL_1
    # names with LABEL_1 meaning "AI-generated" — confirm against its config.
    return "AI-generated" if result[0]['label'] == 'LABEL_1' else "Not AI-generated"

# Input widgets, listed in the same order as the parameters of
# detect_ai_generated_text (model, dataset, text).
input_components = [
    gr.Dropdown(choices=models, label="Model"),
    gr.Dropdown(choices=datasets, label="Dataset"),
    gr.Textbox(lines=5, label="Input Text"),
]

# Single text box that displays the string returned by the callback.
output_component = gr.Textbox(label="Output")

iface = gr.Interface(
    fn=detect_ai_generated_text,
    inputs=input_components,
    outputs=output_component,
)

# Starts the app as soon as this module is executed (there is no __main__ guard).
iface.launch()