File size: 14,810 Bytes
7cd2476
 
525f59d
7cd2476
525f59d
 
d54e3aa
 
 
 
 
 
 
 
 
 
7cd2476
7569f44
 
b3c8697
 
f53d5a5
 
9a481ab
 
 
 
 
 
f53d5a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a481ab
b3c8697
 
 
 
 
 
 
 
 
 
 
 
 
d54e3aa
b3c8697
 
 
 
 
d54e3aa
b3c8697
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d54e3aa
 
 
b3c8697
d54e3aa
 
 
 
b3c8697
 
 
 
 
 
 
 
 
 
d54e3aa
 
 
 
f53d5a5
d54e3aa
f53d5a5
7cd2476
 
 
b587cb6
7cd2476
b587cb6
7cd2476
b587cb6
7cd2476
 
 
 
 
 
 
 
d54e3aa
525f59d
 
d54e3aa
525f59d
 
d54e3aa
525f59d
 
 
 
d54e3aa
525f59d
 
 
 
 
 
 
 
 
 
 
9a481ab
525f59d
7cd2476
b587cb6
7cd2476
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c768ef
7cd2476
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
import gradio as gr
import numpy as np
import os
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import pandas as pd
import numpy as np
import torch
import zlib
from scipy.stats import skew, kurtosis, entropy
from tqdm import tqdm
from torch.nn import CrossEntropyLoss
from pathlib import Path
import spaces
import os

# NOTE(review): this theme object is created via gr.Theme.from_hub, but the
# gr.Blocks(...) call later in this file passes gr.themes.Soft() instead, so
# `theme` appears to be unused — confirm before removing.
theme = gr.Theme.from_hub("gstaff/xkcd") 

# ===========================================================
def _probability_frame(ai_pct, human_pct):
    """Build the two-row DataFrame that feeds the "Confidence Levels" bar plot."""
    return pd.DataFrame({
        "Source": ["AI Generated", "Human Written"],
        "Probability (%)": [ai_pct, human_pct]
    })


def _diveye_features(text):
    """Return the 11 DivEye features for *text*.

    Uses the module-level ``div_model`` / ``div_tokenizer`` to obtain per-token
    log-likelihoods, then summarises the surprisal sequence (mean, std,
    variance, skew, kurtosis), its first differences (mean, std), the second
    differences of the log-likelihoods (variance, histogram entropy, lag-1
    autocorrelation) and the zlib compression ratio of the UTF-8 text.

    When fewer than 10 scored tokens are available the statistics are not
    computed and a vector of eleven zeros is returned instead.
    """
    tokens = div_tokenizer.encode(
        text, return_tensors="pt", truncation=True, max_length=1024
    ).to(div_model.device)
    with torch.no_grad():
        outputs = div_model(tokens, labels=tokens)

    # Position i of the logits scores token i + 1, hence the one-step shift.
    shift_logits = outputs.logits[:, :-1, :].squeeze(0)
    shift_labels = tokens[:, 1:].squeeze(0)
    log_probs = torch.log_softmax(shift_logits.float(), dim=-1)
    token_log_likelihoods = log_probs[range(shift_labels.shape[0]), shift_labels].cpu().numpy()
    surprisals = -token_log_likelihoods

    # Too few tokens for meaningful higher-order statistics: return the zero
    # vector right away (previously this fallback was overwritten further down).
    if len(surprisals) < 10 or len(token_log_likelihoods) < 3:
        return [0.0] * 11

    s = np.array(surprisals)
    mean_s, std_s, var_s, skew_s, kurt_s = np.mean(s), np.std(s), np.var(s), skew(s), kurtosis(s)
    diff_s = np.diff(s)
    mean_diff, std_diff = np.mean(diff_s), np.std(diff_s)
    second_order_diff = np.diff(np.diff(token_log_likelihoods))
    var_2nd = np.var(second_order_diff)
    entropy_2nd = entropy(np.histogram(second_order_diff, bins=20, density=True)[0])
    if len(second_order_diff) > 1:
        autocorr_2nd = np.corrcoef(second_order_diff[:-1], second_order_diff[1:])[0, 1]
    else:
        autocorr_2nd = 0
    raw_bytes = text.encode('utf-8')
    comp_ratio = len(zlib.compress(raw_bytes)) / len(raw_bytes)

    return [mean_s, std_s, var_s, skew_s, kurt_s, mean_diff, std_diff,
            var_2nd, entropy_2nd, autocorr_2nd, comp_ratio]


def _biscope_features(text):
    """Return the 72 BiScope features for *text*.

    The text is appended to a fixed completion prompt and scored with the
    module-level ``bi_model`` / ``bi_tokenizer``. Two per-token cross-entropy
    sequences are computed against the text tokens: one from the logits of the
    preceding position and one from the logits of the same position. For each
    of nine suffixes (starting at 10%, 20%, ..., 90% of the sequence) the
    mean, max, min and std of both sequences are emitted.
    """
    COMPLETION_PROMPT_ONLY = "Complete the following text: "
    prompt_ids = bi_tokenizer(COMPLETION_PROMPT_ONLY, return_tensors='pt').input_ids.to(bi_model.device)
    text_ids = bi_tokenizer(text, return_tensors='pt', max_length=2000, truncation=True).input_ids.to(bi_model.device)
    combined_ids = torch.cat([prompt_ids, text_ids], dim=1)
    text_slice = slice(prompt_ids.shape[1], combined_ids.shape[1])

    # Inference only: skip autograd bookkeeping to save GPU memory.
    with torch.no_grad():
        logits = bi_model(input_ids=combined_ids).logits
    targets = combined_ids[0][text_slice]

    loss_fn = CrossEntropyLoss(reduction='none')
    fce_loss = loss_fn(
        logits[0, text_slice.start - 1:text_slice.stop - 1, :],
        targets
    ).detach().cpu().numpy()
    bce_loss = loss_fn(
        logits[0, text_slice, :],
        targets
    ).detach().cpu().numpy()

    features = []
    for p in range(1, 10):
        start = len(fce_loss) * p // 10
        fce_clipped = np.nan_to_num(np.clip(fce_loss[start:], -1e6, 1e6), nan=0.0, posinf=1e6, neginf=-1e6)
        bce_clipped = np.nan_to_num(np.clip(bce_loss[start:], -1e6, 1e6), nan=0.0, posinf=1e6, neginf=-1e6)
        features.extend([
            np.mean(fce_clipped), np.max(fce_clipped), np.min(fce_clipped), np.std(fce_clipped),
            np.mean(bce_clipped), np.max(bce_clipped), np.min(bce_clipped), np.std(bce_clipped)
        ])
    return features


@spaces.GPU
def detect_ai_text(text):
    """Classify *text* as AI-generated or human-written.

    Args:
        text: The raw input string from the UI.

    Returns:
        A 3-tuple ``(message, ai_probability, bar_data)``:
        a Markdown status message, the AI probability rounded to three
        decimals (``0.0`` on error), and a DataFrame with the
        "AI Generated" / "Human Written" percentages for the bar plot.

    The function returns an error message instead of a prediction when the
    models were not loaded (module-level ``loaded`` is false) or when the
    input has fewer than 15 whitespace-separated words.
    """
    if not loaded:
        return (
            "❗ Model not loaded. We require a GPU to run DivEye.",
            0.0,
            _probability_frame(0, 0),
        )

    # Treat a missing value like an empty string so validation handles it.
    text = (text or "").strip()
    if len(text.split()) < 15:
        return (
            "❗ Please enter some text with at least 15 words.",
            0.0,
            _probability_frame(0, 0),
        )

    # Load the classifier only after the cheap guards above have passed.
    import xgboost as xgb
    model = xgb.XGBClassifier()
    model.load_model(Path(__file__).parent / "model.json")

    # Feature vector layout: 11 DivEye features followed by the BiScope ones.
    features = _diveye_features(text) + _biscope_features(text)

    ai_prob = model.predict_proba([features])[:, 1][0].item()
    human_prob = 1 - ai_prob

    if ai_prob > 0.7:
        message = f"🤖 **Likely AI-generated** (Confidence: {ai_prob:.2%})"
    elif ai_prob > 0.5:
        message = f"⚠️ **Possibly AI-generated** (Confidence: {ai_prob:.2%})"
    else:
        message = f"✅ **Likely Human-written** (Confidence: {human_prob:.2%})"

    return message, round(ai_prob, 3), _probability_frame(ai_prob * 100, human_prob * 100)

# ==========================================================
# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when the variable is unset).
token = os.getenv("HF_TOKEN")

# `loaded` gates detect_ai_text: the language models are only instantiated
# when CUDA is available.
loaded = torch.cuda.is_available()

if not loaded:
    print("[DivEye] CUDA not available. Running on CPU.")
else:
    # Import necessary models and tokenizers
    model_name_div = "tiiuae/falcon-7b"
    model_name_bi = "google/gemma-1.1-2b-it"

    # Keep the original two-GPU split when a second device exists; on a
    # single-GPU host "cuda:1" is not a valid device, so share cuda:0.
    div_device = "cuda:0"
    bi_device = "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0"

    # NOTE(review): recent transformers releases prefer `token=` over
    # `use_auth_token=` — confirm against the pinned version before switching.
    div_model = AutoModelForCausalLM.from_pretrained(model_name_div, torch_dtype=torch.float16, device_map=div_device, use_auth_token=token)
    div_tokenizer = AutoTokenizer.from_pretrained(model_name_div, use_fast=False, trust_remote_code=True, use_auth_token=token)

    bi_model = AutoModelForCausalLM.from_pretrained(model_name_bi, torch_dtype=torch.float16, device_map=bi_device, use_auth_token=token)
    bi_tokenizer = AutoTokenizer.from_pretrained(model_name_bi, use_fast=False, trust_remote_code=True, use_auth_token=token)

    # Inference only: switch both models to evaluation mode.
    div_model.eval()
    bi_model.eval()


# Gradio app setup: header banner, abstract, input/result panel, examples,
# and the event wiring that sends the textbox content to detect_ai_text.
with gr.Blocks(title="DivEye", theme=gr.themes.Soft()) as demo:
    # Title banner with author list and external links.
    gr.HTML("""
    <div style="display: flex; justify-content: space-between; align-items: center; padding: 1.5rem; background: #f0f4f8; border-radius: 12px; margin-bottom: 1rem;">
        <div style="text-align: left; max-width: 70%;">
            <h1 style="font-size: 2.2rem; color: #1a1a1a; margin-bottom: 0.5rem;">Diversity Boosts AI-generated Text Detection</h1>
            <h3 style="color: #1a1a1a; margin: 0;">Authors: Advik Raj Basani<sup style="color: red;">1</sup>, Pin-Yu Chen<sup style="color: red;">2</sup></h3>
            <p style="color: #444; font-size: 0.95rem; margin: 0.2rem 0;">
                <sup style="color: red;">1</sup> Birla Institute of Technology and Science, Goa &nbsp;&nbsp; <sup style="color: red;">2</sup> IBM Research, USA
            </p>
        </div>
        <div style="text-align: right;">
            <a href="https://openreview.net/forum?id=QuDDXJ47nq" target="_blank" style="text-decoration: none; margin-right: 0.5rem;">
                <button style="padding: 0.6rem 1rem; font-size: 1rem; border-radius: 8px; background-color: #3498db; color: white; border: none; cursor: pointer;">
                    📄 Paper
                </button>
            </a>
            <a href="https://github.com/IBM/diveye" target="_blank" style="text-decoration: none;">
                <button style="padding: 0.6rem 1rem; font-size: 1rem; border-radius: 8px; background-color: #2ecc71; color: white; border: none; cursor: pointer;">
                    🐙 GitHub
                </button>
            </a>
            <a href="https://diveye.vercel.app/" target="_blank" style="text-decoration: none;">
                <button style="padding: 0.6rem 1rem; font-size: 1rem; border-radius: 8px; background-color: #2ecc71; color: white; border: none; cursor: pointer;">
                    🌐 Website
                </button>
            </a>
        </div>
    </div>
    """)

    # Paper abstract.
    gr.HTML("""
    <div style="margin-bottom: 1.5rem; background: #eef6ff; padding: 1rem 1.5rem; border-radius: 8px;">
        <p style="color: #333; font-size: 1.05rem; margin: 0;">
            <strong style="color: black;">Abstract:</strong> Detecting AI-generated text is an increasing necessity to combat misuse of LLMs in domains such as education, business compliance, journalism, and social media, where synthetic fluency can mask misinformation or deception. Existing detectors often rely on likelihood-based heuristics or black-box classifiers, which struggle against high-quality generations and lack interpretability. In this work, we propose DivEye, a novel detection framework that captures how unpredictability fluctuates across a text using surprisal-based features. Motivated by the observation that human-authored text exhibits richer variability in lexical and structural unpredictability than LLM outputs, DivEye captures this signal through a set of interpretable statistical features. Our method outperforms existing zero-shot detectors by up to 33.2% and achieves competitive performance with fine-tuned baselines across multiple benchmarks. DivEye is robust to paraphrasing and adversarial attacks, generalizes well across domains and models, and improves the performance of existing detectors by up to 18.7% when used as an auxiliary signal. Beyond detection, DivEye provides interpretable insights into why a text is flagged, pointing to rhythmic unpredictability as a powerful and underexplored signal for LLM detection.
        </p>
    </div>
    """)

    gr.Markdown("## 🔍 DivEye")
    gr.Markdown("Enter text below to analyze the probability of it being AI-generated:")

    with gr.Row():
        # Left column: text entry and the explicit "Analyze" trigger.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Input Text",
                lines=10,
                placeholder="Type or paste text here... (15+ words recommended)",
            )
            analyze_button = gr.Button("🔍 Analyze")

        # Right column: the three outputs returned by detect_ai_text.
        with gr.Column(scale=1):
            result_output = gr.Markdown("Results will appear here.")
            probability_slider = gr.Slider(
                0, 1, value=0.5, step=0.01,
                label="AI Probability",
                interactive=False
            )
            bar_plot = gr.BarPlot(
                x="Source",
                y="Probability (%)",
                color="Source",
                y_lim=[0, 100],
                height=250,
                title="Confidence Levels"
            )

    gr.Markdown("## 💡 Examples")
    # Clicking an example only fills the textbox; the change handler below is
    # what actually runs the detector on it.
    gr.Examples(
        examples=[
            "🤖 Motivated by the recent progress in generative models, we introduce a novel approach for generating images from textual descriptions using attention mechanisms. Our model utilizes a combination of recurrent neural networks and convolutional neural networks to capture intricate details and faithfully render the images described in the captions. Experimental results demonstrate the effectiveness of our proposed method in generating realistic and diverse images, showcasing the potential of leveraging attention for image generation tasks.",
            "🤖 Throughout history, some of the most groundbreaking scientific discoveries have occurred not as the result of meticulous planning, but through serendipity—happy accidents that revealed something entirely unexpected. From penicillin to the microwave oven, serendipitous discoveries have changed the world in ways no experiment could have predicted.",
            "👤 Many modern multiclass and multilabel problems are characterized by\nincreasingly large output spaces. For these problems, label embeddings have\nbeen shown to be a useful primitive that can improve computational and\nstatistical efficiency. In this work we utilize a correspondence between rank\nconstrained estimation and low dimensional label embeddings that uncovers a\nfast label embedding algorithm which works in both the multiclass and\nmultilabel settings. The result is a randomized algorithm for partial least\nsquares, whose running time is exponentially faster than naive algorithms. We\ndemonstrate our techniques on two large-scale public datasets, from the Large\nScale Hierarchical Text Challenge and the Open Directory Project, where we\nobtain state of the art results.",
            "👤 So many times have I walked on ruins, the remainings of places that I loved and got used to.. At first I was scared, each time I could feel my city, my current generation collapse, break into the black hole that thrives within it, I could feel humanity, the way I'm able to feel my body.. After a few hundred years, the pattern became obvious, no longer the war and damage that would devastate me over and over again in the far past was effecting me so dominantly.\nIt's funny, but I felt as if after gaining what I desired so long, what I have lived for my entire life, only then, when I achieved immortality I started truly aging.\n5 world wars have passed, and now they feel like a simple sickeness that would pass by every so often, I could no longer evaluate the individual human as a being of its own, the importance of mortals is merely the same as the importance of my skin cells; They are a part of a mechanism so much more advanced, a mechanism that is so dear to my fallen heart a mechanism that I have seen fall and rise so many times, a mechanism that when lost all of which it had, had me loosing my will to live, for the first time in all of my thousands years of existence.",
            "🤖 In the span of just two decades, social media has transformed the way humans connect, share, and communicate. Platforms like Facebook, Twitter (now X), Instagram, TikTok, and WhatsApp have reshaped social interactions, dissolving geographic boundaries and allowing instant communication across the globe. Yet, while the benefits are substantial, this digital evolution also comes with significant social and psychological trade-offs."
        ],
        inputs=text_input,
        label="Click an example to test it instantly."
    )

    # Run the detector when the button is pressed ...
    analyze_button.click(
        fn=detect_ai_text,
        inputs=text_input,
        outputs=[result_output, probability_slider, bar_plot]
    )

    # ... and also whenever the textbox content changes (typing, pasting, or
    # selecting an example).
    text_input.change(
        fn=detect_ai_text,
        inputs=text_input,
        outputs=[result_output, probability_slider, bar_plot],
    )

# Run the app
if __name__ == "__main__":
    # Enable the request queue, then start the Gradio server.
    demo.queue().launch()