"""
  Orify Text Detector  – Space edition (Zero-GPU ready)

  β€’ Three ModernBERT-base checkpoints (soft-vote)
  β€’ Per-line colour coding, probability tool-tips, top-3 AI model hints
  β€’ Weights auto-downloaded once from the model repo and cached
"""

# ── Imports ──────────────────────────────────────────────────────────────
from pathlib import Path
import re, os, html, torch, gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import hf_hub_download
import spaces
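# the @spaces.GPU decorator below requests a GPU slot on ZeroGPU Spaces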
import typing                           # typing.Any is used by the shim below

# ──────────────────  robust torch.compile shim  ──────────────────────────
if hasattr(torch, "compile"):
    def _no_compile(model: typing.Any = None, *args, **kwargs):
        """
        1. torch.compile(model, …)          β†’ return the model unchanged
        2. torch.compile(**kw) (decorator) β†’ return a decorator that
           immediately gives back the class/function it decorates
        """
        if callable(model):                     # pattern 1
            return model

        def decorator(fn):                      # pattern 2
            return fn
        return decorator

    torch.compile = _no_compile
    os.environ["TORCHINDUCTOR_DISABLED"] = "1"
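    # Sanity sketch of the two call patterns the shim absorbs (hypothetical,
    # kept as comments so importing this file stays side-effect free):
    #   torch.compile(my_model)                 -> my_model, unchanged
    #   @torch.compile(mode="reduce-overhead")  -> decorated object, unchanged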

# ── Configuration ────────────────────────────────────────────────────────
DEVICE       = torch.device("cuda" if torch.cuda.is_available() else "cpu")
WEIGHT_REPO  = "Sleepyriizi/Orify-Text-Detection-Weights"
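# alias → filename inside WEIGHT_REPO (spelling follows the uploaded weight files)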
FILE_MAP     = {
    "ensamble_1":      "ensamble_1",
    "ensamble_2.bin":  "ensamble_2.bin",
    "ensamble_3":      "ensamble_3",
}

BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
NUM_LABELS      = 41

LABELS = {  # id → friendly label
    0: "13B", 1: "30B", 2: "65B", 3: "7B", 4: "GLM130B", 5: "bloom_7b",
    6: "bloomz", 7: "cohere", 8: "davinci", 9: "dolly", 10: "dolly-v2-12b",
    11: "flan_t5_base", 12: "flan_t5_large", 13: "flan_t5_small",
    14: "flan_t5_xl", 15: "flan_t5_xxl", 16: "gemma-7b-it",
    17: "gemma2-9b-it", 18: "gpt-3.5-turbo", 19: "gpt-35", 20: "gpt-4",
    21: "gpt-4o", 22: "gpt-j", 23: "gpt-neox", 24: "human",
    25: "llama3-70b", 26: "llama3-8b", 27: "mixtral-8x7b",
    28: "opt-1.3b", 29: "opt-125m", 30: "opt-13b",
    31: "opt-2.7b", 32: "opt-30b", 33: "opt-350m",
    34: "opt-6.7b", 35: "opt-iml-30b", 36: "opt-iml-max-1.3b",
    37: "t0-11b", 38: "t0-3b", 39: "text-davinci-002", 40: "text-davinci-003"
}
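# id 24 ("human") is the only non-AI class; infer() zeroes it out before ranking AI models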

# ── CSS ──────────────────────────────────────────────────────────────────
_css_file = Path(__file__).with_name("style.css")   # optional stylesheet next to app.py
CSS = _css_file.read_text() if _css_file.exists() else """
:root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:160px}.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""

# ── Model loading (download once, then cached) ───────────────────────────
print("πŸ”„ Downloading weights …")
local_paths = {alias: hf_hub_download(WEIGHT_REPO, fname)   # cached locally; resumes by default
               for alias, fname in FILE_MAP.items()}

print("🧩 Loading tokenizer & models …")
tokeniser = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

models = []
for path in local_paths.values():
    net = AutoModelForSequenceClassification.from_pretrained(
              BASE_MODEL_NAME, num_labels=NUM_LABELS)
    net.load_state_dict(torch.load(path, map_location=DEVICE))
    net.to(DEVICE).eval()
    models.append(net)
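
# The three checkpoints form a soft-vote ensemble: infer() averages their
# softmax outputs rather than counting argmax votes.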

# ── Helpers ──────────────────────────────────────────────────────────────
def tidy(txt: str) -> str:
    txt = txt.replace("\r\n", "\n").replace("\r", "\n")
    txt = re.sub(r"\n\s*\n+", "\n\n", txt)
    txt = re.sub(r"[ \t]+", " ", txt)
    txt = re.sub(r"(\w+)-\n(\w+)", r"\1\2", txt)
    txt = re.sub(r"(?<!\n)\n(?!\n)", " ", txt)
    return txt.strip()
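
# A quick sketch of tidy() on a hypothetical input:
#   tidy("co-\noperate\r\n\r\nnew  para")  ->  "cooperate\n\nnew para"
# i.e. hyphenated line-breaks re-join, runs of blank lines collapse to one
# paragraph break, and remaining single newlines become spaces.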

def infer(segment: str):
    """Return (human%, ai%, list of top-3 AI model names)."""
    inputs = tokeniser(segment, return_tensors="pt",
                       truncation=True, padding=True).to(DEVICE)
    with torch.no_grad():
        probs = torch.stack([
            torch.softmax(m(**inputs).logits, dim=1) for m in models
        ]).mean(dim=0)[0]

    ai_probs = probs.clone(); ai_probs[24] = 0   # zero the "human" class (id 24)
    ai_score   = ai_probs.sum().item() * 100
    human_score = 100 - ai_score
    top3 = torch.topk(ai_probs, 3).indices.tolist()
    return human_score, ai_score, [LABELS[i] for i in top3]
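
# Hypothetical call (numbers are illustrative only, not real model output):
#   infer("The mitochondria is the powerhouse of the cell.")
#   -> (12.3, 87.7, ["gpt-4o", "gpt-3.5-turbo", "llama3-8b"])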

# ── Inference + explanation ──────────────────────────────────────────────
@spaces.GPU
def analyse(text: str):
    if not text.strip():
        return "✏️ Please paste or type some text to analyse…"

    lines = tidy(text).split("\n")
    highlighted, h_tot, ai_tot, n = [], 0.0, 0.0, 0

    for ln in lines:
        if not ln.strip():
            highlighted.append("<br>")
            continue
        n += 1
        h, ai, top3 = infer(ln)
        h_tot += h; ai_tot += ai
        tooltip = (f"AI {ai:.2f}% β€’ Top-3: {', '.join(top3)}"
                   if ai > h else f"Human {h:.2f}%")
        cls = "ai-line" if ai > h else "human-line"
        span = (f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
                f"{gr.utils.sanitize_html(ln)}</span>")
        highlighted.append(span)

    verdict = (f"<p><strong>Overall verdict:</strong> "
               f"<span class='human-line' style='padding:4px 8px;'>"
               f"Human-written {h_tot/n:.2f}%</span>"
               if h_tot >= ai_tot else
               f"<p><strong>Overall verdict:</strong> "
               f"<span class='ai-line' style='padding:4px 8px;'>"
               f"AI-generated {ai_tot/n:.2f}%</span>")
    return verdict + "<hr>" + "<br>".join(highlighted)
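
# analyse() returns one HTML string: the verdict paragraph, an <hr>, then one
# colour-coded, tooltip-bearing <span> per paragraph of the tidied input.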

# ── Gradio interface ─────────────────────────────────────────────────────
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown("""
    ### Orify Text Detector  
    Paste any English text and press **Analyse**.  
    <span class='human-line'>Green</span> = human | <span class='ai-line'>Red</span> = AI.  
    Hover a line to see confidence and the top-3 AI models it resembles.
    """)
    inp = gr.Textbox(lines=8, placeholder="Paste text here …",
                     elem_classes=["input-area"])
    out = gr.HTML("", elem_classes=["output-box"])
    gr.Button("Analyse").click(analyse, inp, out)
    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble Β© 2025</sub>")

if __name__ == "__main__":
    demo.launch()