Spaces:
Sleeping
Sleeping
File size: 7,634 Bytes
3477806 2a089a8 3477806 2a089a8 3477806 2a089a8 3477806 2a089a8 decced4 2a089a8 decced4 2a089a8 decced4 2a089a8 decced4 2a089a8 decced4 2a089a8 decced4 65cb2f2 2a089a8 3477806 2a089a8 3477806 2a089a8 3477806 2a089a8 3477806 2a089a8 3477806 2a089a8 3477806 2a089a8 3477806 2a089a8 3477806 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
"""
Orify Text Detector β Space edition (Zero-GPU ready)
β’ Three ModernBERT-base checkpoints (soft-vote)
β’ Per-line colour coding, probability tool-tips, top-3 AI model hints
β’ Weights auto-downloaded once from the model repo and cached
"""
# ── Imports ──────────────────────────────────────────────────────────────
# Standard library
import html
import os
import re
import typing
from pathlib import Path

# Third-party
import gradio as gr
import spaces
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ────────────────── robust torch.compile shim ──────────────────────────
# Zero-GPU Spaces have no working inductor backend, so torch.compile is
# swapped for a pass-through that supports both call styles.
if hasattr(torch, "compile"):
    def _no_compile(model: typing.Any = None, *args, **kwargs):
        """No-op replacement for ``torch.compile``.

        Supports both usage patterns:
          1. ``torch.compile(model, ...)``      -> returns *model* unchanged
          2. ``torch.compile(**kw)`` decorator  -> returns a decorator that
             hands back the decorated function/class untouched
        """
        if callable(model):          # pattern 1: direct call with a model
            return model

        def _identity(obj):          # pattern 2: decorator-factory form
            return obj

        return _identity

    torch.compile = _no_compile

os.environ["TORCHINDUCTOR_DISABLED"] = "1"
# ── Configuration ────────────────────────────────────────────────────────
# Inference device: CUDA when available, else CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# HF Hub repo holding the three fine-tuned ensemble checkpoints.
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
# alias → filename inside WEIGHT_REPO. NOTE(review): the mixed extensions
# (only "ensamble_2.bin" has one) presumably mirror the actual repo
# contents — verify against the repo before "fixing".
FILE_MAP = {
    "ensamble_1": "ensamble_1",
    "ensamble_2.bin": "ensamble_2.bin",
    "ensamble_3": "ensamble_3",
}
BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
NUM_LABELS = 41  # matches the 41 ids (0-40) enumerated in LABELS below
LABELS = {  # id → friendly label (24 = "human"; every other id is an AI model)
    0: "13B", 1: "30B", 2: "65B", 3: "7B", 4: "GLM130B", 5: "bloom_7b",
    6: "bloomz", 7: "cohere", 8: "davinci", 9: "dolly", 10: "dolly-v2-12b",
    11: "flan_t5_base", 12: "flan_t5_large", 13: "flan_t5_small",
    14: "flan_t5_xl", 15: "flan_t5_xxl", 16: "gemma-7b-it",
    17: "gemma2-9b-it", 18: "gpt-3.5-turbo", 19: "gpt-35", 20: "gpt-4",
    21: "gpt-4o", 22: "gpt-j", 23: "gpt-neox", 24: "human",
    25: "llama3-70b", 26: "llama3-8b", 27: "mixtral-8x7b",
    28: "opt-1.3b", 29: "opt-125m", 30: "opt-13b",
    31: "opt-2.7b", 32: "opt-30b", 33: "opt-350m",
    34: "opt-6.7b", 35: "opt-iml-30b", 36: "opt-iml-max-1.3b",
    37: "t0-11b", 38: "t0-3b", 39: "text-davinci-002", 40: "text-davinci-003"
}
# ── CSS ──────────────────────────────────────────────────────────────────
# Use an optional style.css next to this file; otherwise fall back to the
# inline default. The path is computed once (the original evaluated
# Path(__file__).with_name("style.css") twice) and read with an explicit
# encoding.
_CSS_FILE = Path(__file__).with_name("style.css")
CSS = _CSS_FILE.read_text(encoding="utf-8") if _CSS_FILE.exists() else """
:root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:160px}.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""
# ── Model loading (download once, then cached) ───────────────────────────
# Console messages were mojibake-corrupted in the original; restored to
# plain text. `resume_download=True` is deprecated (a warning/no-op in
# recent huggingface_hub) and has been dropped.
print("Downloading weights ...")
local_paths = {alias: hf_hub_download(WEIGHT_REPO, fname)
               for alias, fname in FILE_MAP.items()}

print("Loading tokenizer & models ...")
tokeniser = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

models = []
for _, path in local_paths.items():
    net = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL_NAME, num_labels=NUM_LABELS)
    # NOTE(security): torch.load unpickles arbitrary objects. The weights
    # come from the pinned WEIGHT_REPO above; prefer weights_only=True on
    # torch >= 2.0 if the checkpoints are plain state dicts — verify first.
    net.load_state_dict(torch.load(path, map_location=DEVICE))
    net.to(DEVICE).eval()
    models.append(net)
# ββ Helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
def tidy(txt: str) -> str:
    """Normalise raw input so each surviving newline is a real line break.

    Unifies Windows/Mac line endings, collapses runs of blank lines into a
    single paragraph break, squeezes horizontal whitespace, mends words that
    were hyphen-split across lines, and folds lone newlines into spaces so
    only double newlines (paragraphs) remain. Leading/trailing whitespace
    is stripped from the result.
    """
    rules = (
        (r"\r\n?", "\n"),               # CRLF / bare CR -> LF
        (r"\n\s*\n+", "\n\n"),          # blank-line runs -> one blank line
        (r"[ \t]+", " "),               # squeeze spaces and tabs
        (r"(\w+)-\n(\w+)", r"\1\2"),    # re-join hyphenated line breaks
        (r"(?<!\n)\n(?!\n)", " "),      # lone newline -> space
    )
    for pattern, repl in rules:
        txt = re.sub(pattern, repl, txt)
    return txt.strip()
def infer(segment: str) -> tuple[float, float, list[str]]:
    """Soft-vote the model ensemble over one text segment.

    Returns ``(human%, ai%, top-3 AI model names)``. The AI score is the
    summed probability mass of every non-"human" class; id 24 is "human"
    in LABELS, so it is zeroed out before summing and ranking.
    Relies on module-level ``tokeniser``, ``models``, ``DEVICE``, ``LABELS``.
    """
    inputs = tokeniser(segment, return_tensors="pt",
                       truncation=True, padding=True).to(DEVICE)
    with torch.no_grad():
        # Average the per-model softmax distributions (soft vote), then take
        # the single row belonging to this one segment.
        probs = torch.stack([
            torch.softmax(m(**inputs).logits, dim=1) for m in models
        ]).mean(dim=0)[0]
    ai_probs = probs.clone(); ai_probs[24] = 0   # mask out the "human" class
    ai_score = ai_probs.sum().item() * 100        # total AI probability mass
    human_score = 100 - ai_score                  # complement (softmax sums to 1)
    top3 = torch.topk(ai_probs, 3).indices.tolist()
    return human_score, ai_score, [LABELS[i] for i in top3]
# ── Inference + explanation ──────────────────────────────────────────────
@spaces.GPU
def analyse(text: str):
    """Classify *text* line by line and return an HTML report.

    Each non-blank line is scored by ``infer``; AI-leaning lines are
    highlighted red, human-leaning lines green, with a hover tooltip showing
    the confidence (plus the top-3 candidate AI models for AI lines). An
    overall verdict averaged over all scored lines is prepended.
    """
    if not text.strip():
        return "Please paste or type some text to analyse…"

    lines = tidy(text).split("\n")
    highlighted: list[str] = []
    h_tot = ai_tot = 0.0
    n = 0

    for ln in lines:
        if not ln.strip():            # keep paragraph breaks in the output
            highlighted.append("<br>")
            continue
        n += 1
        h, ai, top3 = infer(ln)
        h_tot += h
        ai_tot += ai
        tooltip = (f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
                   if ai > h else f"Human {h:.2f}%")
        cls = "ai-line" if ai > h else "human-line"
        # FIX: use stdlib html.escape — gr.utils.sanitize_html is not a
        # public/stable gradio API. User text must be escaped before being
        # interpolated into HTML.
        span = (f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
                f"{html.escape(ln)}</span>")
        highlighted.append(span)

    # n >= 1 here: text.strip() was non-empty, so tidy() left at least one
    # non-blank line — the division below is safe.
    if h_tot >= ai_tot:
        verdict = (f"<p><strong>Overall verdict:</strong> "
                   f"<span class='human-line' style='padding:4px 8px;'>"
                   f"Human-written {h_tot/n:.2f}%</span></p>")
    else:
        verdict = (f"<p><strong>Overall verdict:</strong> "
                   f"<span class='ai-line' style='padding:4px 8px;'>"
                   f"AI-generated {ai_tot/n:.2f}%</span></p>")
    return verdict + "<hr>" + "<br>".join(highlighted)
# ── Gradio interface ─────────────────────────────────────────────────────
# User-facing strings were mojibake-corrupted ("β¦" for "…", "Β©" for "©",
# "humanβ|β" for a separator); restored to readable text.
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown("""
    ### Orify Text Detector
    Paste any English text and press **Analyse**.
    <span class='human-line'>Green</span> = human | <span class='ai-line'>Red</span> = AI.
    Hover a line to see confidence and the top-3 AI models it resembles.
    """)
    inp = gr.Textbox(lines=8, placeholder="Paste text here…",
                     elem_classes=["input-area"])
    out = gr.HTML("", elem_classes=["output-box"])
    gr.Button("Analyse").click(analyse, inp, out)
    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")

if __name__ == "__main__":
    demo.launch()
|