|
import gradio as gr |
|
import os |
|
import json |
|
import uuid |
|
import torch |
|
import datetime |
|
import pandas as pd |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
from huggingface_hub import HfApi, create_repo, upload_file |
|
from datasets import Dataset |
|
|
|
|
|
# Pretrained SST-2 sentiment model used as the text classifier.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"

# Hugging Face dataset repo that receives uploaded inference logs.
HF_DATASET_REPO = "M2ai/mgtd-logs"

# API token read from the "Mgtd" environment variable; None if unset.
HF_TOKEN = os.getenv("Mgtd")

# Flipped to True once the dataset repo has been created/verified.
DATASET_CREATED = False
|
|
|
|
|
# Load tokenizer and classification model once at import time.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Local directory for per-request JSON logs (kept even when uploads work).
os.makedirs("logs", exist_ok=True)
|
|
|
def setup_hf_dataset():
    """Create (or verify) the Hugging Face dataset repo used for log uploads.

    Sets the module-level ``DATASET_CREATED`` flag on success so setup is
    attempted only once. When ``HF_TOKEN`` is unset, uploads stay disabled
    and a warning is printed; logs are then kept locally only.
    """
    global DATASET_CREATED
    if not DATASET_CREATED and HF_TOKEN:
        try:
            # exist_ok=True makes this idempotent across restarts.
            # (fix: dropped an unused `api = HfApi()` local — create_repo
            # is the module-level helper and needs no client object)
            create_repo(HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN, exist_ok=True)
            DATASET_CREATED = True
            print(f"Dataset {HF_DATASET_REPO} is ready")
        except Exception as e:
            # Best-effort: keep the app running even if repo setup fails.
            print(f"Error setting up dataset: {e}")
    elif not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Logs will be saved locally only.")
|
|
|
def infer_and_log(text_input):
    """Classify *text_input* with the loaded model and log the request.

    The input, raw logits, and predicted label are written to a local JSON
    file under ``logs/`` and, when a token is configured and the dataset
    repo is ready, uploaded to ``HF_DATASET_REPO``.

    Returns the predicted label string (from ``model.config.id2label``).
    """
    inputs = tokenizer(text_input, return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.tolist()
    predicted = torch.argmax(outputs.logits, dim=-1).item()
    label = model.config.id2label[predicted]

    timestamp = datetime.datetime.now().isoformat()
    submission_id = str(uuid.uuid4())
    log_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "input": text_input,
        "logits": logits,
        # fix: record the prediction alongside the raw logits — the log
        # previously omitted the inference result entirely.
        "label": label,
    }

    # ':' is not a safe filename character on all platforms.
    log_file = f"logs/{timestamp.replace(':', '_')}.json"
    with open(log_file, "w") as f:
        json.dump(log_data, f, indent=2)

    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            api.upload_file(
                path_or_fileobj=log_file,
                path_in_repo=f"logs/{os.path.basename(log_file)}",
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            print(f"Uploaded log {submission_id} to {HF_DATASET_REPO}")
        except Exception as e:
            # Upload failures must not break the user-facing inference.
            print(f"Error uploading to HF dataset: {e}")

    return label
|
|
|
def clear_fields():
    """Reset both the input and output textboxes to empty strings."""
    blank = ""
    return blank, blank
|
|
|
|
|
# One-time dataset-repo setup at import time (no-op without HF_TOKEN).
setup_hf_dataset()
|
|
|
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("## AI Text Detector")

    with gr.Row():
        # Left: user-editable input; right: read-only prediction output.
        input_box = gr.Textbox(label="Input Text", lines=10, interactive=True)
        output_box = gr.Textbox(label="Output", lines=2, interactive=False)

    with gr.Row():
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear")

    # Submit runs inference (and logging); Clear wipes both textboxes.
    submit_btn.click(fn=infer_and_log, inputs=input_box, outputs=output_box)
    clear_btn.click(fn=clear_fields, outputs=[input_box, output_box])

if __name__ == "__main__":
    app.launch()
|
|