Update app.py
Browse files
app.py
CHANGED
@@ -1,34 +1,39 @@
|
|
1 |
import gradio as gr
|
2 |
-
import datetime
|
3 |
-
import torch
|
4 |
import os
|
5 |
-
|
6 |
-
|
|
|
|
|
7 |
import pandas as pd
|
8 |
-
from
|
|
|
|
|
9 |
|
10 |
-
#
|
11 |
-
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
|
12 |
-
HF_DATASET_REPO = "M2ai/mgtd-logs"
|
13 |
-
# Token from environment in Spaces
|
14 |
HF_TOKEN = os.getenv("Mgtd")
|
15 |
-
|
|
|
|
|
16 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
17 |
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
|
18 |
-
|
19 |
-
|
|
|
20 |
|
21 |
def setup_hf_dataset():
|
22 |
global DATASET_CREATED
|
23 |
if not DATASET_CREATED and HF_TOKEN:
|
24 |
try:
|
25 |
api = HfApi()
|
26 |
-
create_repo(
|
27 |
DATASET_CREATED = True
|
28 |
-
print(f"Dataset {
|
29 |
-
except Exception as e:
|
|
|
30 |
elif not HF_TOKEN:
|
31 |
-
print("Warning: HF_TOKEN not set.
|
32 |
|
33 |
def infer_and_log(text_input):
|
34 |
inputs = tokenizer(text_input, return_tensors="pt", truncation=True)
|
@@ -36,39 +41,49 @@ def infer_and_log(text_input):
|
|
36 |
outputs = model(**inputs)
|
37 |
logits = outputs.logits.tolist()
|
38 |
predicted = torch.argmax(outputs.logits, dim=-1).item()
|
39 |
-
|
40 |
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
43 |
"input": text_input,
|
44 |
-
"logits": logits
|
45 |
-
}
|
46 |
|
47 |
-
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
-
|
53 |
-
if not HF_TOKEN:
|
54 |
-
return "No Hugging Face token found in environment. Cannot push dataset."
|
55 |
|
56 |
-
|
57 |
-
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
dataset.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN)
|
63 |
-
log_entries.clear()
|
64 |
-
return f"Pushed {len(df)} logs to {HF_DATASET_REPO}!"
|
65 |
|
66 |
-
with gr.Blocks() as
|
67 |
-
gr.Markdown("## AI
|
68 |
|
69 |
with gr.Row():
|
70 |
-
input_box = gr.Textbox(label="Input Text", lines=
|
71 |
-
output_box = gr.Textbox(label="Output
|
72 |
|
73 |
with gr.Row():
|
74 |
submit_btn = gr.Button("Submit")
|
@@ -78,4 +93,4 @@ with gr.Blocks() as demo:
|
|
78 |
clear_btn.click(fn=clear_fields, outputs=[input_box, output_box])
|
79 |
|
80 |
if __name__ == "__main__":
|
81 |
-
|
|
|
1 |
import gradio as gr
|
|
|
|
|
2 |
import os
|
3 |
+
import json
|
4 |
+
import uuid
|
5 |
+
import torch
|
6 |
+
import datetime
|
7 |
import pandas as pd
|
8 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
9 |
+
from huggingface_hub import HfApi, create_repo, upload_file
|
10 |
+
from datasets import Dataset
|
11 |
|
12 |
+
# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------
# Checkpoint name used for both the tokenizer and the classifier.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
# Hub dataset repository that receives the per-request log files.
HF_DATASET_REPO = "M2ai/mgtd-logs"
# Hub access token, read from the "Mgtd" environment variable (None if unset).
HF_TOKEN = os.environ.get("Mgtd")
# Set to True by setup_hf_dataset() once the dataset repo call has succeeded.
DATASET_CREATED = False

# The tokenizer and classifier are loaded once, at import time.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Local directory that holds one JSON log file per request.
os.makedirs("logs", exist_ok=True)
|
24 |
|
25 |
def setup_hf_dataset():
    """Make sure the Hub dataset repo used for log uploads exists.

    Reads the module-level ``HF_TOKEN`` and ``HF_DATASET_REPO`` and, on
    success, sets the module-level ``DATASET_CREATED`` flag to True so the
    repo call is not repeated.

    Without a token only a warning is printed. Any error raised while
    creating the repo is printed rather than propagated, which leaves
    ``DATASET_CREATED`` False.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Logs will be saved locally only.")
        return
    if DATASET_CREATED:
        # Already set up earlier in this process; nothing to do.
        return

    try:
        # exist_ok=True makes this a no-op when the repo is already there.
        create_repo(
            HF_DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        # Best effort: report the failure and carry on without Hub uploads.
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {HF_DATASET_REPO} is ready")
|
37 |
|
38 |
def infer_and_log(text_input):
    """Classify *text_input*, write a JSON log of the request, return the label.

    The log record (id, timestamp, input text, raw logits) is always written
    to the local ``logs/`` directory. When ``HF_TOKEN`` is set and
    ``DATASET_CREATED`` is True, the same file is also uploaded to
    ``HF_DATASET_REPO`` under ``logs/``. Upload errors are printed and do
    not affect the returned prediction.

    Args:
        text_input: Text to classify.

    Returns:
        The class name looked up in ``model.config.id2label`` for the
        highest-scoring logit.
    """
    inputs = tokenizer(text_input, return_tensors="pt", truncation=True)
    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.tolist()
    predicted = torch.argmax(outputs.logits, dim=-1).item()
    label = model.config.id2label[predicted]

    timestamp = datetime.datetime.now().isoformat()
    submission_id = str(uuid.uuid4())
    log_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "input": text_input,
        "logits": logits,
    }

    # A timestamp alone is not unique when two requests arrive together, so
    # the submission id is part of the file name to prevent one log from
    # overwriting another (locally and in the dataset repo). ':' is replaced
    # to keep the name portable across filesystems.
    log_file = f"logs/{timestamp.replace(':', '_')}_{submission_id}.json"
    with open(log_file, "w", encoding="utf-8") as f:
        json.dump(log_data, f, indent=2)

    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            api.upload_file(
                path_or_fileobj=log_file,
                path_in_repo=f"logs/{os.path.basename(log_file)}",
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            print(f"Uploaded log {submission_id} to {HF_DATASET_REPO}")
        except Exception as e:
            # Best effort: the local copy already exists, so only report.
            print(f"Error uploading to HF dataset: {e}")

    return label
|
|
|
|
|
74 |
|
75 |
+
def clear_fields():
    """Return a pair of empty strings, one per textbox being reset."""
    blank = ""
    return blank, blank
|
77 |
|
78 |
+
# Setup the dataset on startup
|
79 |
+
setup_hf_dataset()
|
|
|
|
|
|
|
|
|
80 |
|
81 |
+
with gr.Blocks() as app:
|
82 |
+
gr.Markdown("## AI Text Detector")
|
83 |
|
84 |
with gr.Row():
|
85 |
+
input_box = gr.Textbox(label="Input Text", lines=10, interactive=True)
|
86 |
+
output_box = gr.Textbox(label="Output", lines=2, interactive=False)
|
87 |
|
88 |
with gr.Row():
|
89 |
submit_btn = gr.Button("Submit")
|
|
|
93 |
clear_btn.click(fn=clear_fields, outputs=[input_box, output_box])
|
94 |
|
95 |
if __name__ == "__main__":
|
96 |
+
app.launch()
|