minemaster01 committed on
Commit
132a2dd
·
verified ·
1 Parent(s): a37c9c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -40
app.py CHANGED
@@ -1,34 +1,39 @@
1
  import gradio as gr
2
- import datetime
3
- import torch
4
  import os
5
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
- from datasets import Dataset, DatasetDict, disable_caching
 
 
7
  import pandas as pd
8
- from huggingface_hub import HfApi, HfFolder
 
 
9
 
10
- # CONFIG
11
- MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english" # Change if needed
12
- HF_DATASET_REPO = "M2ai/mgtd-logs" # Must be created beforehand
13
- # Token from environment in Spaces
14
  HF_TOKEN = os.getenv("Mgtd")
15
- # Load model + tokenizer
 
 
16
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
17
  model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
18
- # Log entries
19
- log_entries = []
 
20
 
21
  def setup_hf_dataset():
22
  global DATASET_CREATED
23
  if not DATASET_CREATED and HF_TOKEN:
24
  try:
25
  api = HfApi()
26
- create_repo(DATASET_NAME, repo_type="dataset", token=HF_TOKEN, exist_ok=True)
27
  DATASET_CREATED = True
28
- print(f"Dataset {DATASET_NAME} is ready")
29
- except Exception as e: print(f"Error setting up dataset: {e}")
 
30
  elif not HF_TOKEN:
31
- print("Warning: HF_TOKEN not set. Data will be stored locally only.")
32
 
33
  def infer_and_log(text_input):
34
  inputs = tokenizer(text_input, return_tensors="pt", truncation=True)
@@ -36,39 +41,49 @@ def infer_and_log(text_input):
36
  outputs = model(**inputs)
37
  logits = outputs.logits.tolist()
38
  predicted = torch.argmax(outputs.logits, dim=-1).item()
39
- output_label = model.config.id2label[predicted]
40
 
41
- log_entries.append({
42
- "timestamp": datetime.datetime.now().isoformat(),
 
 
 
43
  "input": text_input,
44
- "logits": logits,
45
- })
46
 
47
- return output_label
 
 
48
 
49
- def clear_fields():
50
- return "", ""
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- def save_to_hf():
53
- if not HF_TOKEN:
54
- return "No Hugging Face token found in environment. Cannot push dataset."
55
 
56
- if not log_entries:
57
- return "No logs to push."
58
 
59
- df = pd.DataFrame(log_entries)
60
- dataset = Dataset.from_pandas(df)
61
-
62
- dataset.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN)
63
- log_entries.clear()
64
- return f"Pushed {len(df)} logs to {HF_DATASET_REPO}!"
65
 
66
- with gr.Blocks() as demo:
67
- gr.Markdown("## AI-generated text detector")
68
 
69
  with gr.Row():
70
- input_box = gr.Textbox(label="Input Text", lines=18, interactive=True)
71
- output_box = gr.Textbox(label="Output Detected", lines=18)
72
 
73
  with gr.Row():
74
  submit_btn = gr.Button("Submit")
@@ -78,4 +93,4 @@ with gr.Blocks() as demo:
78
  clear_btn.click(fn=clear_fields, outputs=[input_box, output_box])
79
 
80
  if __name__ == "__main__":
81
- demo.launch()
 
1
  import gradio as gr
 
 
2
  import os
3
+ import json
4
+ import uuid
5
+ import torch
6
+ import datetime
7
  import pandas as pd
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
+ from huggingface_hub import HfApi, create_repo, upload_file
10
+ from datasets import Dataset
11
 
12
+ # Configuration
13
+ MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
14
+ HF_DATASET_REPO = "M2ai/mgtd-logs"
 
15
  HF_TOKEN = os.getenv("Mgtd")
16
+ DATASET_CREATED = False
17
+
18
+ # Load model and tokenizer
19
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
20
  model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
21
+
22
+ # Make directories
23
+ os.makedirs("logs", exist_ok=True)
24
 
25
  def setup_hf_dataset():
26
  global DATASET_CREATED
27
  if not DATASET_CREATED and HF_TOKEN:
28
  try:
29
  api = HfApi()
30
+ create_repo(HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN, exist_ok=True)
31
  DATASET_CREATED = True
32
+ print(f"Dataset {HF_DATASET_REPO} is ready")
33
+ except Exception as e:
34
+ print(f"Error setting up dataset: {e}")
35
  elif not HF_TOKEN:
36
+ print("Warning: HF_TOKEN not set. Logs will be saved locally only.")
37
 
38
  def infer_and_log(text_input):
39
  inputs = tokenizer(text_input, return_tensors="pt", truncation=True)
 
41
  outputs = model(**inputs)
42
  logits = outputs.logits.tolist()
43
  predicted = torch.argmax(outputs.logits, dim=-1).item()
44
+ label = model.config.id2label[predicted]
45
 
46
+ timestamp = datetime.datetime.now().isoformat()
47
+ submission_id = str(uuid.uuid4())
48
+ log_data = {
49
+ "id": submission_id,
50
+ "timestamp": timestamp,
51
  "input": text_input,
52
+ "logits": logits
53
+ }
54
 
55
+ log_file = f"logs/{timestamp.replace(':', '_')}.json"
56
+ with open(log_file, "w") as f:
57
+ json.dump(log_data, f, indent=2)
58
 
59
+ if HF_TOKEN and DATASET_CREATED:
60
+ try:
61
+ api = HfApi()
62
+ api.upload_file(
63
+ path_or_fileobj=log_file,
64
+ path_in_repo=f"logs/{os.path.basename(log_file)}",
65
+ repo_id=HF_DATASET_REPO,
66
+ repo_type="dataset",
67
+ token=HF_TOKEN
68
+ )
69
+ print(f"Uploaded log {submission_id} to {HF_DATASET_REPO}")
70
+ except Exception as e:
71
+ print(f"Error uploading to HF dataset: {e}")
72
 
73
+ return label
 
 
74
 
75
+ def clear_fields():
76
+ return "", ""
77
 
78
+ # Setup the dataset on startup
79
+ setup_hf_dataset()
 
 
 
 
80
 
81
+ with gr.Blocks() as app:
82
+ gr.Markdown("## AI Text Detector")
83
 
84
  with gr.Row():
85
+ input_box = gr.Textbox(label="Input Text", lines=10, interactive=True)
86
+ output_box = gr.Textbox(label="Output", lines=2, interactive=False)
87
 
88
  with gr.Row():
89
  submit_btn = gr.Button("Submit")
 
93
  clear_btn.click(fn=clear_fields, outputs=[input_box, output_box])
94
 
95
  if __name__ == "__main__":
96
+ app.launch()