Commit 8092547 · Parent: 8dc0e4d
fix-uid-bug-and-add-debug-section (#23)
- fix uid reload and show log files (6e92592af2988e14cab1358ac455a313679bef52)
- fix typo (97c6f8e64fe737eb05234ccd746d062163c7fbe9)
- add closing files (a2810d675bbc1566b5a57b99774675ba1dc2d527)
- clean up code (94e80555ec10c4ab120d5abaa3f16508d55330ee)
Co-authored-by: zcy <[email protected]>
- app.py +3 -0
- app_debug.py +37 -0
- app_text_classification.py +19 -11
- io_utils.py +12 -1
- text_classification.py +0 -1
- text_classification_ui_helpers.py +1 -1
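The uid fix hinges on a Gradio detail: a component default given as a callable is re-evaluated on every page load, while a value computed once at import time is frozen for all sessions. A minimal sketch of the before/after behavior (component names follow the diff below):

```python
import uuid

import gradio as gr

with gr.Blocks() as demo:
    # Broken: uuid.uuid4() runs once when the module is imported, so every
    # visitor who reloads the page sees the same evaluation id.
    # uid_label = gr.Textbox(label="Evaluation ID:", value=str(uuid.uuid4()))

    # Fixed (as in this commit): pass the callable itself; Gradio calls it
    # on each page load, giving every session a fresh id.
    uid_label = gr.Textbox(label="Evaluation ID:", value=uuid.uuid4, visible=False)

demo.launch()
```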
app.py
CHANGED
```diff
@@ -3,6 +3,7 @@ import gradio as gr
 
 from app_leaderboard import get_demo as get_demo_leaderboard
 from app_text_classification import get_demo as get_demo_text_classification
+from app_debug import get_demo as get_demo_debug
 from run_jobs import start_process_run_job, stop_thread
 
 try:
@@ -11,6 +12,8 @@ try:
         get_demo_text_classification(demo)
         with gr.Tab("Leaderboard"):
             get_demo_leaderboard()
+        with gr.Tab("Logs(Debug)"):
+            get_demo_debug(demo)
 
     start_process_run_job()
 
```
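For context, app.py follows a simple composition pattern: each `app_*.py` module exposes a `get_demo` builder, and the top-level Blocks mounts each one under its own tab. A reduced, self-contained sketch of that pattern (the stub here stands in for the real `app_debug.get_demo`; the real file also wires `start_process_run_job` and `stop_thread`):

```python
import gradio as gr

# Stand-in for "from app_debug import get_demo as get_demo_debug";
# each app_*.py module exposes a builder with this shape.
def get_demo_debug(demo: gr.Blocks) -> None:
    gr.Markdown("debug widgets go here")

with gr.Blocks() as demo:
    with gr.Tab("Text Classification"):
        gr.Markdown("classification UI goes here")
    with gr.Tab("Logs(Debug)"):
        get_demo_debug(demo)  # receives the Blocks so it can attach demo.load hooks

demo.launch()
```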
app_debug.py
ADDED
```diff
@@ -0,0 +1,37 @@
+import gradio as gr
+import pipe
+from os import listdir
+from os.path import isfile, join
+
+LOG_PATH = "./tmp"
+CONFIG_PATH = "./cicd/configs"
+
+
+def get_accordions_of_files(path, files):
+    components = []
+    for file in files:
+        with gr.Row():
+            with gr.Accordion(label=file, open=False):
+                with gr.Row():
+                    with open(join(path, file), "r") as f:
+                        gr.Markdown(f.read())
+    return components
+
+
+def get_accordions_of_log_files():
+    log_files = [f for f in listdir(LOG_PATH) if isfile(join(LOG_PATH, f)) and f.endswith("_log")]
+    return get_accordions_of_files(LOG_PATH, log_files)
+
+
+def get_accordions_of_config_files():
+    config_files = [f for f in listdir(CONFIG_PATH) if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")]
+    return get_accordions_of_files(CONFIG_PATH, config_files)
+
+
+def get_demo(demo):
+    with gr.Row():
+        # check if jobs is an attribute of pipe
+        if hasattr(pipe, "jobs"):
+            gr.Markdown(f"current jobs in queue: {len(pipe.jobs)}")
+    with gr.Accordion(label="Config Files", open=False):
+        config_accordion = get_accordions_of_config_files()
+        demo.load(get_accordions_of_config_files, outputs=config_accordion, every=1)
+    with gr.Accordion(label="Log Files", open=False):
+        log_accordions = get_accordions_of_log_files()
+        demo.load(get_accordions_of_log_files, outputs=log_accordions, every=1)
```
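app_debug.py leans on Gradio's periodic execution: `demo.load(fn, outputs=..., every=1)` re-runs `fn` roughly once a second for each connected client, which is what keeps the config and log accordions tracking the files on disk. Note that `every` only fires when the queue is enabled. A self-contained sketch of that refresh loop (the `render_log_tail` helper is illustrative, not from the repo):

```python
import time

import gradio as gr

def render_log_tail():
    # stand-in for re-reading ./tmp/*_log on each tick
    return f"refreshed at {time.strftime('%H:%M:%S')}"

with gr.Blocks() as demo:
    log_view = gr.Markdown(render_log_tail())
    demo.load(render_log_tail, outputs=log_view, every=1)  # periodic refresh

demo.queue().launch()  # the queue must be enabled for every= to work
```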
app_text_classification.py
CHANGED
```diff
@@ -27,11 +27,10 @@ CONFIG_PATH = "./config.yaml"
 
 
 def get_demo(demo):
-    uid = uuid.uuid4()
     with gr.Row():
         gr.Markdown(INTRODUCTION_MD)
     uid_label = gr.Textbox(
-        label="Evaluation ID:", value=uid
+        label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
     )
     with gr.Row():
         model_id_input = gr.Textbox(
@@ -70,19 +69,28 @@ def get_demo(demo):
 
     with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
         run_local = gr.Checkbox(value=True, label="Run in this Space")
-
-
+        run_inference = gr.Checkbox(value="False", label="Run with Inference API")
+        @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[run_inference])
+        def get_run_mode(uid):
+            return (
+                gr.update(value=read_inference_type(uid) == "hf_inference_api" and not run_local.value)
+            )
         inference_token = gr.Textbox(value="", label="HF Token for Inference API", visible=False, interactive=True)
 
     with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
-        selected = read_scanners(uid)
-        # currently we remove data_leakage from the default scanners
-        # Reason: data_leakage barely raises any issues and takes too many requests
-        # when using inference API, causing rate limit error
-        scan_config = selected + ["data_leakage"]
         scanners = gr.CheckboxGroup(
-            choices=scan_config, value=selected, label="Scan Settings", visible=True
+            label="Scan Settings", visible=True
         )
+        @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
+        def get_scanners(uid):
+            selected = read_scanners(uid)
+            # currently we remove data_leakage from the default scanners
+            # Reason: data_leakage barely raises any issues and takes too many requests
+            # when using inference API, causing rate limit error
+            scan_config = selected + ["data_leakage"]
+            return (gr.update(
+                choices=scan_config, value=selected, label="Scan Settings", visible=True
+            ))
 
     with gr.Row():
         run_btn = gr.Button(
@@ -98,7 +106,7 @@ def get_demo(demo):
 
     dataset_id_input.change(
         check_dataset_and_get_config,
-        inputs=[dataset_id_input, uid_label], outputs=[dataset_config_input]
+        inputs=[dataset_id_input], outputs=[dataset_config_input]
     )
 
     dataset_config_input.change(
```
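The two new handlers use `@gr.on` as a decorator (available in recent Gradio versions): the decorated function is registered against the listed triggers and pushes a `gr.update(...)` into its outputs, which is how the scanner choices and the inference checkbox get re-read per session once the uid changes. A minimal sketch of the scanner case; the `read_scanners` stub is a placeholder for the real io_utils helper:

```python
import gradio as gr

def read_scanners(uid: str) -> list[str]:
    # placeholder for io_utils.read_scanners, which reads the per-uid YAML
    return ["ethical_bias", "performance"]

with gr.Blocks() as demo:
    uid_label = gr.Textbox(value="demo-uid", visible=False, interactive=False)
    scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)

    @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
    def get_scanners(uid):
        selected = read_scanners(uid)
        # data_leakage is offered as a choice but left unchecked by default
        return gr.update(choices=selected + ["data_leakage"], value=selected)

demo.launch()
```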
io_utils.py
CHANGED
```diff
@@ -1,6 +1,5 @@
 import os
 import subprocess
-import gradio as gr
 import yaml
 
 import pipe
@@ -28,6 +27,7 @@ def read_scanners(uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         scanners = config.get("detectors", [])
+        f.close()
     return scanners
 
 
@@ -37,9 +37,12 @@ def write_scanners(scanners, uid):
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             config["detectors"] = scanners
+        f.close()
     # save scanners to detectors in yaml
     with open(get_yaml_path(uid), "w") as f:
         yaml.dump(config, f, Dumper=Dumper)
+        f.close()
+
 
 
 # read model_type from yaml file
@@ -48,6 +51,7 @@ def read_inference_type(uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
         inference_type = config.get("inference_type", "")
+        f.close()
    return inference_type
 
 
@@ -62,9 +66,11 @@ def write_inference_type(use_inference, inference_token, uid):
            config["inference_type"] = "hf_pipeline"
            # FIXME: A quick and temp fix for missing token
            config["inference_token"] = ""
+        f.close()
     # save inference_type to inference_type in yaml
     with open(get_yaml_path(uid), "w") as f:
         yaml.dump(config, f, Dumper=Dumper)
+        f.close()
 
 
 
@@ -75,6 +81,7 @@ def read_column_mapping(uid):
         config = yaml.load(f, Loader=yaml.FullLoader)
         if config:
             column_mapping = config.get("column_mapping", dict())
+        f.close()
     return column_mapping
 
 
@@ -82,6 +89,7 @@ def read_column_mapping(uid):
 def write_column_mapping(mapping, uid):
     with open(get_yaml_path(uid), "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
+        f.close()
 
     if config is None:
         return
@@ -92,6 +100,8 @@ def write_column_mapping(mapping, uid):
 
     with open(get_yaml_path(uid), "w") as f:
         yaml.dump(config, f, Dumper=Dumper)
+        f.close()
+
 
 
 # convert column mapping dataframe to json
@@ -114,6 +124,7 @@ def get_logs_file(uid):
 def write_log_to_user_file(id, log):
     with open(f"./tmp/{id}_log", "a") as f:
         f.write(log)
+        f.close()
 
 
 def save_job_to_pipe(id, job, lock):
```
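One note on the "add closing files" change: `with open(...)` already closes the file when the block exits, so the added `f.close()` calls are redundant, though harmless, since `close()` on an already-closed file is a no-op. A quick demonstration (the file name is illustrative):

```python
with open("example.txt", "w") as f:
    f.write("hello\n")
    f.close()    # explicit close, as added in this commit
print(f.closed)  # True; the context manager's implicit close on exit
                 # is then a no-op on the already-closed file
```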
text_classification.py
CHANGED
```diff
@@ -171,7 +171,6 @@ def infer_output_label_column(
         str(i): id2label_mapping[label]
         for i, label in zip(id2label.keys(), dataset_labels)
     }
-    # print('>>>>> column_mapping >>>>>', column_mapping)
 
     id2label_df = pd.DataFrame(
         {
```
text_classification_ui_helpers.py
CHANGED
```diff
@@ -23,7 +23,7 @@ HF_SPACE_ID = "SPACE_ID"
 HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
 
 
-def check_dataset_and_get_config(dataset_id, uid):
+def check_dataset_and_get_config(dataset_id):
     try:
         # write_column_mapping(None, uid) # reset column mapping
         configs = datasets.get_dataset_config_names(dataset_id)
```