giskard-evaluator

Running

App Files Files Community

200

weixuan-giskard committed on Jan 12, 2024

Commit

f25dac2

1 Parent(s): 02f1357

Use "giskard-bot/evaluator-leaderboard" and fix any None in cli

Browse files

Files changed (1) hide show

text_classification_ui_helpers.py +40 -24

text_classification_ui_helpers.py CHANGED Viewed

@@ -7,16 +7,14 @@ import threading
 import datasets
 import gradio as gr
 from transformers.pipelines import TextClassificationPipeline
-from wordings import get_styled_input
 from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
-                      write_column_mapping,
-                      write_log_to_user_file)
 from text_classification import (check_model, get_example_prediction,
                                  get_labels_and_features_from_dataset)
 from wordings import (CHECK_CONFIG_OR_SPLIT_RAW,
                       CONFIRM_MAPPING_DETAILS_FAIL_RAW,
-                      MAPPING_STYLED_ERROR_WARNING)
 MAX_LABELS = 40
 MAX_FEATURES = 20
@@ -65,9 +63,7 @@ def deselect_run_inference(run_local):
         return (gr.update(visible=True), gr.update(value=True))
-def write_column_mapping_to_config(
-    uid, *labels
-):
     # TODO: Substitute 'text' with more features for zero-shot
     # we are not using ds features because we only support "text" for now
     all_mappings = read_column_mapping(uid)
@@ -75,10 +71,16 @@ def write_column_mapping_to_config(
     if labels is None:
         return
     all_mappings = export_mappings(all_mappings, "labels", None, labels[:MAX_LABELS])
-    all_mappings = export_mappings(all_mappings, "features", ["text"], labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)])
     write_column_mapping(all_mappings, uid)
 def export_mappings(all_mappings, key, subkeys, values):
     if key not in all_mappings.keys():
         all_mappings[key] = dict()
@@ -88,12 +90,13 @@ def export_mappings(all_mappings, key, subkeys, values):
     if not subkeys:
         logging.debug(f"subkeys is empty for {key}")
         return all_mappings
     for i, subkey in enumerate(subkeys):
         if subkey:
             all_mappings[key][subkey] = values[i % len(values)]
     return all_mappings
 def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label, uid):
     model_labels = list(model_id2label.values())
     all_mappings = read_column_mapping(uid)
@@ -141,7 +144,10 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label
     return lables + features
-def precheck_model_ds_enable_example_btn(model_id, dataset_id, dataset_config, dataset_split):
     ppl = check_model(model_id)
     if ppl is None or not isinstance(ppl, TextClassificationPipeline):
         gr.Warning("Please check your model.")
@@ -155,6 +161,7 @@ def precheck_model_ds_enable_example_btn(model_id, dataset_id, dataset_config, d
     return gr.update(interactive=True)
 def align_columns_and_show_prediction(
     model_id, dataset_id, dataset_config, dataset_split, uid
 ):
@@ -230,6 +237,7 @@ def align_columns_and_show_prediction(
         *column_mappings,
     )
 def check_column_mapping_keys_validity(all_mappings):
     if all_mappings is None:
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
@@ -239,6 +247,7 @@ def check_column_mapping_keys_validity(all_mappings):
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
 def construct_label_and_feature_mapping(all_mappings):
     label_mapping = {}
     for i, label in zip(
@@ -252,6 +261,7 @@ def construct_label_and_feature_mapping(all_mappings):
     feature_mapping = all_mappings["features"]
     return label_mapping, feature_mapping
 def try_submit(m_id, d_id, config, split, local, inference, inference_token, uid):
     all_mappings = read_column_mapping(uid)
     check_column_mapping_keys_validity(all_mappings)
@@ -259,8 +269,8 @@ def try_submit(m_id, d_id, config, split, local, inference, inference_token, uid
     leaderboard_dataset = None
     if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
-        leaderboard_dataset = "ZeroCommand/test-giskard-report"
     if local:
         inference_type = "hf_pipeline"
     if inference and inference_token:
@@ -279,10 +289,6 @@ def try_submit(m_id, d_id, config, split, local, inference, inference_token, uid
         config,
         "--dataset_split",
         split,
-        "--hf_token",
-        os.environ.get(HF_WRITE_TOKEN),
-        "--discussion_repo",
-        os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
         "--output_format",
         "markdown",
         "--output_portal",
@@ -293,13 +299,28 @@ def try_submit(m_id, d_id, config, split, local, inference, inference_token, uid
         json.dumps(label_mapping),
         "--scan_config",
         get_yaml_path(uid),
-        "--leaderboard_dataset",
-        leaderboard_dataset,
         "--inference_type",
         inference_type,
         "--inference_api_token",
         inference_token,
     ]
     if os.environ.get(HF_GSK_HUB_KEY):
         command.append("--giskard_hub_api_key")
         command.append(os.environ.get(HF_GSK_HUB_KEY))
@@ -327,11 +348,6 @@ def try_submit(m_id, d_id, config, split, local, inference, inference_token, uid
     gr.Info(f"Start local evaluation on {eval_str}")
     return (
-        gr.update(interactive=False),
         gr.update(lines=5, visible=True, interactive=False),
     )
-    # TODO: Submit task to an endpoint")
-    # return (gr.update(interactive=True), gr.update(visible=False))  # Submit button

 import datasets
 import gradio as gr
 from transformers.pipelines import TextClassificationPipeline
 from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
+                      write_column_mapping, write_log_to_user_file)
 from text_classification import (check_model, get_example_prediction,
                                  get_labels_and_features_from_dataset)
 from wordings import (CHECK_CONFIG_OR_SPLIT_RAW,
                       CONFIRM_MAPPING_DETAILS_FAIL_RAW,
+                      MAPPING_STYLED_ERROR_WARNING, get_styled_input)
 MAX_LABELS = 40
 MAX_FEATURES = 20
         return (gr.update(visible=True), gr.update(value=True))
+def write_column_mapping_to_config(uid, *labels):
     # TODO: Substitute 'text' with more features for zero-shot
     # we are not using ds features because we only support "text" for now
     all_mappings = read_column_mapping(uid)
     if labels is None:
         return
     all_mappings = export_mappings(all_mappings, "labels", None, labels[:MAX_LABELS])
+    all_mappings = export_mappings(
+        all_mappings,
+        "features",
+        ["text"],
+        labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)],
+    )
     write_column_mapping(all_mappings, uid)
 def export_mappings(all_mappings, key, subkeys, values):
     if key not in all_mappings.keys():
         all_mappings[key] = dict()
     if not subkeys:
         logging.debug(f"subkeys is empty for {key}")
         return all_mappings
     for i, subkey in enumerate(subkeys):
         if subkey:
             all_mappings[key][subkey] = values[i % len(values)]
     return all_mappings
 def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label, uid):
     model_labels = list(model_id2label.values())
     all_mappings = read_column_mapping(uid)
     return lables + features
+def precheck_model_ds_enable_example_btn(
+    model_id, dataset_id, dataset_config, dataset_split
+):
     ppl = check_model(model_id)
     if ppl is None or not isinstance(ppl, TextClassificationPipeline):
         gr.Warning("Please check your model.")
     return gr.update(interactive=True)
 def align_columns_and_show_prediction(
     model_id, dataset_id, dataset_config, dataset_split, uid
 ):
         *column_mappings,
     )
 def check_column_mapping_keys_validity(all_mappings):
     if all_mappings is None:
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
 def construct_label_and_feature_mapping(all_mappings):
     label_mapping = {}
     for i, label in zip(
     feature_mapping = all_mappings["features"]
     return label_mapping, feature_mapping
 def try_submit(m_id, d_id, config, split, local, inference, inference_token, uid):
     all_mappings = read_column_mapping(uid)
     check_column_mapping_keys_validity(all_mappings)
     leaderboard_dataset = None
     if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
+        leaderboard_dataset = "giskard-bot/evaluator-leaderboard"
     if local:
         inference_type = "hf_pipeline"
     if inference and inference_token:
         config,
         "--dataset_split",
         split,
         "--output_format",
         "markdown",
         "--output_portal",
         json.dumps(label_mapping),
         "--scan_config",
         get_yaml_path(uid),
         "--inference_type",
         inference_type,
         "--inference_api_token",
         inference_token,
     ]
+    # The token to publish post
+    if os.environ.get(HF_WRITE_TOKEN):
+        command.append("--hf_token")
+        command.append(os.environ.get(HF_WRITE_TOKEN))
+    # The repo to publish post
+    if os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID):
+        command.append("--discussion_repo")
+        # TODO: Replace by the model id
+        command.append(os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID))
+    # The repo to publish for ranking
+    if leaderboard_dataset:
+        command.append("--leaderboard_dataset")
+        command.append(leaderboard_dataset)
+    # The info to upload to Giskard hub
     if os.environ.get(HF_GSK_HUB_KEY):
         command.append("--giskard_hub_api_key")
         command.append(os.environ.get(HF_GSK_HUB_KEY))
     gr.Info(f"Start local evaluation on {eval_str}")
     return (
+        gr.update(interactive=False),   # Submit button
         gr.update(lines=5, visible=True, interactive=False),
     )