# import json
# import os
# from collections import defaultdict
#
# import huggingface_hub
# from huggingface_hub import ModelCard
# from huggingface_hub.hf_api import ModelInfo
# from transformers import AutoConfig
# from transformers.models.auto.tokenization_auto import AutoTokenizer
#
#
# def check_model_card(repo_id: str) -> tuple[bool, str]:
#     """Checks if the model card and license exist and have been filled"""
#     try:
#         card = ModelCard.load(repo_id)
#     except huggingface_hub.utils.EntryNotFoundError:
#         return (
#             False,
#             "Please add a model card to your model to explain how you trained/fine-tuned it.",
#         )
#
#     # Enforce license metadata
#     if card.data.license is None:
#         if not ("license_name" in card.data and "license_link" in card.data):
#             return False, (
#                 "License not found. Please add a license to your model card using the `license` metadata or a"
#                 " `license_name`/`license_link` pair."
#             )
#
#     # Enforce card content
#     if len(card.text) < 200:
#         return False, "Please add a description to your model card, it is too short."
#
#     return True, ""
#
#
def is_model_on_hub(
    model_name: str,
    revision: str,
    token: str | None = None,
    trust_remote_code: bool = False,
    test_tokenizer: bool = False,
) -> "tuple[bool, str | None, AutoConfig | None]":
    """Checks whether the model model_name is on the hub, and whether it
    (and its tokenizer) can be loaded with AutoClasses."""
    raise NotImplementedError("Replace with huggingface_hub API")
    # try:
    #     config = AutoConfig.from_pretrained(
    #         model_name,
    #         revision=revision,
    #         trust_remote_code=trust_remote_code,
    #         token=token,
    #     )
    #     if test_tokenizer:
    #         try:
    #             AutoTokenizer.from_pretrained(
    #                 model_name,
    #                 revision=revision,
    #                 trust_remote_code=trust_remote_code,
    #                 token=token,
    #             )
    #         except ValueError as e:
    #             return (
    #                 False,
    #                 f"uses a tokenizer which is not in a transformers release: {e}",
    #                 None,
    #             )
    #         except Exception:
    #             return (
    #                 False,
    #                 "'s tokenizer cannot be loaded. Is your tokenizer class in a
    #                 stable transformers release, and correctly configured?",
    #                 None,
    #             )
    #     return True, None, config
    #
    # except ValueError:
    #     return (
    #         False,
    #         "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow
    #         these models to be automatically submitted to the leaderboard.",
    #         None,
    #     )
    #
    # except Exception:
    #     return False, "was not found on hub!", None


#
#
# def get_model_size(model_info: ModelInfo, precision: str):
#     """Gets the model size from the configuration, or the model name if the
#     configuration does not contain the information."""
#     try:
#         model_size = round(model_info.safetensors["total"] / 1e9, 3)
#     except (AttributeError, TypeError, KeyError):
#         return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
#
#     size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
#     model_size = size_factor * model_size
#     return model_size
#
#
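# A minimal sketch of the same size lookup against a recent huggingface_hub,
# where ModelInfo.safetensors is a SafeTensorsInfo object exposing a `total`
# parameter count (older releases returned the plain dict the commented code
# above indexes into). The function name is an illustrative assumption.
def get_model_size_via_api(repo_id: str, precision: str, token: str | None = None) -> float:
    from huggingface_hub import model_info as hf_model_info

    info = hf_model_info(repo_id, token=token)
    if info.safetensors is None:
        return 0  # Unknown model sizes are indicated as 0, as in the code above.
    model_size = round(info.safetensors.total / 1e9, 3)
    # Same GPTQ adjustment heuristic as the commented-out function above.
    size_factor = 8 if (precision == "GPTQ" or "gptq" in repo_id.lower()) else 1
    return size_factor * model_size

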
# def get_model_arch(model_info: ModelInfo):
#     """Gets the model architecture from the configuration"""
#     return model_info.config.get("architectures", "Unknown")
#
#
# def already_submitted_models(requested_models_dir: str) -> tuple[set[str], dict[str, list[str]]]:
#     """Gathers the set of already submitted models, and their submission dates per organisation, to avoid duplicates"""
#     depth = 1
#     file_names = []
#     users_to_submission_dates = defaultdict(list)
#
#     for root, _, files in os.walk(requested_models_dir):
#         current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
#         if current_depth == depth:
#             for file in files:
#                 if not file.endswith(".json"):
#                     continue
#                 with open(os.path.join(root, file)) as f:
#                     info = json.load(f)
#                     file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
#
#                     # Record submission dates per organisation
#                     if info["model"].count("/") == 0 or "submitted_time" not in info:
#                         continue
#                     organisation, _ = info["model"].split("/")
#                     users_to_submission_dates[organisation].append(info["submitted_time"])
#
#     return set(file_names), users_to_submission_dates