utensil muellerzr committed
Commit a168076 · 0 Parent(s)

Duplicate from hf-accelerate/model-memory-usage


Co-authored-by: Zachary Mueller <[email protected]>

Files changed (6)
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +169 -0
  4. measure_model_size.png +0 -0
  5. pre-requirements.txt +1 -0
  6. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Model Memory Utility
+ emoji: 🚀
+ colorFrom: pink
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 3.40.1
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ duplicated_from: hf-accelerate/model-memory-usage
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,169 @@
+ import os
+ import re
+ import webbrowser
+ import pandas as pd
+ import gradio as gr
+ from huggingface_hub import HfApi
+ from huggingface_hub.utils import RepositoryNotFoundError, GatedRepoError
+ from accelerate.commands.estimate import create_empty_model, check_has_model
+ from accelerate.utils import convert_bytes, calculate_maximum_sizes
+
+ # We need to store these as globals because gradio doesn't give us a way to pass them into the button callbacks
+ HAS_DISCUSSION = True
+ MODEL_NAME = None
+ LIBRARY = None
+ USER_TOKEN = None
+ TOKEN = os.environ.get("HUGGINGFACE_API_LOGIN", None)
+
+ def check_for_discussion(model_name: str):
+     "Checks if an automated discussion has been opened on the model by `model-sizer-bot`"
+     global TOKEN
+     api = HfApi(token=TOKEN)
+     discussions = list(api.get_repo_discussions(model_name))
+     return any(discussion.title == "[AUTOMATED] Model Memory Requirements" and discussion.author == "model-sizer-bot" for discussion in discussions)
+
+ def report_results():
+     "Reports the results of a memory calculation to the model's discussion page, and opens a new tab to it afterwards"
+     global MODEL_NAME, LIBRARY, TOKEN, USER_TOKEN
+     api = HfApi(token=TOKEN)
+     results, data = calculate_memory(MODEL_NAME, LIBRARY, ["fp32", "fp16", "int8", "int4"], access_token=USER_TOKEN, raw=True)
+     minimum = data[0]
+
+     USER_TOKEN = None
+     post = f"""# Model Memory Requirements\n
+
+ You will need about {minimum['Largest Layer or Residual Group']} VRAM to load this model for inference, and {minimum['Training using Adam']} VRAM to train it using Adam.
+
+ These calculations come from the [Model Memory Utility Space](https://hf.co/spaces/hf-accelerate/model-memory-utility) on the Hub.
+
+ The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
+ When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.
+
+ When training with `Adam`, you can expect roughly 4x the reported results to be used (1x for the model, 1x for the gradients, and 2x for the optimizer).
+
+ ## Results:
+
+ {results}
+ """
+     discussion = api.create_discussion(MODEL_NAME, "[AUTOMATED] Model Memory Requirements", description=post)
+     webbrowser.open_new_tab(discussion.url)
+
+ def convert_url_to_name(url: str):
+     "Converts a model URL to its name on the Hub"
+     results = re.findall(r"huggingface.co\/(.*?)#", url)
+     if len(results) < 1:
+         raise ValueError(f"URL {url} is not a valid model URL to the Hugging Face Hub")
+     return results[0]
+
+ def calculate_memory(model_name: str, library: str, options: list, access_token: str, raw=False):
+     "Calculates the memory usage for a model"
+     if library == "auto":
+         library = None
+     if "http" in model_name and "//" in model_name:
+         try:
+             model_name = convert_url_to_name(model_name)
+         except ValueError:
+             raise gr.Error(f"URL `{model_name}` is not a valid model URL to the Hugging Face Hub")
+     try:
+         model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
+     except GatedRepoError:
+         raise gr.Error(f"Model `{model_name}` is a gated model. Please make sure you have access, pass in your access token, and try again. You can find your access token here: https://huggingface.co/settings/tokens.")
+     except RepositoryNotFoundError:
+         raise gr.Error(f"Model `{model_name}` was not found on the Hub, please try another model name.")
+     except ValueError:
+         raise gr.Error(f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library to use (such as `transformers`).")
+     except (RuntimeError, OSError) as e:
+         library = check_has_model(e)
+         if library != "unknown":
+             raise gr.Error(f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo.")
+
+     total_size, largest_layer = calculate_maximum_sizes(model)
+
+     data = []
+
+     title = f"Memory Usage for '{model_name}'"
+     for dtype in options:
+         dtype_total_size = total_size
+         dtype_largest_layer = largest_layer[0]
+         if dtype in ("fp16", "bf16", "float16/bfloat16"):
+             dtype_total_size /= 2
+             dtype_largest_layer /= 2
+         elif dtype == "int8":
+             dtype_total_size /= 4
+             dtype_largest_layer /= 4
+         elif dtype == "int4":
+             dtype_total_size /= 8
+             dtype_largest_layer /= 8
+         dtype_training_size = convert_bytes(dtype_total_size * 4)
+         dtype_total_size = convert_bytes(dtype_total_size)
+         dtype_largest_layer = convert_bytes(dtype_largest_layer)
+         data.append({
+             "dtype": dtype,
+             "Largest Layer or Residual Group": dtype_largest_layer,
+             "Total Size": dtype_total_size,
+             "Training using Adam": dtype_training_size,
+         })
+     global HAS_DISCUSSION, MODEL_NAME, LIBRARY
+     HAS_DISCUSSION = check_for_discussion(model_name)
+     MODEL_NAME = model_name
+     LIBRARY = library
+
+     if raw:
+         return pd.DataFrame(data).to_markdown(index=False), data
+
+     results = [
+         f'## {title}',
+         gr.update(visible=True, value=pd.DataFrame(data)),
+         gr.update(visible=not HAS_DISCUSSION),
+     ]
+     return results
+
+ with gr.Blocks() as demo:
+     with gr.Column():
+         gr.Markdown(
+             """<img src="https://huggingface.co/spaces/hf-accelerate/model-memory-usage/resolve/main/measure_model_size.png" style="float: left;" width="250" height="250"><h1>🤗 Model Memory Calculator</h1>
+
+ This tool will help you calculate how much vRAM is needed to train and perform big model inference
+ on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
+ is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
+
+ These calculations are accurate to within a few percent at most; for example, `bert-base-cased` is 413.68 MB and the calculator estimates 413.18 MB.
+
+ When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/).
+ More tests will be performed in the future to get a more accurate benchmark for each model.
+
+ Currently this tool supports all hosted models that use `transformers` and `timm`.
+
+ To use this tool, pass in the URL or model name of the model you want to calculate the memory usage for,
+ select which framework it originates from ("auto" will try to detect it from the model metadata), and
+ what precisions you want to use."""
+         )
+     out_text = gr.Markdown()
+     out = gr.DataFrame(
+         headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
+         interactive=False,
+         visible=False,
+     )
+     with gr.Row():
+         inp = gr.Textbox(label="Model Name or URL", value="bert-base-cased")
+     with gr.Row():
+         library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
+         options = gr.CheckboxGroup(
+             ["float32", "float16/bfloat16", "int8", "int4"],
+             value="float32",
+             label="Model Precision",
+         )
+         access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
+     with gr.Row():
+         btn = gr.Button("Calculate Memory Usage")
+         post_to_hub = gr.Button(value="Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False)
+     USER_TOKEN = access_token
+
+     btn.click(
+         calculate_memory, inputs=[inp, library, options, access_token], outputs=[out_text, out, post_to_hub],
+     )
+
+     post_to_hub.click(report_results).then(lambda: gr.Button.update(visible=False), outputs=post_to_hub)
+
+
+ demo.launch()
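
The description embedded in app.py states the rule of thumb the calculator applies: weight memory scales with the bytes per parameter of the chosen dtype, and training with Adam needs roughly 4x the weight memory (weights, gradients, and two optimizer states). The following is an illustrative sketch of that arithmetic only, using a hypothetical 110M-parameter count as a stand-in; the app itself reads exact per-layer sizes from the checkpoint via calculate_maximum_sizes.

# Illustrative only: the parameter count is a hypothetical stand-in,
# not read from a real model the way app.py does.
params = 110_000_000
bytes_per_param = {"float32": 4, "float16/bfloat16": 2, "int8": 1, "int4": 0.5}

for dtype, width in bytes_per_param.items():
    load_bytes = params * width    # memory needed just to hold the weights
    train_bytes = load_bytes * 4   # ~4x for Adam: weights + gradients + 2x optimizer state
    print(f"{dtype:>18}: load ~{load_bytes / 1e6:,.0f} MB, train (Adam) ~{train_bytes / 1e6:,.0f} MB")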
measure_model_size.png ADDED
pre-requirements.txt ADDED
@@ -0,0 +1 @@
+ pip >= 23.2.0
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ accelerate @ git+https://github.com/huggingface/accelerate
+ transformers
+ timm
+ huggingface_hub
+ tabulate
+ einops
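
For anyone reproducing the estimate outside the Gradio UI, the dependencies listed above are enough to call the same accelerate helpers that app.py wraps. A minimal sketch, using the function names exactly as imported in app.py and the app's default bert-base-cased example (assumes these requirements are installed):

# Direct use of the estimation helpers wrapped by app.py.
from accelerate.commands.estimate import create_empty_model
from accelerate.utils import calculate_maximum_sizes, convert_bytes

# create_empty_model instantiates the model on the meta device, so no weights are downloaded.
model = create_empty_model("bert-base-cased", library_name="transformers", trust_remote_code=False)
total_size, largest_layer = calculate_maximum_sizes(model)
print("Total size:", convert_bytes(total_size))
print("Largest layer:", convert_bytes(largest_layer[0]))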