Heng666 and muellerzr committed
Commit 40aa4b1 · 0 Parent(s)

Duplicate from hf-accelerate/model-memory-usage


Co-authored-by: Zachary Mueller <[email protected]>

Files changed (6)
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +182 -0
  4. measure_model_size.png +0 -0
  5. pre-requirements.txt +1 -0
  6. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Model Memory Utility
+ emoji: 🚀
+ colorFrom: pink
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 3.40.1
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ duplicated_from: hf-accelerate/model-memory-usage
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,182 @@
+ import os
+ import re
+ import webbrowser
+ import pandas as pd
+ import gradio as gr
+ from huggingface_hub import HfApi
+ from huggingface_hub.utils import RepositoryNotFoundError, GatedRepoError
+ from accelerate.commands.estimate import create_empty_model, check_has_model
+ from accelerate.utils import convert_bytes, calculate_maximum_sizes
+
+ # We need to store them as globals because gradio doesn't have a way for us to pass them in to the button
+ HAS_DISCUSSION = True
+ MODEL_NAME = None
+ LIBRARY = None
+ USER_TOKEN = None
+ TOKEN = os.environ.get("HUGGINGFACE_API_LOGIN", None)
+
+ def translate_llama2(text):
+     "Translates llama-2 to its hf counterpart"
+     if not text.endswith("-hf"):
+         return text + "-hf"
+     return text
+
+ def check_for_discussion(model_name: str):
+     "Checks if an automated discussion has been opened on the model by `model-sizer-bot`"
+     global TOKEN
+     api = HfApi(token=TOKEN)
+     discussions = list(api.get_repo_discussions(model_name))
+     return any(discussion.title == "[AUTOMATED] Model Memory Requirements" and discussion.author == "model-sizer-bot" for discussion in discussions)
+
+ def report_results():
+     "Reports the results of a memory calculation to the model's discussion page, and opens a new tab to it afterwards"
+     global MODEL_NAME, LIBRARY, TOKEN, USER_TOKEN
+     api = HfApi(token=TOKEN)
+     results, data = calculate_memory(MODEL_NAME, LIBRARY, ["fp32", "fp16", "int8", "int4"], access_token=USER_TOKEN, raw=True)
+     minimum = data[0]  # fp32 row; entries are dicts, so index the fields by name
+
+     USER_TOKEN = None
+     post = f"""# Model Memory Requirements\n
+
+ You will need about {minimum['Largest Layer or Residual Group']} VRAM to load this model for inference, and {minimum['Training using Adam']} VRAM to train it using Adam.
+
+ These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/hf-accelerate/model-memory-utility) on the Hub.
+
+ The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
+ When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.
+
+ When training with `Adam`, you can expect roughly 4x the reported results to be used. (1x for the model, 1x for the gradients, and 2x for the optimizer).
+
+ ## Results:
+
+ {results}
+ """
+     discussion = api.create_discussion(MODEL_NAME, "[AUTOMATED] Model Memory Requirements", description=post)
+     webbrowser.open_new_tab(discussion.url)
+
+ def convert_url_to_name(url: str):
+     "Converts a model URL to its name on the Hub"
+     results = re.findall(r"huggingface.co\/(.*?)#", url)
+     if len(results) < 1:
+         raise ValueError(f"URL {url} is not a valid model URL to the Hugging Face Hub")
+     return results[0]
+
+ def calculate_memory(model_name: str, library: str, options: list, access_token: str, raw=False):
+     "Calculates the memory usage for a model"
+     if "meta-llama" in model_name:
+         model_name = translate_llama2(model_name)
+     if library == "auto":
+         library = None
+     if "http" in model_name and "//" in model_name:
+         try:
+             model_name = convert_url_to_name(model_name)
+         except ValueError:
+             raise gr.Error(f"URL `{model_name}` is not a valid model URL to the Hugging Face Hub")
+     try:
+         model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
+     except GatedRepoError:
+         raise gr.Error(f"Model `{model_name}` is a gated model; please pass in your access token and try again if you have access. You can find your access token here: https://huggingface.co/settings/tokens.")
+     except RepositoryNotFoundError:
+         raise gr.Error(f"Model `{model_name}` was not found on the Hub, please try another model name.")
+     except ValueError:
+         raise gr.Error(f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)")
+     except (RuntimeError, OSError) as e:
+         library = check_has_model(e)
+         if library != "unknown":
+             raise gr.Error(f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo.")
+         raise gr.Error(f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`")
+     except ImportError:
+         # hacky way to check if it works with `trust_remote_code=False`
+         model = create_empty_model(model_name, library_name=library, trust_remote_code=False, access_token=access_token)
+     except Exception as e:
+         raise gr.Error(f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`")
+     total_size, largest_layer = calculate_maximum_sizes(model)
+
+     data = []
+
+     title = f"Memory Usage for '{model_name}'"
+     for dtype in options:
+         dtype_total_size = total_size
+         dtype_largest_layer = largest_layer[0]
+         if dtype in ("fp16", "bf16", "float16/bfloat16"):
+             dtype_total_size /= 2
+             dtype_largest_layer /= 2
+         elif dtype == "int8":
+             dtype_total_size /= 4
+             dtype_largest_layer /= 4
+         elif dtype == "int4":
+             dtype_total_size /= 8
+             dtype_largest_layer /= 8
+         dtype_training_size = convert_bytes(dtype_total_size * 4)
+         dtype_total_size = convert_bytes(dtype_total_size)
+         dtype_largest_layer = convert_bytes(dtype_largest_layer)
+         data.append({
+             "dtype": dtype,
+             "Largest Layer or Residual Group": dtype_largest_layer,
+             "Total Size": dtype_total_size,
+             "Training using Adam": dtype_training_size
+         })
+     global HAS_DISCUSSION, MODEL_NAME, LIBRARY
+     HAS_DISCUSSION = check_for_discussion(model_name)
+     MODEL_NAME = model_name
+     LIBRARY = library
+
+     if raw:
+         return pd.DataFrame(data).to_markdown(index=False), data
+
+     results = [
+         f'## {title}',
+         gr.update(visible=True, value=pd.DataFrame(data)),
+         gr.update(visible=not HAS_DISCUSSION)
+     ]
+     return results
+
+ with gr.Blocks() as demo:
+     with gr.Column():
+         gr.Markdown(
+             """<img src="https://huggingface.co/spaces/hf-accelerate/model-memory-usage/resolve/main/measure_model_size.png" style="float: left;" width="250" height="250"><h1>🤗 Model Memory Calculator</h1>
+
+ This tool will help you calculate how much vRAM is needed to train and perform big model inference
+ on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
+ is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
+
+ These calculations are accurate within a few percent at most, such as `bert-base-cased` being 413.68 MB and the calculator estimating 413.18 MB.
+
+ When performing inference, expect to add up to an additional 20% to this as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/).
+ More tests will be performed in the future to get a more accurate benchmark for each model.
+
+ Currently this tool supports all hosted models that use `transformers` and `timm`.
+
+ To use this tool, pass in the URL or model name of the model you want to calculate the memory usage for,
+ select which framework it originates from ("auto" will try to detect it from the model metadata), and
+ what precisions you want to use."""
+         )
+     out_text = gr.Markdown()
+     out = gr.DataFrame(
+         headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
+         interactive=False,
+         visible=False,
+     )
+     with gr.Row():
+         inp = gr.Textbox(label="Model Name or URL", value="bert-base-cased")
+     with gr.Row():
+         library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
+         options = gr.CheckboxGroup(
+             ["float32", "float16/bfloat16", "int8", "int4"],
+             value="float32",
+             label="Model Precision",
+         )
+         access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
+     with gr.Row():
+         btn = gr.Button("Calculate Memory Usage")
+         post_to_hub = gr.Button(value="Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False)
+     USER_TOKEN = access_token
+
+     btn.click(
+         calculate_memory, inputs=[inp, library, options, access_token], outputs=[out_text, out, post_to_hub],
+     )
+
+     post_to_hub.click(report_results).then(lambda: gr.Button.update(visible=False), outputs=post_to_hub)
+
+
+ demo.launch()
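For context on what the committed `calculate_memory` computes: `create_empty_model` instantiates the architecture without allocating real weights, `calculate_maximum_sizes` returns the total and largest-layer sizes in bytes at full precision, and the app then divides by 2, 4, or 8 for fp16/bf16, int8, and int4, and reports roughly 4x the load size for Adam training. A minimal offline sketch of the same estimate, reusing only the helpers the app already imports (the model name is just an illustrative choice):

```python
# Minimal sketch of the estimate app.py performs, run outside Gradio.
from accelerate.commands.estimate import create_empty_model
from accelerate.utils import calculate_maximum_sizes, convert_bytes

# Illustrative model; any Hub repo with transformers/timm metadata should work.
model = create_empty_model("bert-base-cased", library_name="transformers",
                           trust_remote_code=False, access_token=None)

# Sizes in bytes at full (fp32) precision; largest_layer[0] is the byte count.
total_size, largest_layer = calculate_maximum_sizes(model)

for dtype, divisor in [("float32", 1), ("float16/bfloat16", 2), ("int8", 4), ("int4", 8)]:
    load = total_size / divisor
    print(
        f"{dtype}: largest layer {convert_bytes(largest_layer[0] / divisor)}, "
        f"total {convert_bytes(load)}, Adam training ~{convert_bytes(load * 4)}"
    )
```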
measure_model_size.png ADDED
pre-requirements.txt ADDED
@@ -0,0 +1 @@
+ pip >= 23.2.0
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ accelerate @ git+https://github.com/huggingface/accelerate
+ transformers
+ timm
+ huggingface_hub
+ tabulate
+ einops