Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -77,12 +77,12 @@ MODEL_CONFIG = {
|
|
77 |
"medium": { # 10B-30B
|
78 |
"max_memory": {0: "40GiB"},
|
79 |
"offload": False,
|
80 |
-
"quantization":
|
81 |
},
|
82 |
"large": { # >30B
|
83 |
"max_memory": {0: "70GiB"},
|
84 |
"offload": True,
|
85 |
-
"quantization":
|
86 |
}
|
87 |
}
|
88 |
|
@@ -166,8 +166,17 @@ def load_model(model_names):
|
|
166 |
"trust_remote_code": True,
|
167 |
}
|
168 |
|
169 |
-
#
|
170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
# Apply quantization
|
172 |
from transformers import BitsAndBytesConfig
|
173 |
quantization_config = BitsAndBytesConfig(
|
@@ -180,7 +189,7 @@ def load_model(model_names):
|
|
180 |
device_map="auto",
|
181 |
max_memory=config["max_memory"],
|
182 |
torch_dtype=DTYPE,
|
183 |
-
quantization_config=quantization_config
|
184 |
offload_folder="offload" if config["offload"] else None,
|
185 |
**common_params
|
186 |
)
|
|
|
77 |
"medium": { # 10B-30B
|
78 |
"max_memory": {0: "40GiB"},
|
79 |
"offload": False,
|
80 |
+
"quantization": None # Quantization disabled because of BitsAndBytes issues
|
81 |
},
|
82 |
"large": { # >30B
|
83 |
"max_memory": {0: "70GiB"},
|
84 |
"offload": True,
|
85 |
+
"quantization": None # Quantization disabled because of BitsAndBytes issues
|
86 |
}
|
87 |
}
|
88 |
|
|
|
166 |
"trust_remote_code": True,
|
167 |
}
|
168 |
|
169 |
+
# Check whether BitsAndBytes is available
|
170 |
+
try:
|
171 |
+
import bitsandbytes
|
172 |
+
has_bitsandbytes = True
|
173 |
+
print("BitsAndBytes 라이브러리 로드 성공")
|
174 |
+
except ImportError:
|
175 |
+
has_bitsandbytes = False
|
176 |
+
print("BitsAndBytes 라이브러리를 찾을 수 없습니다. 양자화 없이 모델을 로드합니다.")
|
177 |
+
|
178 |
+
# When quantization is configured and BitsAndBytes is available
|
179 |
+
if config["quantization"] and has_bitsandbytes:
|
180 |
# Apply quantization
|
181 |
from transformers import BitsAndBytesConfig
|
182 |
quantization_config = BitsAndBytesConfig(
|
|
|
189 |
device_map="auto",
|
190 |
max_memory=config["max_memory"],
|
191 |
torch_dtype=DTYPE,
|
192 |
+
quantization_config=quantization_config,
|
193 |
offload_folder="offload" if config["offload"] else None,
|
194 |
**common_params
|
195 |
)
|