openfree committed on
Commit 5d8d623 · verified · 1 Parent(s): 2198075

Update app.py

Files changed (1)
  1. app.py +14 -5
app.py CHANGED
@@ -77,12 +77,12 @@ MODEL_CONFIG = {
     "medium": { # 10B-30B
         "max_memory": {0: "40GiB"},
         "offload": False,
-        "quantization": "4bit"
+        "quantization": None  # Quantization disabled due to BitsAndBytes issues
     },
     "large": { # >30B
         "max_memory": {0: "70GiB"},
         "offload": True,
-        "quantization": "4bit"
+        "quantization": None  # Quantization disabled due to BitsAndBytes issues
     }
 }
 
@@ -166,8 +166,17 @@ def load_model(model_names):
         "trust_remote_code": True,
     }
 
-    # Use BF16 precision (optimized for A100)
-    if config["quantization"]:
+    # Check whether BitsAndBytes is available
+    try:
+        import bitsandbytes
+        has_bitsandbytes = True
+        print("BitsAndBytes library loaded successfully")
+    except ImportError:
+        has_bitsandbytes = False
+        print("BitsAndBytes library not found. Loading the model without quantization.")
+
+    # Apply quantization only when it is configured and BitsAndBytes is usable
+    if config["quantization"] and has_bitsandbytes:
         # Apply quantization
         from transformers import BitsAndBytesConfig
         quantization_config = BitsAndBytesConfig(
@@ -180,7 +189,7 @@ def load_model(model_names):
         device_map="auto",
         max_memory=config["max_memory"],
         torch_dtype=DTYPE,
-        quantization_config=quantization_config if config["quantization"] else None,
+        quantization_config=quantization_config,
         offload_folder="offload" if config["offload"] else None,
         **common_params
     )
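
One interaction between the two hunks is worth noting: with "quantization" now set to None for the medium and large tiers, the new `if config["quantization"] and has_bitsandbytes:` branch is skipped, yet the `from_pretrained` call still passes `quantization_config=quantization_config`. Unless `quantization_config` is initialized somewhere outside the shown hunks, that path would raise a NameError. Below is a minimal sketch of a defensive version of this load path; the function name, `model_id` parameter, and standalone `config` argument are illustrative stand-ins, not names taken from the actual app.py.

```python
# Sketch only: mirrors the commit's availability probe, but initializes
# quantization_config up front so the from_pretrained call is always valid.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig


def load_with_optional_quantization(model_id, config, dtype=torch.bfloat16):
    # Probe for bitsandbytes instead of assuming it is installed.
    try:
        import bitsandbytes  # noqa: F401
        has_bitsandbytes = True
    except ImportError:
        has_bitsandbytes = False

    # Default to no quantization; the commit leaves quantization_config
    # unset when its branch is skipped, which is the hazard this avoids.
    quantization_config = None
    if config["quantization"] and has_bitsandbytes:
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=dtype,
        )

    return AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        max_memory=config["max_memory"],
        torch_dtype=dtype,
        quantization_config=quantization_config,
        offload_folder="offload" if config["offload"] else None,
        trust_remote_code=True,
    )
```

Initializing `quantization_config` to None before the conditional keeps the call valid whether or not bitsandbytes is importable, which matches the intent of the commit's availability check.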