Oleg Shulyakov committed · Commit c96815e · 1 Parent(s): 67040c8

Move base model creation into separate method
app.py
CHANGED
@@ -15,8 +15,25 @@ from apscheduler.schedulers.background import BackgroundScheduler
 SPACE_ID = os.environ.get("SPACE_ID")
 HF_TOKEN = os.environ.get("HF_TOKEN")

-#
-
+# Folder
+DOWNLOAD_FOLDER = "./downloads"
+OUTPUT_FOLDER = "./outputs"
+
+def create_folder(folder_name: str):
+    if not os.path.exists(folder_name):
+        print(f"Creating folder: {folder_name}")
+        os.makedirs(folder_name)
+
+def is_valid_token(oauth_token):
+    if oauth_token is None or oauth_token.token is None:
+        return False
+
+    try:
+        whoami(oauth_token.token)
+    except Exception as e:
+        return False
+
+    return True

 # escape HTML for logging
 def escape(s: str) -> str:
@@ -27,6 +44,9 @@ def escape(s: str) -> str:
     s = s.replace("\n", "<br/>")
     return s

+def get_model_name(model_id: str):
+    return model_id.split('/')[-1]
+
 def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
     if not os.path.isfile(model_path):
         raise Exception(f"Model file not found: {model_path}")
@@ -59,8 +79,8 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     print(f"Model path: {model_path}")
     print(f"Output dir: {outdir}")

-    if oauth_token is None or oauth_token.token is None:
-        raise ValueError("You have to be logged in.")
+    if is_valid_token(oauth_token) is False:
+        raise gr.Error("You have to be logged in.")

     split_cmd = [
         "llama-gguf-split",
@@ -101,8 +121,8 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     api = HfApi(token=oauth_token.token)
     for file in sharded_model_files:
         file_path = os.path.join(outdir, file)
-        print(f"Uploading file: {file_path}")
         try:
+            print(f"Uploading file: {file_path}")
             api.upload_file(
                 path_or_fileobj=file_path,
                 path_in_repo=file,
@@ -115,23 +135,16 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:

     print("Sharded model has been uploaded successfully!")

-def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, ...):
-    if oauth_token is None or oauth_token.token is None:
-        raise gr.Error("You must be logged in to use GGUF-my-repo")
-
-    # validate the oauth token
-    try:
-        whoami(oauth_token.token)
-    except Exception as e:
-        raise gr.Error("You must be logged in to use GGUF-my-repo")
-
-    model_name = model_id.split('/')[-1]
+def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDirectory):
+    model_name = get_model_name(model_id)

-    ...
+    with tempfile.TemporaryDirectory(dir=DOWNLOAD_FOLDER) as tmpdir:
+        # Download model
+        print(f"Downloading model {model_name}")
+        local_dir = Path(tmpdir)/model_name  # Keep the model name as the dirname so the model name metadata is populated correctly
+        print(f"Local directory: {os.path.abspath(local_dir)}")

+        api = HfApi(token=token)
         pattern = (
             "*.safetensors"
             if any(
@@ -144,40 +157,50 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
             else "*.bin"
         )

+        dl_pattern = ["*.md", "*.json", "*.model"]
         dl_pattern += [pattern]

-        ...
+        api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+        print("Model downloaded successfully!")
+
+        print(f"Model directory contents: {os.listdir(local_dir)}")
+        config_dir = local_dir/"config.json"
+        adapter_config_dir = local_dir/"adapter_config.json"
+        if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
+            raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')
+
+        # Convert HF to GGUF
+        fp16_model = str(Path(outdir)/f"{model_name}_fp16.gguf")
+        print(f"Converting to GGUF FP16: {os.path.abspath(fp16_model)}")
+        result = subprocess.run(
+            [
+                "python3", "/app/convert_hf_to_gguf.py", local_dir, "--outtype", "f16", "--outfile", fp16_model
+            ],
+            shell=False,
+            capture_output=True
+        )
+        print(f"Model directory contents: {result}")
+        if result.returncode != 0:
+            stderr_str = result.stderr.decode("utf-8")
+            raise Exception(f"Error converting to fp16: {stderr_str}")
+
+        print("Model converted to fp16 successfully!")
+        print(f"Converted model path: {os.path.abspath(fp16_model)}")
+
+        return fp16_model
+
+def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, repo_name, gguf_name, oauth_token: gr.OAuthToken | None):
+    # validate the oauth token
+    if is_valid_token(oauth_token) is False:
+        raise gr.Error("You must be logged in to use GGUF-my-repo")
+
+    print(f"Current working directory: {os.path.abspath(os.getcwd())}")
+    create_folder(DOWNLOAD_FOLDER)
+    create_folder(OUTPUT_FOLDER)
+
+    try:
+        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
+            fp16 = download_base_model(oauth_token.token, model_id, outdir)

             imatrix_path = Path(outdir)/"imatrix.dat"

@@ -197,6 +220,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 print("Not using imatrix quantization.")

             # Quantize the model
+            model_name = get_model_name(model_id)
             quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
             quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
             if use_imatrix:
@@ -214,12 +238,13 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 stderr_str = result.stderr.decode("utf-8")
                 raise Exception(f"Error quantizing: {stderr_str}")
             print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
-            print(f"Quantized model path: {quantized_gguf_path}")
+            print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")

             # Create empty repo
             username = whoami(oauth_token.token)["name"]

             repo_name = f"{username}/{model_name}-GGUF"
+            api = HfApi(token=oauth_token.token)
             new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
             new_repo_id = new_repo_url.repo_id
             print("Repo created successfully!", new_repo_url)
@@ -329,6 +354,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 "llama.png",
             )
     except Exception as e:
+        print((f"Error processing model: {e}"))
         return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")

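For orientation (not part of the commit), a minimal sketch of how the helpers introduced here compose outside the Gradio UI. The "from app import ..." line, the SimpleNamespace token stand-in, and the example model_id are assumptions for illustration only; quantization, splitting, and upload would continue exactly as process_model does above.

# Hypothetical driver script; assumes app.py's helpers can be imported
# without launching the Space UI.
import tempfile
from types import SimpleNamespace

from app import (DOWNLOAD_FOLDER, OUTPUT_FOLDER, create_folder,
                 download_base_model, get_model_name, is_valid_token)

oauth_token = SimpleNamespace(token="hf_xxx")   # stand-in for gr.OAuthToken

if not is_valid_token(oauth_token):             # None check + whoami() probe
    raise SystemExit("You have to be logged in.")

create_folder(DOWNLOAD_FOLDER)                  # ./downloads
create_folder(OUTPUT_FOLDER)                    # ./outputs

model_id = "some-org/some-model"                # example repo id
model_name = get_model_name(model_id)           # -> "some-model"

with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
    # download + HF-to-GGUF fp16 conversion now live in one helper
    fp16 = download_base_model(oauth_token.token, model_id, outdir)
    print(f"fp16 GGUF ready for quantization: {fp16}")
    # llama-quantize / imatrix / upload steps follow, as in process_model()

Because download_base_model takes a plain token string rather than a Gradio OAuth object, this kind of standalone reuse is possible without the web UI.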