Oleg Shulyakov committed on
Commit c96815e · 1 Parent(s): 67040c8

Move base model creation into separate method

Files changed (1): app.py (+79, -53)
--- a/app.py
+++ b/app.py
@@ -15,8 +15,25 @@ from apscheduler.schedulers.background import BackgroundScheduler
 SPACE_ID = os.environ.get("SPACE_ID")
 HF_TOKEN = os.environ.get("HF_TOKEN")

-# Llama.cpp Safetensors to GGUF F16
-CONVERSION_SCRIPT = "/app/convert_hf_to_gguf.py"
+# Folder
+DOWNLOAD_FOLDER = "./downloads"
+OUTPUT_FOLDER = "./outputs"
+
+def create_folder(folder_name: str):
+    if not os.path.exists(folder_name):
+        print(f"Creating folder: {folder_name}")
+        os.makedirs(folder_name)
+
+def is_valid_token(oauth_token):
+    if oauth_token is None or oauth_token.token is None:
+        return False
+
+    try:
+        whoami(oauth_token.token)
+    except Exception as e:
+        return False
+
+    return True

 # escape HTML for logging
 def escape(s: str) -> str:
@@ -27,6 +44,9 @@ def escape(s: str) -> str:
     s = s.replace("\n", "<br/>")
     return s

+def get_model_name(model_id: str):
+    return model_id.split('/')[-1]
+
 def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
     if not os.path.isfile(model_path):
         raise Exception(f"Model file not found: {model_path}")
@@ -59,8 +79,8 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     print(f"Model path: {model_path}")
     print(f"Output dir: {outdir}")

-    if oauth_token is None or oauth_token.token is None:
-        raise ValueError("You have to be logged in.")
+    if is_valid_token(oauth_token) is False:
+        raise gr.Error("You have to be logged in.")

     split_cmd = [
         "llama-gguf-split",
@@ -101,8 +121,8 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     api = HfApi(token=oauth_token.token)
     for file in sharded_model_files:
         file_path = os.path.join(outdir, file)
-        print(f"Uploading file: {file_path}")
         try:
+            print(f"Uploading file: {file_path}")
             api.upload_file(
                 path_or_fileobj=file_path,
                 path_in_repo=file,
@@ -115,23 +135,16 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:

     print("Sharded model has been uploaded successfully!")

-def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, repo_name, gguf_name, oauth_token: gr.OAuthToken | None):
-    if oauth_token is None or oauth_token.token is None:
-        raise gr.Error("You must be logged in to use GGUF-my-repo")
-
-    # validate the oauth token
-    try:
-        whoami(oauth_token.token)
-    except Exception as e:
-        raise gr.Error("You must be logged in to use GGUF-my-repo")
-
-    model_name = model_id.split('/')[-1]
+def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDirectory):
+    model_name = get_model_name(model_id)

-    try:
-        api = HfApi(token=oauth_token.token)
-
-        dl_pattern = ["*.md", "*.json", "*.model"]
+    with tempfile.TemporaryDirectory(dir=DOWNLOAD_FOLDER) as tmpdir:
+        # Download model
+        print(f"Downloading model {model_name}")
+        local_dir = Path(tmpdir)/model_name # Keep the model name as the dirname so the model name metadata is populated correctly
+        print(f"Local directory: {os.path.abspath(local_dir)}")

+        api = HfApi(token=token)
         pattern = (
             "*.safetensors"
             if any(
@@ -144,40 +157,50 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
             else "*.bin"
         )

+        dl_pattern = ["*.md", "*.json", "*.model"]
         dl_pattern += [pattern]

-        if not os.path.exists("downloads"):
-            os.makedirs("downloads")
-
-        if not os.path.exists("outputs"):
-            os.makedirs("outputs")
-
-        with tempfile.TemporaryDirectory(dir="outputs") as outdir:
-            fp16 = str(Path(outdir)/f"{model_name}.fp16.gguf")
-
-            with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
-                # Keep the model name as the dirname so the model name metadata is populated correctly
-                local_dir = Path(tmpdir)/model_name
-                print(local_dir)
-                api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
-                print("Model downloaded successfully!")
-                print(f"Current working directory: {os.getcwd()}")
-                print(f"Model directory contents: {os.listdir(local_dir)}")
-
-                config_dir = local_dir/"config.json"
-                adapter_config_dir = local_dir/"adapter_config.json"
-                if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
-                    raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')
-
-                result = subprocess.run([
-                    "python3", CONVERSION_SCRIPT, local_dir, "--outtype", "f16", "--outfile", fp16
-                ], shell=False, capture_output=True)
-                print(result)
-                if result.returncode != 0:
-                    stderr_str = result.stderr.decode("utf-8")
-                    raise Exception(f"Error converting to fp16: {stderr_str}")
-                print("Model converted to fp16 successfully!")
-                print(f"Converted model path: {fp16}")
+        api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+        print("Model downloaded successfully!")
+
+        print(f"Model directory contents: {os.listdir(local_dir)}")
+        config_dir = local_dir/"config.json"
+        adapter_config_dir = local_dir/"adapter_config.json"
+        if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
+            raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')
+
+        # Convert HF to GGUF
+        fp16_model = str(Path(outdir)/f"{model_name}_fp16.gguf")
+        print(f"Converting to GGUF FP16: {os.path.abspath(fp16_model)}")
+        result = subprocess.run(
+            [
+                "python3", "/app/convert_hf_to_gguf.py", local_dir, "--outtype", "f16", "--outfile", fp16_model
+            ],
+            shell=False,
+            capture_output=True
+        )
+        print(f"Model directory contents: {result}")
+        if result.returncode != 0:
+            stderr_str = result.stderr.decode("utf-8")
+            raise Exception(f"Error converting to fp16: {stderr_str}")
+
+        print("Model converted to fp16 successfully!")
+        print(f"Converted model path: {os.path.abspath(fp16_model)}")
+
+        return fp16_model
+
+def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, repo_name, gguf_name, oauth_token: gr.OAuthToken | None):
+    # validate the oauth token
+    if is_valid_token(oauth_token) is False:
+        raise gr.Error("You must be logged in to use GGUF-my-repo")
+
+    print(f"Current working directory: {os.path.abspath(os.getcwd())}")
+    create_folder(DOWNLOAD_FOLDER)
+    create_folder(OUTPUT_FOLDER)
+
+    try:
+        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
+            fp16 = download_base_model(oauth_token.token, model_id, outdir)

             imatrix_path = Path(outdir)/"imatrix.dat"

@@ -197,6 +220,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 print("Not using imatrix quantization.")

             # Quantize the model
+            model_name = get_model_name(model_id)
             quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
             quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
             if use_imatrix:
@@ -214,12 +238,13 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 stderr_str = result.stderr.decode("utf-8")
                 raise Exception(f"Error quantizing: {stderr_str}")
             print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
-            print(f"Quantized model path: {quantized_gguf_path}")
+            print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")

             # Create empty repo
             username = whoami(oauth_token.token)["name"]

             repo_name = f"{username}/{model_name}-GGUF"
+            api = HfApi(token=oauth_token.token)
             new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
             new_repo_id = new_repo_url.repo_id
             print("Repo created successfully!", new_repo_url)
@@ -329,6 +354,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 "llama.png",
             )
     except Exception as e:
+        print((f"Error processing model: {e}"))
         return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")

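
For readers who want the net effect of the commit without walking the hunks, the sketch below condenses the refactored flow from the new app.py: token validation, working-folder creation, and the new download_base_model() helper that process_model() now delegates to. It is a simplified sketch, not the full file: quantization, imatrix handling, README generation, and upload are elided, the reduced process_model signature and the dropped dl_pattern/allow_patterns arguments are simplifications made here, and the Space environment (gradio, huggingface_hub, llama.cpp's /app/convert_hf_to_gguf.py) is assumed.

# Condensed sketch of the post-commit flow in app.py (simplified; quantization and upload elided).
import os
import subprocess
import tempfile
from pathlib import Path

import gradio as gr
from huggingface_hub import HfApi, whoami

DOWNLOAD_FOLDER = "./downloads"
OUTPUT_FOLDER = "./outputs"

def create_folder(folder_name: str):
    # Create the working folder on first use.
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

def is_valid_token(oauth_token) -> bool:
    # A missing token and a token rejected by whoami() are treated the same way.
    if oauth_token is None or oauth_token.token is None:
        return False
    try:
        whoami(oauth_token.token)
    except Exception:
        return False
    return True

def download_base_model(token: str, model_id: str, outdir: str) -> str:
    # Download the base model into a temporary dir and convert it to GGUF F16 inside outdir.
    model_name = model_id.split("/")[-1]
    with tempfile.TemporaryDirectory(dir=DOWNLOAD_FOLDER) as tmpdir:
        local_dir = Path(tmpdir) / model_name  # dirname doubles as the model-name metadata
        HfApi(token=token).snapshot_download(repo_id=model_id, local_dir=local_dir)
        fp16_model = str(Path(outdir) / f"{model_name}_fp16.gguf")
        result = subprocess.run(
            ["python3", "/app/convert_hf_to_gguf.py", str(local_dir),
             "--outtype", "f16", "--outfile", fp16_model],
            capture_output=True,
        )
        if result.returncode != 0:
            raise Exception(f"Error converting to fp16: {result.stderr.decode('utf-8')}")
        return fp16_model

def process_model(model_id: str, oauth_token: "gr.OAuthToken | None") -> str:
    # process_model now only orchestrates; download and FP16 conversion live in download_base_model().
    if not is_valid_token(oauth_token):
        raise gr.Error("You must be logged in to use GGUF-my-repo")
    create_folder(DOWNLOAD_FOLDER)
    create_folder(OUTPUT_FOLDER)
    with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
        return download_base_model(oauth_token.token, model_id, outdir)

In the actual app.py, the with-block continues with imatrix generation, quantization, repo creation, and upload exactly as shown in the hunks above.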