Oleg Shulyakov committed
Commit c0d1d96 · 1 Parent(s): 17f9e2b

Add RUN_LOCALLY flag

Files changed (2)
  1. app.py +46 -19
  2. docker-compose.yml +1 -0
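
In short, the commit funnels every Hub upload through a new upload_file() helper that is skipped when the RUN_LOCALLY environment variable is "1", and lets process_model() reuse a persistent ./outputs/<model name> folder (and any previously converted fp16 GGUF) instead of a throwaway temporary directory. A condensed sketch of that pattern, with names taken from the diff below (the lazy huggingface_hub import is only there to keep the sketch self-contained):

import os

RUN_LOCALLY = os.environ.get("RUN_LOCALLY")

def upload_file(token, path_or_fileobj, path_in_repo, repo_id):
    # When RUN_LOCALLY is "1" the upload is skipped entirely; otherwise the
    # call goes through HfApi exactly as before.
    if RUN_LOCALLY == "1":
        print("Skipping upload...")
        return

    from huggingface_hub import HfApi  # lazy import, only to keep this sketch self-contained
    HfApi(token=token).upload_file(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=path_in_repo,
        repo_id=repo_id,
    )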
app.py CHANGED
@@ -16,6 +16,8 @@ SPACE_ID = os.environ.get("SPACE_ID") if os.environ.get("SPACE_ID") else ""
 SPACE_URL = "https://" + SPACE_ID.replace("/", "-") + ".hf.space/" if SPACE_ID else "http://localhost:7860/"
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
+RUN_LOCALLY = os.environ.get("RUN_LOCALLY")
+
 # Folder
 DOWNLOAD_FOLDER = "./downloads"
 OUTPUT_FOLDER = "./outputs"
@@ -27,6 +29,8 @@ def create_folder(folder_name: str):
         print(f"Creating folder: {folder_name}")
         os.makedirs(folder_name)
 
+    return folder_name
+
 def validate_token(oauth_token):
     if oauth_token is None or oauth_token.token is None:
         raise gr.Error(ERROR_LOGIN)
@@ -51,6 +55,18 @@ def get_model_creator(model_id: str):
 def get_model_name(model_id: str):
     return model_id.split('/')[-1]
 
+def upload_file(token, path_or_fileobj, path_in_repo, repo_id):
+    if RUN_LOCALLY == "1":
+        print("Skipping upload...")
+        return
+
+    api = HfApi(token=token)
+    api.upload_file(
+        path_or_fileobj=path_or_fileobj,
+        path_in_repo=path_in_repo,
+        repo_id=repo_id,
+    )
+
 def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
     if not os.path.isfile(model_path):
         raise Exception(f"Model file not found: {model_path}")
@@ -120,12 +136,12 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, token: str, s
     sharded_model_files = [f for f in os.listdir(outdir) if f.startswith(model_file_prefix) and f.endswith(".gguf")]
     if sharded_model_files:
         print(f"Sharded model files: {sharded_model_files}")
-        api = HfApi(token=token)
         for file in sharded_model_files:
             file_path = os.path.join(outdir, file)
             try:
                 print(f"Uploading file: {file_path}")
-                api.upload_file(
+                upload_file(
+                    token=token,
                     path_or_fileobj=file_path,
                     path_in_repo=file,
                     repo_id=repo_id,
@@ -137,15 +153,21 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, token: str, s
 
     print("Sharded model has been uploaded successfully!")
 
-def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDirectory):
+def download_base_model(token: str, model_id: str, outdir: str):
     model_name = get_model_name(model_id)
+    print(f"Downloading model {model_name}")
+
+    fp16_model = f"{outdir}/{model_name}-fp16.gguf"
+    if os.path.exists(fp16_model):
+        print("Skipping fp16 convertion...")
+        print(f"Converted model path: {os.path.abspath(fp16_model)}")
 
     with tempfile.TemporaryDirectory(dir=DOWNLOAD_FOLDER) as tmpdir:
-        # Download model
-        print(f"Downloading model {model_name}")
-        local_dir = Path(tmpdir)/model_name # Keep the model name as the dirname so the model name metadata is populated correctly
+        # Keep the model name as the dirname so the model name metadata is populated correctly
+        local_dir = f"{Path(tmpdir)}/{model_name}"
         print(f"Local directory: {os.path.abspath(local_dir)}")
 
+        # Download model
         api = HfApi(token=token)
         pattern = (
             "*.safetensors"
@@ -172,7 +194,6 @@ def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDir
             raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')
 
         # Convert HF to GGUF
-        fp16_model = str(Path(outdir)/f"{model_name}-fp16.gguf")
         print(f"Converting to GGUF FP16: {os.path.abspath(fp16_model)}")
         result = subprocess.run(
             [
@@ -192,7 +213,7 @@ def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDir
     return fp16_model
 
 def quantize_model(
-    outdir: tempfile.TemporaryDirectory,
+    outdir: str,
     gguf_name: str,
     fp16: str,
     q_method: str,
@@ -235,7 +256,7 @@ def quantize_model(
     else:
         print("Not using imatrix quantization.")
 
-    quantized_gguf = str(Path(outdir)/gguf_name)
+    quantized_gguf = f"{outdir}/{gguf_name}"
     quantize_cmd.append(fp16)
     quantize_cmd.append(quantized_gguf)
 
@@ -254,7 +275,7 @@ def quantize_model(
     print(f"Quantized model path: {os.path.abspath(quantized_gguf)}")
     return quantized_gguf
 
-def generate_readme(outdir: tempfile.TemporaryDirectory, token: str, model_id: str, new_repo_id: str, gguf_name: str):
+def generate_readme(outdir: str, token: str, model_id: str, new_repo_id: str, gguf_name: str):
     creator = get_model_creator(model_id)
     model_name = get_model_name(model_id)
     username = whoami(token)["name"]
@@ -307,7 +328,7 @@ llama-server --hf-repo "{new_repo_id}" --hf-file "{gguf_name}" -c 4096
 ```
 """
     )
-    readme_path = Path(outdir)/"README.md"
+    readme_path = f"{outdir}/README.md"
    card.save(readme_path)
     return readme_path
 
@@ -331,16 +352,19 @@ def process_model(
     oauth_token: gr.OAuthToken | None,
 ):
     validate_token(oauth_token)
-
     token = oauth_token.token
+
     print(f"Current working directory: {os.path.abspath(os.getcwd())}")
     create_folder(DOWNLOAD_FOLDER)
     create_folder(OUTPUT_FOLDER)
 
+    model_name = get_model_name(model_id)
+
     try:
-        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
+        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outDirObj:
+            outdir = create_folder(os.path.join(OUTPUT_FOLDER, model_name)) if RUN_LOCALLY == "1" else Path(outDirObj)
             fp16 = download_base_model(token, model_id, outdir)
-            imatrix_file = Path(outdir)/f"{get_model_name(model_id)}-imatrix.dat"
+            imatrix_file = f"{outdir}/{model_name}-imatrix.dat"
             quantized_gguf = quantize_model(outdir, gguf_name, fp16, q_method, use_imatrix, imatrix_q_method, imatrix_file, quant_embedding, embedding_tensor_method, leave_output, quant_output, output_tensor_method)
 
             # Create empty repo
@@ -352,11 +376,12 @@ def process_model(
             # Upload model
             if split_model:
                 print(f"Splitting quantized model: {os.path.abspath(quantized_gguf)}")
-                split_upload_model(str(quantized_gguf), outdir, new_repo_id, token, split_max_tensors, split_max_size)
+                split_upload_model(quantized_gguf, outdir, new_repo_id, token, split_max_tensors, split_max_size)
             else:
                 try:
                     print(f"Uploading quantized model: {os.path.abspath(quantized_gguf)}")
-                    api.upload_file(
+                    upload_file(
+                        token=token,
                         path_or_fileobj=quantized_gguf,
                         path_in_repo=gguf_name,
                         repo_id=new_repo_id,
@@ -367,7 +392,8 @@ def process_model(
             if os.path.isfile(imatrix_file):
                 try:
                     print(f"Uploading imatrix.dat: {os.path.abspath(output_path)}")
-                    api.upload_file(
+                    upload_file(
+                        token=token,
                         path_or_fileobj=imatrix_file,
                         path_in_repo="imatrix.dat",
                         repo_id=new_repo_id,
@@ -378,7 +404,8 @@ def process_model(
             # Upload README.md
             readme_path = generate_readme(outdir, token, model_id, new_repo_id, gguf_name)
 
-            api.upload_file(
+            upload_file(
+                token=token,
                 path_or_fileobj=readme_path,
                 path_in_repo="README.md",
                 repo_id=new_repo_id,
@@ -629,8 +656,8 @@ with gr.Blocks(css=css) as demo:
             submit_btn.render()
 
         with gr.Column() as outputs:
-            output_label.render()
             output_image.render()
+            output_label.render()
 
     #####
    # Button Click handlers
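
Because RUN_LOCALLY is read once at module import, it has to be in the environment before app.py starts when running outside docker-compose. A minimal launcher sketch, assuming app.py starts the Gradio app when executed directly, as Space apps usually do:

import os
import subprocess

# Hypothetical local smoke test: put RUN_LOCALLY=1 into the child process's
# environment so uploads are skipped and outputs land in ./outputs/<model name>.
env = dict(os.environ, RUN_LOCALLY="1")
subprocess.run(["python", "app.py"], env=env, check=True)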
docker-compose.yml CHANGED
@@ -11,5 +11,6 @@ services:
       - .:/home/user/app
     environment:
       - RUN_CUDA=0
+      - RUN_LOCALLY=0
       - HF_TOKEN=${HF_TOKEN}
       - HF_HUB_CACHE=/home/user/app/downloads
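
The compose file keeps the flag at 0, so the Space's default behaviour is unchanged. Note that the guard compares against the literal string "1"; a quick check of that semantics:

# Only the exact string "1" enables local mode; "0", "true", or an unset
# variable all fall through to the normal HfApi upload path.
for value in (None, "0", "1", "true"):
    print(f"RUN_LOCALLY={value!r} -> uploads skipped: {value == '1'}")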