Oleg Shulyakov committed
Commit: c0d1d96
Parent(s): 17f9e2b

Add RUN_LOCALLY flag

Files changed:
- app.py (+46 -19)
- docker-compose.yml (+1 -0)
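In short: the app now reads a RUN_LOCALLY environment variable, routes every Hub upload through a small upload_file() wrapper that returns early when the flag is "1", and keeps its working files in a persistent per-model folder instead of a throwaway temporary directory. A minimal sketch of the gating pattern, lifted in simplified form from the helper added in the diff below:

```python
import os

from huggingface_hub import HfApi

RUN_LOCALLY = os.environ.get("RUN_LOCALLY")  # "1" means: do everything except upload


def upload_file(token, path_or_fileobj, path_in_repo, repo_id):
    # When running locally (e.g. via docker-compose), skip the network call and
    # leave the generated files on disk for inspection.
    if RUN_LOCALLY == "1":
        print("Skipping upload...")
        return

    api = HfApi(token=token)
    api.upload_file(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=path_in_repo,
        repo_id=repo_id,
    )
```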
app.py
CHANGED
@@ -16,6 +16,8 @@ SPACE_ID = os.environ.get("SPACE_ID") if os.environ.get("SPACE_ID") else ""
 SPACE_URL = "https://" + SPACE_ID.replace("/", "-") + ".hf.space/" if SPACE_ID else "http://localhost:7860/"
 HF_TOKEN = os.environ.get("HF_TOKEN")

+RUN_LOCALLY = os.environ.get("RUN_LOCALLY")
+
 # Folder
 DOWNLOAD_FOLDER = "./downloads"
 OUTPUT_FOLDER = "./outputs"
@@ -27,6 +29,8 @@ def create_folder(folder_name: str):
     print(f"Creating folder: {folder_name}")
     os.makedirs(folder_name)

+    return folder_name
+
 def validate_token(oauth_token):
     if oauth_token is None or oauth_token.token is None:
         raise gr.Error(ERROR_LOGIN)
@@ -51,6 +55,18 @@ def get_model_creator(model_id: str):
 def get_model_name(model_id: str):
     return model_id.split('/')[-1]

+def upload_file(token, path_or_fileobj, path_in_repo, repo_id):
+    if RUN_LOCALLY == "1":
+        print("Skipping upload...")
+        return
+
+    api = HfApi(token=token)
+    api.upload_file(
+        path_or_fileobj=path_or_fileobj,
+        path_in_repo=path_in_repo,
+        repo_id=repo_id,
+    )
+
 def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
     if not os.path.isfile(model_path):
         raise Exception(f"Model file not found: {model_path}")
@@ -120,12 +136,12 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, token: str, s
     sharded_model_files = [f for f in os.listdir(outdir) if f.startswith(model_file_prefix) and f.endswith(".gguf")]
     if sharded_model_files:
         print(f"Sharded model files: {sharded_model_files}")
-        api = HfApi(token=token)
         for file in sharded_model_files:
             file_path = os.path.join(outdir, file)
             try:
                 print(f"Uploading file: {file_path}")
-                api.upload_file(
+                upload_file(
+                    token=token,
                     path_or_fileobj=file_path,
                     path_in_repo=file,
                     repo_id=repo_id,
@@ -137,15 +153,21 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, token: str, s

     print("Sharded model has been uploaded successfully!")

-def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDirectory):
+def download_base_model(token: str, model_id: str, outdir: str):
     model_name = get_model_name(model_id)
+    print(f"Downloading model {model_name}")
+
+    fp16_model = f"{outdir}/{model_name}-fp16.gguf"
+    if os.path.exists(fp16_model):
+        print("Skipping fp16 convertion...")
+        print(f"Converted model path: {os.path.abspath(fp16_model)}")

     with tempfile.TemporaryDirectory(dir=DOWNLOAD_FOLDER) as tmpdir:
-        # …
-        …
-        local_dir = Path(tmpdir)/model_name # Keep the model name as the dirname so the model name metadata is populated correctly
+        # Keep the model name as the dirname so the model name metadata is populated correctly
+        local_dir = f"{Path(tmpdir)}/{model_name}"
         print(f"Local directory: {os.path.abspath(local_dir)}")

+        # Download model
         api = HfApi(token=token)
         pattern = (
             "*.safetensors"
@@ -172,7 +194,6 @@ def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDir
         raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')

         # Convert HF to GGUF
-        fp16_model = str(Path(outdir)/f"{model_name}-fp16.gguf")
         print(f"Converting to GGUF FP16: {os.path.abspath(fp16_model)}")
         result = subprocess.run(
             [
@@ -192,7 +213,7 @@ def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDir
     return fp16_model

 def quantize_model(
-    outdir: …,
+    outdir: str,
     gguf_name: str,
     fp16: str,
     q_method: str,
@@ -235,7 +256,7 @@ def quantize_model(
     else:
         print("Not using imatrix quantization.")

-    quantized_gguf = …
+    quantized_gguf = f"{outdir}/{gguf_name}"
     quantize_cmd.append(fp16)
     quantize_cmd.append(quantized_gguf)

@@ -254,7 +275,7 @@ def quantize_model(
     print(f"Quantized model path: {os.path.abspath(quantized_gguf)}")
     return quantized_gguf

-def generate_readme(outdir: …):
+def generate_readme(outdir: str, token: str, model_id: str, new_repo_id: str, gguf_name: str):
     creator = get_model_creator(model_id)
     model_name = get_model_name(model_id)
     username = whoami(token)["name"]
@@ -307,7 +328,7 @@ llama-server --hf-repo "{new_repo_id}" --hf-file "{gguf_name}" -c 4096
 ```
 """
     )
-    readme_path = …
+    readme_path = f"{outdir}/README.md"
    card.save(readme_path)
    return readme_path

@@ -331,16 +352,19 @@ def process_model(
     oauth_token: gr.OAuthToken | None,
 ):
     validate_token(oauth_token)
-    …
     token = oauth_token.token
+
     print(f"Current working directory: {os.path.abspath(os.getcwd())}")
     create_folder(DOWNLOAD_FOLDER)
     create_folder(OUTPUT_FOLDER)

+    model_name = get_model_name(model_id)
+
     try:
-        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
+        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outDirObj:
+            outdir = create_folder(os.path.join(OUTPUT_FOLDER, model_name)) if RUN_LOCALLY == "1" else Path(outDirObj)
             fp16 = download_base_model(token, model_id, outdir)
-            imatrix_file = …
+            imatrix_file = f"{outdir}/{model_name}-imatrix.dat"
             quantized_gguf = quantize_model(outdir, gguf_name, fp16, q_method, use_imatrix, imatrix_q_method, imatrix_file, quant_embedding, embedding_tensor_method, leave_output, quant_output, output_tensor_method)

             # Create empty repo
@@ -352,11 +376,12 @@ def process_model(
             # Upload model
             if split_model:
                 print(f"Splitting quantized model: {os.path.abspath(quantized_gguf)}")
-                split_upload_model(…)
+                split_upload_model(quantized_gguf, outdir, new_repo_id, token, split_max_tensors, split_max_size)
             else:
                 try:
                     print(f"Uploading quantized model: {os.path.abspath(quantized_gguf)}")
-                    api.upload_file(
+                    upload_file(
+                        token=token,
                         path_or_fileobj=quantized_gguf,
                         path_in_repo=gguf_name,
                         repo_id=new_repo_id,
@@ -367,7 +392,8 @@ def process_model(
             if os.path.isfile(imatrix_file):
                 try:
                     print(f"Uploading imatrix.dat: {os.path.abspath(output_path)}")
-                    api.upload_file(
+                    upload_file(
+                        token=token,
                         path_or_fileobj=imatrix_file,
                         path_in_repo="imatrix.dat",
                         repo_id=new_repo_id,
@@ -378,7 +404,8 @@ def process_model(
             # Upload README.md
             readme_path = generate_readme(outdir, token, model_id, new_repo_id, gguf_name)

-            api.upload_file(
+            upload_file(
+                token=token,
                 path_or_fileobj=readme_path,
                 path_in_repo="README.md",
                 repo_id=new_repo_id,
@@ -629,8 +656,8 @@ with gr.Blocks(css=css) as demo:
             submit_btn.render()

         with gr.Column() as outputs:
-            output_label.render()
             output_image.render()
+            output_label.render()

     #####
     # Button Click handlers
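A side effect of the new outdir handling in process_model(): with RUN_LOCALLY=1 the converted fp16 GGUF, the quantized GGUF, the imatrix and the generated README all land in ./outputs/<model_name> and survive the run, while the default path still uses a TemporaryDirectory that is cleaned up automatically. A small standalone sketch of that selection logic (model_name is a placeholder here; the app derives it with get_model_name(), and its create_folder() helper uses plain os.makedirs()):

```python
import os
import tempfile
from pathlib import Path

OUTPUT_FOLDER = "./outputs"
RUN_LOCALLY = os.environ.get("RUN_LOCALLY")

os.makedirs(OUTPUT_FOLDER, exist_ok=True)
model_name = "example-model"  # placeholder; derived from model_id in the app

with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outDirObj:
    if RUN_LOCALLY == "1":
        # Persistent folder: artifacts remain under ./outputs/<model_name> after the run
        outdir = os.path.join(OUTPUT_FOLDER, model_name)
        os.makedirs(outdir, exist_ok=True)
    else:
        # Temporary folder: removed as soon as the with-block exits
        outdir = Path(outDirObj)

    print(f"GGUF artifacts will be written to: {outdir}")
```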
docker-compose.yml
CHANGED
@@ -11,5 +11,6 @@ services:
       - .:/home/user/app
     environment:
       - RUN_CUDA=0
+      - RUN_LOCALLY=0
       - HF_TOKEN=${HF_TOKEN}
       - HF_HUB_CACHE=/home/user/app/downloads
|