Oleg Shulyakov committed
Commit · 55ecc95 · 1 Parent(s): c96815e

Replace model name with UI values
app.py CHANGED
@@ -220,9 +220,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
             print("Not using imatrix quantization.")
 
         # Quantize the model
-
-        quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
-        quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
+        quantized_gguf_path = str(Path(outdir)/gguf_name)
         if use_imatrix:
             quantise_ggml = [
                 "llama-quantize",
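This hunk is the heart of the commit: process_model no longer rebuilds the output filename from model_name; it joins the UI-supplied gguf_name onto the working directory. A minimal sketch of the added line, with placeholder values standing in for the Space's real temp directory and textbox contents:

```python
from pathlib import Path

# Stand-in values; in the Space these come from a temp directory and the UI textbox.
outdir = "/tmp/outdir"
gguf_name = "mymodel-7b-Q4_K_M.gguf"

# Same construction as the `+` line in the hunk above.
quantized_gguf_path = str(Path(outdir) / gguf_name)
print(quantized_gguf_path)  # /tmp/outdir/mymodel-7b-Q4_K_M.gguf
```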
@@ -241,9 +239,6 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")
 
         # Create empty repo
-        username = whoami(oauth_token.token)["name"]
-
-        repo_name = f"{username}/{model_name}-GGUF"
         api = HfApi(token=oauth_token.token)
         new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
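With the username/repo-name derivation removed here, repo_name now arrives from the UI, and the repo-creation call is unchanged. As an isolated sketch of that call (token and repo id below are placeholders, not values from this commit): create_repo with exist_ok=True is idempotent and returns a RepoUrl whose .repo_id is what the rest of the function uses.

```python
from huggingface_hub import HfApi

# Placeholder token and repo id; in the Space both come from the OAuth login
# and the repo-name textbox.
api = HfApi(token="hf_xxx")
new_repo_url = api.create_repo(
    repo_id="username/MyModel-7B-GGUF",
    exist_ok=True,   # reuse the repo if it already exists
    private=False,
)
print(new_repo_url.repo_id)  # "username/MyModel-7B-GGUF"
```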
@@ -283,12 +278,12 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 
 ### CLI:
 ```bash
-llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"
 ```
 
 ### Server:
 ```bash
-llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+llama-server --hf-repo {new_repo_id} --hf-file {gguf_name} -c 2048
 ```
 
 Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
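These lines live inside the model-card f-string, so {new_repo_id} and {gguf_name} are interpolated before the README is written. A quick sketch of what one rendered command looks like, using hypothetical values for the two variables:

```python
# Hypothetical values standing in for the interpolated variables.
new_repo_id = "username/MyModel-7B-GGUF"
gguf_name = "mymodel-7b-Q4_K_M.gguf"

print(f'llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"')
# llama-cli --hf-repo username/MyModel-7B-GGUF --hf-file mymodel-7b-Q4_K_M.gguf -p "The meaning to life and the universe is"
```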
@@ -305,11 +300,11 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 
 Step 3: Run inference through the main binary.
 ```
-./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+./llama-cli --hf-repo {new_repo_id} --hf-file {gguf_name} -p "The meaning to life and the universe is"
 ```
 or
 ```
-./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+./llama-server --hf-repo {new_repo_id} --hf-file {gguf_name} -c 2048
 ```
 """
 )
@@ -323,7 +318,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         print(f"Uploading quantized model: {quantized_gguf_path}")
         api.upload_file(
             path_or_fileobj=quantized_gguf_path,
-            path_in_repo=quantized_gguf_name,
+            path_in_repo=gguf_name,
             repo_id=new_repo_id,
         )
     except Exception as e:
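The upload now stores the file under the UI-supplied name as well, so the name shown in the textbox is exactly the name that lands in the repo. In isolation, with placeholder paths and ids, the call looks like this:

```python
from huggingface_hub import HfApi

# Placeholder paths and ids; the Space fills these in from process_model's state.
api = HfApi(token="hf_xxx")
api.upload_file(
    path_or_fileobj="/tmp/outdir/mymodel-7b-Q4_K_M.gguf",  # local quantized file
    path_in_repo="mymodel-7b-Q4_K_M.gguf",                 # uploaded under the UI-supplied name
    repo_id="username/MyModel-7B-GGUF",
)
```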
@@ -455,18 +450,23 @@ gguf_name = gr.Textbox(
 def update_output_repo(model_id, oauth_token: gr.OAuthToken | None):
     if oauth_token is None or oauth_token.token is None:
         return ""
-
+
     if model_id is None:
         return ""
+
+    username = whoami(oauth_token.token)["name"]
     model_name = model_id.split('/')[-1]
     return f"{username}/{model_name}-GGUF"
 
 def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
     if model_id is None:
         return ""
+
     model_name = model_id.split('/')[-1]
+
     if use_imatrix:
         return f"{model_name.lower()}-{imatrix_q_method.upper()}-imat.gguf"
+
     return f"{model_name.lower()}-{q_method.upper()}.gguf"
 
 #####
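These two helpers are pure functions of the UI state, which is what lets the commit move naming out of process_model: the textboxes hold the derived defaults, and the user can override them before quantizing. A minimal, self-contained sketch of how such helpers might be wired to Gradio events — the component labels, dropdown choices, and .change() hookup below are assumptions for illustration, not code from this commit:

```python
import gradio as gr

def update_output_filename(model_id, use_imatrix, q_method, imatrix_q_method):
    if model_id is None:
        return ""
    model_name = model_id.split('/')[-1]
    if use_imatrix:
        return f"{model_name.lower()}-{imatrix_q_method.upper()}-imat.gguf"
    return f"{model_name.lower()}-{q_method.upper()}.gguf"

with gr.Blocks() as demo:
    model_id = gr.Textbox(label="Hub model ID")
    use_imatrix = gr.Checkbox(label="Use imatrix quantization", value=False)
    q_method = gr.Dropdown(["Q4_K_M", "Q5_K_M", "Q8_0"], value="Q4_K_M",
                           label="Quantization method")
    imatrix_q_method = gr.Dropdown(["IQ4_NL", "IQ4_XS"], value="IQ4_NL",
                                   label="Imatrix quantization method")
    gguf_name = gr.Textbox(label="Output file name")

    # Keep the filename textbox in sync with the selections above.
    for comp in (model_id, use_imatrix, q_method, imatrix_q_method):
        comp.change(
            update_output_filename,
            inputs=[model_id, use_imatrix, q_method, imatrix_q_method],
            outputs=gguf_name,
        )

demo.launch()
```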