"
```
## Use with LM Studio
```bash
lms load "{processing_config.new_repo_id}"
```
## Use with llama.cpp CLI
```bash
llama-cli --hf-repo "{processing_config.new_repo_id}" --hf-file "{processing_config.output_config.filename}" -p "The meaning of life and the universe is"
```
## Use with llama.cpp server
```bash
llama-server --hf-repo "{processing_config.new_repo_id}" --hf-file "{processing_config.output_config.filename}" -c 4096
```
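
The `-c 4096` flag sets the context size; increase it if your model supports a longer context.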
"""
)
readme_path = f"{processing_config.outdir}/README.md"
card.save(readme_path)
return readme_path
def process_model(self, processing_config: ModelProcessingConfig) -> Tuple[str, str]:
"""Main method to process a model through the entire pipeline."""
quant_config = processing_config.quant_config
split_config = processing_config.split_config
output_config = processing_config.output_config
print(f"Current working directory: {os.path.abspath(os.getcwd())}")
# Download and convert base model
self._download_base_model(processing_config)
# Quantize the model
self._quantize_model(quant_config)
# Create empty repo
self._create_empty_repo(processing_config)
# Upload model
if split_config.enabled:
print(f"Splitting quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
self._split_and_upload_model(processing_config)
else:
try:
print(f"Uploading quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
self._upload_file(processing_config, quant_config.quantized_gguf, output_config.filename)
            except Exception as e:
                raise GGUFConverterError(f"Error uploading quantized model: {e}") from e
# Upload imatrix if it exists
if quant_config.use_imatrix and os.path.isfile(quant_config.imatrix_file):
try:
print(f"Uploading imatrix.dat: {os.path.abspath(quant_config.imatrix_file)}")
self._upload_file(processing_config, quant_config.imatrix_file, f"{processing_config.model_name}-imatrix.gguf")
except Exception as e:
raise GGUFConverterError(f"Error uploading imatrix.dat: {e}")
# Upload README.md
readme_path = self._generate_readme(processing_config)
self._upload_file(processing_config, readme_path, "README.md")
print(f"Uploaded successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
class GGUFConverterUI:
"""Gradio UI for the GGUF Converter."""
def __init__(self):
self.processor = HuggingFaceModelProcessor()
self.css = """/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""
# Initialize components
self._initialize_components()
self._setup_interface()
def _initialize_components(self):
"""Initialize all UI components."""
#####
# Base model section
#####
self.model_id = HuggingfaceHubSearch(
label="Hub Model ID",
placeholder="Search for model id on Huggingface",
search_type="model",
)
#####
# Quantization section
#####
self.use_imatrix = gr.Checkbox(
value=False,
label="Use Imatrix Quantization",
info="Use importance matrix for quantization."
)
self.q_method = gr.Dropdown(
choices=["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0", "F16", "BF16"],
label="Quantization Method",
info="GGML quantization type",
value="Q4_K_M",
filterable=False,
visible=True
)
self.imatrix_q_method = gr.Dropdown(
choices=["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"],
label="Imatrix Quantization Method",
info="GGML imatrix quants type",
value="IQ4_NL",
filterable=False,
visible=False
)
self.train_data_file = gr.File(
label="Training Data File",
file_types=[".txt"],
visible=False
)
#####
# Advanced Options section
#####
self.split_model = gr.Checkbox(
value=False,
label="Split Model",
info="Shard the model using gguf-split."
)
self.split_max_tensors = gr.Number(
value=256,
label="Max Tensors per File",
info="Maximum number of tensors per file when splitting model.",
visible=False
)
self.split_max_size = gr.Textbox(
label="Max File Size",
info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default. Accepted suffixes: M, G. Example: 256M, 5G",
visible=False
)
self.leave_output = gr.Checkbox(
value=False,
label="Leave output tensor",
info="Leaves output.weight un(re)quantized"
)
self.quant_embedding = gr.Checkbox(
value=False,
label="Quant embeddings tensor",
info="Quantize embeddings tensor separately"
)
self.embedding_tensor_method = gr.Dropdown(
choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
label="Embeddings Quantization Method",
info="use a specific quant type for the token embeddings tensor",
value="Q8_0",
filterable=False,
visible=False
)
self.quant_output = gr.Checkbox(
value=False,
label="Quant output tensor",
info="Quantize output tensor separately"
)
self.output_tensor_method = gr.Dropdown(
choices=["Q2_K", "Q3_K", "Q4_K", "Q5_K", "Q6_K", "Q8_0"],
label="Output Quantization Method",
info="use a specific quant type for the output.weight tensor",
value="Q8_0",
filterable=False,
visible=False
)
#####
# Output Settings section
#####
self.private_repo = gr.Checkbox(
value=False,
label="Private Repo",
info="Create a private repo under your username."
)
self.repo_name = gr.Textbox(
label="Output Repository Name",
info="Set your repository name",
max_lines=1
)
self.gguf_name = gr.Textbox(
label="Output File Name",
info="Set output file name",
max_lines=1
)
#####
# Buttons section
#####
self.clear_btn = gr.ClearButton(
value="Clear",
variant="secondary",
components=[
self.model_id,
self.q_method,
self.use_imatrix,
self.imatrix_q_method,
self.private_repo,
self.train_data_file,
self.leave_output,
self.quant_embedding,
self.embedding_tensor_method,
self.quant_output,
self.output_tensor_method,
self.split_model,
self.split_max_tensors,
self.split_max_size,
self.repo_name,
self.gguf_name,
]
)
self.submit_btn = gr.Button(
value="Submit",
variant="primary"
)
#####
# Outputs section
#####
self.output_label = gr.Markdown(label="output")
self.output_image = gr.Image(
show_label=False,
show_download_button=False,
interactive=False
)
@staticmethod
def _update_output_repo(model_id: str, oauth_token: Optional[gr.OAuthToken]) -> str:
"""Update output repository name based on model and user."""
if oauth_token is None or not oauth_token.token:
return ""
if not model_id:
return ""
try:
username = whoami(oauth_token.token)["name"]
model_name = model_id.split('/')[-1]
return f"{username}/{model_name}-GGUF"
        except Exception:
            return ""
@staticmethod
def _update_output_filename(model_id: str, use_imatrix: bool, q_method: str, imatrix_q_method: str) -> str:
"""Update output filename based on model and quantization settings."""
if not model_id:
return ""
model_name = model_id.split('/')[-1]
if use_imatrix:
return f"{model_name}-{imatrix_q_method.upper()}-imat.gguf"
return f"{model_name}-{q_method.upper()}.gguf"
def _setup_interface(self):
"""Set up the Gradio interface."""
with gr.Blocks(css=self.css) as self.demo:
#####
# Layout
#####
gr.Markdown(HuggingFaceModelProcessor.ERROR_LOGIN)
gr.LoginButton(min_width=250)
gr.HTML("Create your own GGUF Quants!
")
gr.Markdown(f"The space takes an HF repo as an input, quantizes it and creates a Public repo containing the selected quant under your HF user namespace.
Use via {self.processor.SPACE_URL}")
with gr.Row():
with gr.Column() as inputs:
gr.Markdown("### Model Configuration")
self.model_id.render()
with gr.Column():
self.use_imatrix.render()
self.q_method.render()
self.imatrix_q_method.render()
self.train_data_file.render()
gr.Markdown("### Advanced Options")
self.quant_embedding.render()
self.embedding_tensor_method.render()
self.leave_output.render()
self.quant_output.render()
self.output_tensor_method.render()
self.split_model.render()
with gr.Row() as split_options:
self.split_max_tensors.render()
self.split_max_size.render()
gr.Markdown("### Output Settings")
gr.Markdown("You can customize settings for your GGUF repo.")
self.private_repo.render()
with gr.Row():
self.repo_name.render()
self.gguf_name.render()
# Buttons
with gr.Row() as buttons:
self.clear_btn.render()
self.submit_btn.render()
with gr.Column() as outputs:
self.output_label.render()
self.output_image.render()
#####
# Event handlers
#####
self.submit_btn.click(
fn=self._process_model_wrapper,
inputs=[
self.model_id,
self.q_method,
self.use_imatrix,
self.imatrix_q_method,
self.private_repo,
self.train_data_file,
self.repo_name,
self.gguf_name,
self.quant_embedding,
self.embedding_tensor_method,
self.leave_output,
self.quant_output,
self.output_tensor_method,
self.split_model,
self.split_max_tensors,
self.split_max_size
],
outputs=[
self.output_label,
self.output_image,
],
)
#####
# OnChange handlers
#####
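            # The handlers below only toggle component visibility via
            # gr.update(visible=...); values already entered by the user are
            # preserved when a component is re-shown.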
self.use_imatrix.change(
fn=lambda use_imatrix: [gr.update(visible=not use_imatrix), gr.update(visible=use_imatrix), gr.update(visible=use_imatrix)],
inputs=self.use_imatrix,
outputs=[self.q_method, self.imatrix_q_method, self.train_data_file]
)
self.split_model.change(
fn=lambda split_model: [gr.update(visible=split_model), gr.update(visible=split_model)],
inputs=self.split_model,
outputs=[self.split_max_tensors, self.split_max_size]
)
self.quant_embedding.change(
fn=lambda quant_embedding: gr.update(visible=quant_embedding),
inputs=self.quant_embedding,
outputs=[self.embedding_tensor_method]
)
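            # "Leave output tensor" and "Quant output tensor" are mutually
            # exclusive: enabling either one hides the other's controls
            # (see the two handlers below).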
self.leave_output.change(
fn=lambda leave_output, quant_output: [gr.update(visible=not leave_output), gr.update(visible=not leave_output and quant_output)],
                inputs=[self.leave_output, self.quant_output],
outputs=[self.quant_output, self.output_tensor_method]
)
self.quant_output.change(
fn=lambda quant_output: [gr.update(visible=not quant_output), gr.update(visible=quant_output)],
inputs=self.quant_output,
outputs=[self.leave_output, self.output_tensor_method]
)
self.model_id.change(
fn=self._update_output_repo,
inputs=[self.model_id],
outputs=[self.repo_name]
)
self.model_id.change(
fn=self._update_output_filename,
inputs=[self.model_id, self.use_imatrix, self.q_method, self.imatrix_q_method],
outputs=[self.gguf_name]
)
self.use_imatrix.change(
fn=self._update_output_filename,
inputs=[self.model_id, self.use_imatrix, self.q_method, self.imatrix_q_method],
outputs=[self.gguf_name]
)
self.q_method.change(
fn=self._update_output_filename,
inputs=[self.model_id, self.use_imatrix, self.q_method, self.imatrix_q_method],
outputs=[self.gguf_name]
)
self.imatrix_q_method.change(
fn=self._update_output_filename,
inputs=[self.model_id, self.use_imatrix, self.q_method, self.imatrix_q_method],
outputs=[self.gguf_name]
)
def _process_model_wrapper(self, model_id: str, q_method: str, use_imatrix: bool,
imatrix_q_method: str, private_repo: bool, train_data_file,
repo_name: str, gguf_name: str, quant_embedding: bool,
embedding_tensor_method: str, leave_output: bool,
quant_output: bool, output_tensor_method: str,
split_model: bool, split_max_tensors, split_max_size: str, oauth_token: Optional[gr.OAuthToken]) -> Tuple[str, str]:
"""Wrapper for the process_model method to handle the conversion using ModelProcessingConfig."""
try:
# Validate token and get token string
token = self.processor._validate_token(oauth_token)
# Create configuration objects
quant_config = QuantizationConfig(
method=q_method,
use_imatrix=use_imatrix,
imatrix_method=imatrix_q_method,
                train_data=train_data_file.name if train_data_file is not None else "",  # no training file when imatrix is disabled
quant_embedding=quant_embedding,
embedding_tensor_method=embedding_tensor_method,
leave_output=leave_output,
quant_output=quant_output,
output_tensor_method=output_tensor_method
)
split_config = SplitConfig(
enabled=split_model,
max_tensors=split_max_tensors if isinstance(split_max_tensors, int) else 256,
max_size=split_max_size
)
output_config = OutputConfig(
private_repo=private_repo,
repo_name=repo_name,
filename=gguf_name
)
model_name = self.processor._get_model_name(model_id)
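            # A temporary working directory holds intermediate GGUF files and is
            # removed automatically on exit; when RUN_LOCALLY == "1", a persistent
            # folder under OUTPUT_FOLDER is used instead so artifacts survive.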
with tempfile.TemporaryDirectory(dir=self.processor.OUTPUT_FOLDER) as outDirObj:
outdir = (
self.processor._create_folder(os.path.join(self.processor.OUTPUT_FOLDER, model_name))
if self.processor.RUN_LOCALLY == "1"
else Path(outDirObj)
)
quant_config.fp16_model = f"{outdir}/{model_name}-fp16.gguf"
quant_config.imatrix_file = f"{outdir}/{model_name}-imatrix.gguf"
quant_config.quantized_gguf = f"{outdir}/{gguf_name}"
processing_config = ModelProcessingConfig(
token=token,
model_id=model_id,
model_name=model_name,
outdir=outdir,
quant_config=quant_config,
split_config=split_config,
output_config=output_config
)
# Call the processor's main method with the config object
self.processor.process_model(processing_config)
            return (
                f'✅ DONE\n\nFind your repo here: {processing_config.new_repo_id}',
                "llama.png",
            )
except Exception as e:
print(f"Error processing model: {e}")
            return (f'❌ ERROR\n\n{self.processor._escape_html(str(e))}', "error.png")
def launch(self):
"""Launch the Gradio interface."""
# Set up space restart scheduler
def restart_space():
HfApi().restart_space(repo_id=self.processor.SPACE_ID, token=self.processor.HF_TOKEN, factory_reboot=True)
scheduler = BackgroundScheduler()
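        # Schedule a factory restart of the space every 21600 seconds (6 hours).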
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()
# Launch the interface
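        # One conversion runs at a time; up to 5 additional requests can queue.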
self.demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)
# Main execution
if __name__ == "__main__":
ui = GGUFConverterUI()
ui.launch()