from fasterai.sparse.all import *
from fasterai.prune.all import *
import torch
import gradio as gr
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx


class Quant:
    # Post-training static quantization using PyTorch's FX graph mode workflow.
    def __init__(self, backend="x86"):
        self.qconfig = get_default_qconfig_mapping(backend)

    def quantize(self, model):
        # prepare_fx expects example inputs as a tuple; a single 3x224x224 RGB
        # image batch is assumed here.
        example_inputs = (torch.randn(1, 3, 224, 224),)
        model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
        # Minimal single-pass calibration so the observers record activation
        # ranges; calibrate on representative data for better accuracy.
        model_prepared(*example_inputs)
        return convert_fx(model_prepared)


def optimize_model(input_model, sparsity, context, criteria):
    # weights_only=False is required on recent PyTorch to unpickle a full nn.Module.
    model = torch.load(input_model, map_location='cpu', weights_only=False)
    model.eval()

    # Zero out filters according to the chosen context and criteria.
    sp = Sparsifier(model, 'filter', context, criteria=criteria)
    sp.sparsify_model(sparsity)
    sp._clean_buffers()

    # Physically remove the zeroed filters.
    pr = Pruner(model, context, criteria=criteria)
    pr.prune_model(sparsity)

    # Quantize the pruned model.
    qu = Quant()
    qu_model = qu.quantize(model)

    # Save as a TorchScript archive so the file is self-contained.
    comp_path = "./comp_model.pth"
    torch.jit.save(torch.jit.script(qu_model), comp_path)
    return comp_path


def main_interface(model_file, sparsity, action):
    if action == 'Speed':
        return optimize_model(model_file, sparsity, 'local', large_final)
    elif action == 'Size':
        return optimize_model(model_file, sparsity, 'global', large_final)
    elif action == 'Consumption':
        return optimize_model(model_file, sparsity, 'local', random)
    else:
        raise gr.Error("Action not supported")


iface = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.File(label="Upload your PyTorch model (.pth file)"),
        gr.Slider(label="Compression Level", minimum=0, maximum=100),
        gr.Radio(["Speed", "Size", "Consumption"], label="Select Action")
    ],
    outputs=gr.File(label="Download Compressed Model"),
    title="FasterAI",
    description="Upload your neural network model (.pth file) and receive a compressed version.",
)

iface.launch()
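
# A quick sanity check for the produced artifact (a sketch; the input shape is
# assumed to match the 1x3x224x224 example used during quantization):
#
#   loaded = torch.jit.load("comp_model.pth")
#   with torch.inference_mode():
#       out = loaded(torch.randn(1, 3, 224, 224))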