Spaces:
Running
Running
import fasterai | |
from fasterai.sparse.all import * | |
from fasterai.prune.all import * | |
import torch | |
import gradio as gr | |
import os | |
from torch.ao.quantization import get_default_qconfig_mapping | |
import torch.ao.quantization.quantize_fx as quantize_fx | |
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx | |
class Quant():
    """Quantize a model with torch.ao FX graph mode quantization.

    The qconfig mapping is chosen once, at construction time, from the
    requested backend and reused for every `quantize` call.
    """

    def __init__(self, backend="x86"):
        """Store the default qconfig mapping for *backend*.

        Args:
            backend: Backend name forwarded to `get_default_qconfig_mapping`.
        """
        self.qconfig = get_default_qconfig_mapping(backend)

    def quantize(self, model, example_inputs=None):
        """Return an FX-quantized version of *model*.

        Args:
            model: Module to quantize; it is switched to eval mode first.
            example_inputs: Optional example positional inputs for the model's
                forward. A tuple is passed through as-is; any other single
                value is wrapped in a one-element tuple. Defaults to one
                random tensor of shape (1, 3, 224, 224).

        Returns:
            The module produced by `convert_fx`.
        """
        if example_inputs is None:
            # prepare_fx expects a tuple of positional args; include a batch
            # dimension so the example matches a batched image forward call.
            example_inputs = (torch.randn(1, 3, 224, 224),)
        elif not isinstance(example_inputs, tuple):
            example_inputs = (example_inputs,)

        model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
        # NOTE(review): nothing is run through `model_prepared` between
        # prepare_fx and convert_fx, i.e. there is no calibration pass.
        # Confirm this is intended for the chosen qconfig.
        return convert_fx(model_prepared)
def optimize_model(input_model, sparsity, context, criteria):
    """Sparsify, prune, quantize and save a pickled PyTorch model.

    Steps, in order: load the model on CPU, apply filter-granularity
    sparsification, prune, quantize with `Quant`, then script the result
    with TorchScript and save it.

    Args:
        input_model: Path or file-like object handed to `torch.load`. It must
            contain a whole pickled `torch.nn.Module`, not a state_dict.
        sparsity: Value forwarded unchanged to `Sparsifier.sparsify_model`
            and `Pruner.prune_model`.
        context: Context argument forwarded to `Sparsifier` and `Pruner`.
        criteria: Either the name of a criteria object visible in this module
            (resolved with `eval`), or the criteria object itself.

    Returns:
        The path of the saved TorchScript file, "./comp_model.pth". The path
        is fixed, so each call overwrites the previous result.

    Raises:
        TypeError: If the loaded object is not a `torch.nn.Module`.
    """
    # SECURITY: torch.load unpickles the file, which can execute arbitrary
    # code. `weights_only=False` is required to load a whole module (it is
    # not the default on recent torch releases); only use with trusted files.
    # `map_location` lets checkpoints saved on GPU load on a CPU-only host.
    model = torch.load(input_model, map_location="cpu", weights_only=False)
    if not isinstance(model, torch.nn.Module):
        raise TypeError(
            "Expected a pickled torch.nn.Module, got "
            f"{type(model).__name__}; state_dict files are not supported."
        )
    model = model.eval()
    model = model.to('cpu')

    # SECURITY: eval() on a string. Callers in this file pass only fixed
    # names; never forward user-controlled text here.
    criterion = eval(criteria) if isinstance(criteria, str) else criteria

    sp = Sparsifier(model, 'filter', context, criteria=criterion)
    sp.sparsify_model(sparsity)
    sp._clean_buffers()

    pr = Pruner(model, context, criteria=criterion)
    pr.prune_model(sparsity)

    qu = Quant()
    qu_model = qu.quantize(model)

    comp_path = "./comp_model.pth"
    scripted = torch.jit.script(qu_model)
    torch.jit.save(scripted, comp_path)
    return comp_path
def main_interface(model_file, sparsity, action):
    """Map the selected UI action to an `optimize_model` preset and run it.

    Args:
        model_file: Uploaded model, forwarded to `optimize_model`.
        sparsity: Compression level, forwarded to `optimize_model`.
        action: One of "Speed", "Size" or "Consumption".

    Returns:
        Whatever `optimize_model` returns for a known action, otherwise the
        string "Action not supported". NOTE(review): the interface in this
        file wires this return value to a `gr.File` output, so the fallback
        string is handed to a file component; confirm that is intended.
    """
    # (action label, context, criteria name), checked in this order.
    presets = (
        ("Speed", "local", "large_final"),
        ("Size", "global", "large_final"),
        ("Consumption", "local", "random"),
    )
    for label, scope, criterion_name in presets:
        if action == label:
            return optimize_model(model_file, sparsity, scope, criterion_name)
    return "Action not supported"
# Option lists; not referenced elsewhere in the visible code.
granularity = ['weight', 'filter']
context = ['local', 'global']
criteria = ['large_final', 'random']

# Gradio UI: upload a model, choose a compression level and a goal, then
# download the compressed file returned by `main_interface`.
iface = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.File(label="Upload your PyTorch model (.pth file)"),
        gr.Slider(label="Compression Level", minimum=0, maximum=100),
        gr.Radio(["Speed", "Size", "Consumption"], label="Select Action"),
    ],
    outputs=gr.File(label="Download Compressed Model"),
    title="FasterAI",
    # Extension aligned with the upload label and the saved ".pth" file.
    description="Upload your neural network model (.pth file) and receive a compressed version.",
)

iface.launch()