# FasterAI model-compression demo (Hugging Face Spaces app).
import fasterai
from fasterai.sparse.all import *
from fasterai.prune.all import *
import torch
import gradio as gr
import os
from torch.ao.quantization import get_default_qconfig_mapping
import torch.ao.quantization.quantize_fx as quantize_fx
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx
class Quant():
    """Post-training static quantization using torch.ao FX graph mode.

    Builds the default qconfig mapping for the requested backend and
    exposes a one-shot prepare + convert pipeline.
    """

    def __init__(self, backend="x86"):
        # The qconfig mapping selects per-backend observers
        # ("x86"/"fbgemm"/"qnnpack").
        self.qconfig = get_default_qconfig_mapping(backend)

    def quantize(self, model, example_shape=(1, 3, 224, 224)):
        """Return an FX-quantized version of *model* (switched to eval mode).

        Args:
            model: a torch.nn.Module to quantize.
            example_shape: shape of the random example input used for FX
                tracing. Defaults to a single-image ImageNet-style batch.

        Returns:
            The converted (quantized) GraphModule.
        """
        # prepare_fx requires `example_inputs` as a *tuple* of positional
        # arguments; the original code passed a bare (3, 224, 224) tensor,
        # which is not a tuple and lacks the batch dimension that standard
        # image models expect.
        example_inputs = (torch.randn(*example_shape),)
        model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
        # NOTE: no calibration pass is run, so activation observers fall
        # back to default qparams; calibrate `model_prepared` on
        # representative data before convert_fx for better accuracy.
        return convert_fx(model_prepared)
def optimize_model(input_model, sparsity, context, criteria):
    """Sparsify, prune, quantize and TorchScript-export a saved model.

    Args:
        input_model: path to a .pth file containing a pickled nn.Module.
        sparsity: target sparsity level passed to fasterai (0-100 slider value).
        context: fasterai pruning context, 'local' or 'global'.
        criteria: name of the fasterai criteria, 'large_final' or 'random'.

    Returns:
        Filesystem path of the compressed TorchScript model.

    Raises:
        ValueError: if *criteria* is not one of the supported names.
    """
    # SECURITY: torch.load unpickles arbitrary code — only trusted model
    # files should ever reach this function.
    model = torch.load(input_model)
    model = model.eval().to('cpu')

    # Whitelisted lookup of the criteria objects brought into scope by the
    # fasterai star-imports; replaces eval() on a caller-derived string.
    try:
        crit = {'large_final': large_final, 'random': random}[criteria]
    except KeyError:
        raise ValueError(f"Unsupported criteria: {criteria!r}")

    sp = Sparsifier(model, 'filter', context, criteria=crit)
    sp.sparsify_model(sparsity)
    sp._clean_buffers()

    pr = Pruner(model, context, criteria=crit)
    pr.prune_model(sparsity)

    qu_model = Quant().quantize(model)

    # TorchScript export so the artifact can be loaded without the original
    # class definitions being importable on the consumer side.
    comp_path = "./comp_model.pth"
    torch.jit.save(torch.jit.script(qu_model), comp_path)
    return comp_path
def main_interface(model_file, sparsity, action):
    """Dispatch the selected UI action to optimize_model.

    Each action maps to a (context, criteria) preset; an unknown action
    yields an error string that gradio shows to the user.

    Args:
        model_file: the uploaded model file (from gr.File).
        sparsity: compression level from the slider.
        action: one of 'Speed', 'Size', 'Consumption'.

    Returns:
        Path of the compressed model, or an error message string.
    """
    # (context, criteria) preset per action — clearer than the original
    # chain of independent `if` statements.
    presets = {
        'Speed': ('local', 'large_final'),
        'Size': ('global', 'large_final'),
        'Consumption': ('local', 'random'),
    }
    if action not in presets:
        return "Action not supported"
    context, criteria = presets[action]
    return optimize_model(model_file, sparsity, context, criteria)
# Choice lists for the fasterai knobs. NOTE(review): only the values in
# `context` and `criteria` are actually used (inside main_interface /
# optimize_model); `granularity` appears unused here but is kept in case
# other code imports it.
granularity = ['weight', 'filter']
context = ['local', 'global']
criteria = ['large_final', 'random']

# Gradio UI: upload a .pth model, choose a compression level and an action,
# download the compressed TorchScript artifact.
iface = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.File(label="Upload your PyTorch model (.pth file)"),
        gr.Slider(label="Compression Level", minimum=0, maximum=100),
        gr.Radio(["Speed", "Size", "Consumption"], label="Select Action"),
    ],
    outputs=gr.File(label="Download Compressed Model"),
    title="FasterAI",
    # Fixed ".pt file" → ".pth file" to match the upload label above.
    description="Upload your neural network model (.pth file) and receive a compressed version.",
)

iface.launch()