from fasterai.sparse.all import *
from fasterai.prune.all import *
import torch
import gradio as gr
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx

class Quant():
    """Post-training quantization via FX graph mode, using the default qconfig for the backend."""

    def __init__(self, backend="x86"):
        self.qconfig = get_default_qconfig_mapping(backend)

    def quantize(self, model):
        # prepare_fx expects a tuple of example inputs with a batch dimension
        x = torch.randn(1, 3, 224, 224)
        model_prepared = prepare_fx(model.eval(), self.qconfig, (x,))
        # Note: no calibration pass is run here, so the observers keep their
        # initial statistics; feeding representative data through
        # model_prepared before convert_fx would improve accuracy.
        return convert_fx(model_prepared)
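
# Minimal standalone sketch of the quantization step (resnet18 and the
# torchvision import are illustrative assumptions, not part of this app):
#
#   from torchvision.models import resnet18
#   q = Quant(backend="x86")
#   int8_model = q.quantize(resnet18(weights=None).eval())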


def optimize_model(input_model, sparsity, context, criteria):
    """Sparsify, prune, and quantize an uploaded model; return the path of the compressed file."""
    # The upload is a fully pickled model, not a state_dict
    # (weights_only=False is required on PyTorch >= 2.6, where the default changed)
    model = torch.load(input_model, weights_only=False)
    model = model.eval().to('cpu')

    # eval() resolves the criteria name ('large_final' or 'random') to the
    # corresponding fasterai criteria object brought in by the wildcard imports
    sp = Sparsifier(model, 'filter', context, criteria=eval(criteria))
    sp.sparsify_model(sparsity)
    sp._clean_buffers()

    pr = Pruner(model, context, criteria=eval(criteria))
    pr.prune_model(sparsity)

    qu = Quant()
    qu_model = qu.quantize(model)

    comp_path = "./comp_model.pth"

    # Save as TorchScript rather than a pickle, so the file is self-contained
    scripted = torch.jit.script(qu_model)
    torch.jit.save(scripted, comp_path)

    return comp_path
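
# The saved file is a TorchScript archive; it can be reloaded with
# torch.jit.load("./comp_model.pth") without the original class definition.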

def main_interface(model_file, sparsity, action):
    if action == 'Speed':
        return optimize_model(model_file, sparsity, 'local', "large_final")
    elif action == 'Size':
        return optimize_model(model_file, sparsity, 'global', "large_final")
    elif action == 'Consumption':
        return optimize_model(model_file, sparsity, 'local', "random")
    else:
        return "Action not supported"


# Available fasterai settings; only the combinations used in main_interface
# are exposed through the UI
granularity = ['weight', 'filter']
context = ['local', 'global']
criteria = ['large_final', 'random']


iface = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.File(label="Upload your PyTorch model (.pth file)"),
        gr.Slider(label="Compression Level (%)", minimum=0, maximum=100),
        gr.Radio(["Speed", "Size", "Consumption"], label="Select Action"),
    ],
    outputs=gr.File(label="Download Compressed Model"),
    title="FasterAI",
    description="Upload your PyTorch model (.pth file) and receive a compressed version.",
)

iface.launch()