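# NOTE: the following lines appear to be page chrome captured from the hosting
# site's file viewer, not part of the script; kept as comments so the file parses.
# svjack's picture
# Upload folder using huggingface_hub
# 13d3ba0
# raw
# history blame
# 1.95 kB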
from ggml import ffi, lib
from ggml.utils import init, numpy, copy
import numpy as np
from math import pi, cos, sin, ceil
import matplotlib.pyplot as plt
# ggml context that backs every tensor allocated by this script.
ctx = init(mem_size=100*1024*1024) # Will be auto-GC'd

# Side length of the square test image.
n = 256

# Reference image: orig[i, j] = cos(2*pi*j/n) * sin(2*pi*i/n).
# Each trig value depends on a single index, so evaluate the 2*n of them once
# and take the outer product instead of calling cos/sin n*n times. The
# products are the same float64 values as before, then cast to float32.
col_cos = [cos(j * 2 * pi / n) for j in range(n)]
row_sin = [sin(i * 2 * pi / n) for i in range(n)]
orig = np.outer(row_sin, col_cos).astype(np.float32)

# F32 tensor holding the reference image; source for every conversion below.
orig_tensor = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F32, n, n)
copy(orig, orig_tensor)

# Every type id ggml reports as quantized, minus the ones skipped on purpose.
unsupported = (lib.GGML_TYPE_Q8_1, lib.GGML_TYPE_Q8_K)  # Apparently not supported
quants = [
    qtype for qtype in range(lib.GGML_TYPE_COUNT)
    if lib.ggml_is_quantized(qtype) and qtype not in unsupported
]
# quants = [lib.GGML_TYPE_Q2_K] # Test a single one
def get_name(type):
    """Return ggml's name for the type id ``type`` as a ``str``.

    Falls back to ``'?'`` when ``lib.ggml_type_name`` returns a falsy value
    (presumably a NULL ``char *`` for ids it has no name for).
    """
    c_name = lib.ggml_type_name(type)
    if not c_name:
        return '?'
    # ffi.string yields bytes; decode them for printing and sorting.
    return ffi.string(c_name).decode('utf-8')
# Order the panels alphabetically by type name. The leading None is a
# placeholder for the unquantized original, so it lands in the first subplot.
quants.sort(key=get_name)
quants.insert(0, None)
print(quants)

# Grid layout: fixed column count, as many rows as needed.
ncols = 4
nrows = ceil(len(quants) / ncols)

plt.figure(figsize=(ncols * 5, nrows * 5), layout='tight')

# `qtype` rather than `type`, so the builtin is not rebound at module level.
for i, qtype in enumerate(quants):
    plt.subplot(nrows, ncols, i + 1)
    try:
        if qtype is None:
            plt.title('Original')
            plt.imshow(orig)
        else:
            # Round-trip the reference through the quantized type: copy into a
            # tensor of that type, then read it back as a numpy array
            # (presumably ggml.utils quantizes / dequantizes here -- confirm).
            quantized_tensor = lib.ggml_new_tensor_2d(ctx, qtype, n, n)
            copy(orig_tensor, quantized_tensor)
            quantized = numpy(quantized_tensor, allow_copy=True)

            d = quantized - orig
            # NOTE(review): d is 2-D here, so numpy treats these as matrix
            # norms (ord=2: largest singular value, ord=inf: max absolute row
            # sum), not element-wise vector norms. Use d.ravel() if the
            # latter was intended.
            results = {
                "l2": np.linalg.norm(d, 2),
                "linf": np.linalg.norm(d, np.inf),
                "compression":
                    round(lib.ggml_nbytes(orig_tensor) /
                          lib.ggml_nbytes(quantized_tensor), 1)
            }

            name = get_name(qtype)
            print(f'{name}: {results}')

            plt.title(f'{name} ({results["compression"]}x smaller)')
            plt.imshow(quantized, interpolation='nearest')
    except Exception as e:
        # Best effort: report the failure and keep plotting the other types.
        print(f'Error: {e}')

plt.show()