File size: 4,261 Bytes
5322e36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from io import BytesIO

from sklearn.linear_model import Ridge
from sklearn.gaussian_process.kernels import ExpSineSquared
from sklearn.kernel_ridge import KernelRidge

import gradio as gr
import random


def generate_data(n_samples: int) -> tuple:
    """Build a noisy sine-wave regression problem.

    Parameters
    ----------
    n_samples : int
        Number of points sampled uniformly on the interval [0, 30].

    Returns
    -------
    tuple
        ``(data, target, training_data, training_noisy_target)`` where
        ``data`` has shape ``(n_samples, 1)``, ``target`` is the clean
        ``sin`` signal, and the training pair is a noisy 20% subset drawn
        from the first 40% of the domain.
    """
    # Fresh, randomly-seeded generator on every call so repeated runs differ.
    rng = np.random.RandomState(random.randint(0, 1000))

    data = np.linspace(0, 30, num=n_samples).reshape(-1, 1)
    target = np.sin(data).ravel()

    # Pick 20% of the samples, restricted to the first 40% of the x-range,
    # without replacement.
    candidate_pool = np.arange(0, int(0.4 * n_samples))
    chosen = rng.choice(candidate_pool, size=int(0.2 * n_samples), replace=False)

    training_data = data[chosen]
    noise = 0.5 * rng.randn(len(chosen))
    training_noisy_target = target[chosen] + noise

    return data, target, training_data, training_noisy_target


# def plot_ridge_and_kernel(n_samples: int) -> plt.figure:

#     data, target, training_data, training_noisy_target = generate_data(n_samples)

#     ridge = Ridge().fit(training_data, training_noisy_target)
#     kernel_ridge = KernelRidge(kernel=ExpSineSquared())
#     kernel_ridge.fit(training_data, training_noisy_target)

#     fig, ax = plt.subplots(figsize=(8, 4))

#     ax.plot(data, target, label="True signal", linewidth=2)
#     ax.scatter(
#         training_data,
#         training_noisy_target,
#         color="black",
#         label="Noisy measurements",
#     )
#     ax.plot(data, ridge.predict(data), label="Ridge regression")
#     ax.plot(
#         data,
#         kernel_ridge.predict(data),
#         label="Kernel ridge",
#         linewidth=2,
#         linestyle="dashdot",
#     )
#     ax.legend()
#     ax.set_xlabel("data")
#     ax.set_ylabel("target")
#     _ = ax.set_title("Ridge vs Kernel Ridge")

#     return fig

def plot_ridge_and_kernel(n_samples: int) -> plt.figure:
    """Fit plain ridge and periodic kernel ridge models and plot both.

    Parameters
    ----------
    n_samples : int
        Number of points forwarded to :func:`generate_data`.

    Returns
    -------
    matplotlib.figure.Figure
        Figure showing the true signal, the noisy training points, both
        model predictions, and the shaded area between the predictions.
    """
    data, target, training_data, training_noisy_target = generate_data(n_samples)

    # Linear ridge regression vs. kernel ridge with a periodic kernel.
    ridge = Ridge().fit(training_data, training_noisy_target)
    kernel_ridge = KernelRidge(kernel=ExpSineSquared()).fit(
        training_data, training_noisy_target
    )

    pred_ridge = ridge.predict(data)
    pred_kernel = kernel_ridge.predict(data)

    fig, ax = plt.subplots(figsize=(8, 4))

    ax.plot(data, target, label="True signal", linewidth=2)
    ax.scatter(
        training_data,
        training_noisy_target,
        color="black",
        label="Noisy measurements",
    )
    ax.plot(data, pred_ridge, label="Ridge regression")
    ax.plot(
        data,
        pred_kernel,
        label="Kernel ridge",
        linewidth=2,
        linestyle="dashdot",
    )
    # Shade the gap between the two model predictions.
    ax.fill_between(
        data.ravel(),
        pred_ridge,
        pred_kernel,
        color="lightgrey",
        alpha=0.4,
    )

    ax.legend()
    ax.set_xlabel("data")
    ax.set_ylabel("target")
    ax.set_title("Ridge vs Kernel Ridge with the area between highlighted")

    return fig


def gradio_plot(n_samples: int) -> Image.Image:
    """Render the comparison plot and return it as a PIL image for Gradio.

    Parameters
    ----------
    n_samples : int
        Number of samples, forwarded to :func:`plot_ridge_and_kernel`.

    Returns
    -------
    PIL.Image.Image
        The rendered figure as a PNG-backed image.
    """
    fig = plot_ridge_and_kernel(n_samples)
    buf = BytesIO()
    fig.savefig(buf, format="png")
    # Close the figure: pyplot keeps every figure alive in a global registry,
    # so without this each Gradio request leaks a figure's worth of memory.
    plt.close(fig)
    buf.seek(0)
    im = Image.open(buf)
    # Force the pixel data to be decoded now, so the image remains usable
    # independently of the BytesIO buffer's lifetime.
    im.load()
    return im


# NOTE(review): the original used `gr.inputs.Slider(..., default=...)`, the
# Gradio 1.x/2.x API that was removed in Gradio 3.0; `gr.Slider(..., value=...)`
# is the modern equivalent with identical behavior.
inputs = [
    gr.Slider(minimum=100, maximum=5000, step=100, value=1000, label="n_samples"),
]


# Create the Gradio app
title = "Comparison of kernel ridge and Gaussian process regression"
description = "Kernel ridge regression and Gaussian process regression both use the kernel trick to fit data. While kernel ridge regression aims to find a single target function minimizing the loss (mean squared error), Gaussian process regression takes a probabilistic approach, defining a Gaussian posterior distribution over target functions using Bayes' theorem. Essentially, kernel ridge regression seeks one best function, while Gaussian process regression considers a range of probable functions based on prior probabilities and observed data. \n \n link to the official doc https://scikit-learn.org/stable/auto_examples/gaussian_process/plot_compare_gpr_krr.html#sphx-glr-auto-examples-gaussian-process-plot-compare-gpr-krr-py"
iface = gr.Interface(
    fn=gradio_plot,
    inputs=inputs,
    outputs="image",
    title=title,
    description=description,
)
iface.launch()