EduardoPacheco committed on
Commit
3c4e9d2
·
1 Parent(s): f2b8171

App itself

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ import gradio as gr
5
+ import pandas as pd
6
+ import plotly.graph_objects as go
7
+ from sklearn.model_selection import train_test_split
8
+
9
+ import utils
10
+
11
+
12
def app_fn(
    formula_str: str,
    n_samples: int,
    lower: float,
    upper: float,
    learning_rate: float,
    n_estimators: int,
    max_depth: int,
) -> tuple[go.Figure, pd.DataFrame]:
    """Fit interval, median and mean gradient-boosting models and report coverage.

    Data is produced by ``utils.DataGenerator`` from ``formula_str`` over the
    fixed range ``[0, 10]`` with a fixed seed, split into train/test sets with
    ``train_test_split`` defaults, and used to fit three models that share the
    same boosting hyper-parameters.

    Args:
        formula_str: Formula forwarded to ``utils.DataGenerator`` and to the plot.
        n_samples: Number of samples requested from the data generator.
        lower: Lower quantile forwarded to ``utils.GradientBoostingCoverage``.
        upper: Upper quantile forwarded to ``utils.GradientBoostingCoverage``.
        learning_rate: Boosting learning rate shared by all three models.
        n_estimators: Number of boosting stages shared by all three models.
        max_depth: Maximum tree depth shared by all three models.

    Returns:
        A ``(figure, coverage_table)`` pair: whatever ``utils.plot_interval``
        returns (presumably a plotly figure -- confirm in ``utils``) and a
        DataFrame with the columns ``Split``, ``Coverage`` and
        ``Expected Coverage`` holding whole-number percentages as strings.
    """
    # UI sliders may deliver numbers as floats; count-like values must be ints
    # before they reach the data generator and the estimators.
    n_samples = int(n_samples)
    n_estimators = int(n_estimators)
    max_depth = int(max_depth)

    # Generating Data
    x_range = [0, 10]
    seed = 42
    gen = utils.DataGenerator(formula_str, x_range=x_range, n_samples=n_samples, seed=seed)
    X = gen.X
    y = gen.y

    # Splitting Data
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

    # Model Parameters
    model_kwargs = {
        "learning_rate": learning_rate,
        "n_estimators": n_estimators,
        "max_depth": max_depth,
    }

    # Fitting Interval Model
    model_interval = utils.GradientBoostingCoverage(lower, upper, **model_kwargs)
    model_interval.fit(X_train, y_train)

    # Fitting Median Model
    model_median = utils.fit_gradientboosting(X_train, y_train, alpha=0.5, loss="quantile", **model_kwargs)

    # Fitting Mean Model
    model_mean = utils.fit_gradientboosting(X_train, y_train, loss="squared_error", **model_kwargs)

    # Calculating Train and Test Coverage
    expected_coverage = model_interval.expected_coverage
    coverage_train = model_interval.coverage_fraction(X_train, y_train)
    coverage_test = model_interval.coverage_fraction(X_test, y_test)
    # Formatted once; reused for the plot and for both table rows.
    expected_pct = f"{expected_coverage*100:.0f}"

    # Plotting Predictions on a dense grid shaped as a single-feature column
    xx = np.atleast_2d(np.linspace(*x_range, 1000)).T
    y_lower, y_upper = model_interval.predict(xx)
    y_median = model_median.predict(xx)
    y_mean = model_mean.predict(xx)

    fig = utils.plot_interval(
        xx, X_test, y_test, y_upper, y_lower, y_median, y_mean, formula_str, expected_pct
    )

    # DataFrame with Coverage
    df_coverage = pd.DataFrame(
        {
            "Split": ["Train", "Test"],
            "Coverage": [f"{coverage_train*100:.0f}", f"{coverage_test*100:.0f}"],
            "Expected Coverage": [expected_pct, expected_pct],
        }
    )

    return fig, df_coverage
74
+
75
title = "🤗 Prediction Intervals w/ Gradient Boosting Regression 🤗"

# UI layout: three columns of inputs, a run button, then the plot and the
# coverage table. Components use the top-level gr.Textbox / gr.Slider classes
# with `value=` (the `gr.inputs.*` namespace with `default=` is deprecated).
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        """
        ## This app shows how to use Gradient Boosting Regression to predict intervals. \
        The app uses the [Quantile Loss](https://en.wikipedia.org/wiki/Quantile_regression#Quantile_loss_function) \
        to predict the lower and upper quantiles with Gradient Boosting Regression. The data used in this example \
        is generated through the equation passed in the Formula textbox; heteroscedastic noise is introduced to \
        make the data more realistic. The app also shows the coverage of the intervals on the train and test data.

        ## Write equations using x as the variable and Python notation. Other supported functions are sin, cos, tan, exp, log, sqrt, and abs.

        [Original Example](https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html#sphx-glr-auto-examples-ensemble-plot-gradient-boosting-quantile-py)
        """
    )
    with gr.Row():
        # Data generation controls
        with gr.Column():
            formula_str = gr.Textbox(
                lines=1,
                label="Formula",
                value="x * sin(x)",
            )

            n_samples = gr.Slider(
                minimum=100,
                maximum=10000,
                step=100,
                value=1000,
                label="Number of Samples",
            )

        # Quantile controls; the slider ranges keep lower (<= 0.45) below upper (>= 0.5)
        with gr.Column():
            lower = gr.Slider(
                minimum=0.01,
                maximum=0.45,
                step=0.01,
                value=0.05,
                label="Lower Quantile",
            )

            upper = gr.Slider(
                minimum=0.5,
                maximum=0.99,
                step=0.01,
                value=0.95,
                label="Upper Quantile",
            )

        # Boosting hyper-parameters shared by every fitted model
        with gr.Column():
            learning_rate = gr.Slider(
                minimum=0.01,
                maximum=1.0,
                step=0.01,
                value=0.05,
                label="Learning Rate",
            )

            n_estimators = gr.Slider(
                minimum=1,
                maximum=1000,
                step=1,
                value=200,
                label="Number of Estimators",
            )

            max_depth = gr.Slider(
                minimum=1,
                maximum=10,
                step=1,
                value=2,
                label="Max Depth",
            )

    # The caption is the button's value, not a `label` keyword.
    btn = gr.Button("Run")
    with gr.Row():
        with gr.Column():
            fig = gr.Plot(label="Coverage Plot")
            df_coverage = gr.Dataframe(label="Coverage DataFrame")

    # Same wiring for the button click and the initial page load, so the
    # outputs are populated with the default settings as soon as the app opens.
    inputs = [formula_str, n_samples, lower, upper, learning_rate, n_estimators, max_depth]
    outputs = [fig, df_coverage]
    btn.click(fn=app_fn, inputs=inputs, outputs=outputs)
    demo.load(fn=app_fn, inputs=inputs, outputs=outputs)

demo.launch()