Spaces:
Sleeping
Sleeping
feat(variance): add notebook for variance
Browse files. This commit introduces a new notebook focused on the concept of variance in probability. It includes definitions, examples, and interactive visualizations to illustrate how variance and standard deviation affect distributions.
- probability/12_variance.py +631 -0
probability/12_variance.py
ADDED
|
@@ -0,0 +1,631 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# /// script
|
| 2 |
+
# requires-python = ">=3.10"
|
| 3 |
+
# dependencies = [
|
| 4 |
+
# "marimo",
|
| 5 |
+
# "matplotlib==3.10.0",
|
| 6 |
+
# "numpy==2.2.3",
|
| 7 |
+
# "scipy==1.15.2",
|
| 8 |
+
# "wigglystuff==0.1.10",
|
| 9 |
+
# ]
|
| 10 |
+
# ///
|
| 11 |
+
|
| 12 |
+
import marimo
|
| 13 |
+
|
| 14 |
+
__generated_with = "0.11.20"
|
| 15 |
+
app = marimo.App(width="medium", app_title="Variance")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@app.cell(hide_code=True)
|
| 19 |
+
def _(mo):
|
| 20 |
+
mo.md(
|
| 21 |
+
r"""
|
| 22 |
+
# Variance
|
| 23 |
+
|
| 24 |
+
_This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part2/variance/), by Stanford professor Chris Piech._
|
| 25 |
+
|
| 26 |
+
In our previous exploration of random variables, we learned about expectation - a measure of central tendency. However, knowing the average value alone doesn't tell us everything about a distribution. Consider these questions:
|
| 27 |
+
|
| 28 |
+
- How spread out are the values around the mean?
|
| 29 |
+
- How reliable is the expectation as a predictor of individual outcomes?
|
| 30 |
+
- How much do individual samples typically deviate from the average?
|
| 31 |
+
|
| 32 |
+
This is where **variance** comes in - it measures the spread or dispersion of a random variable around its expected value.
|
| 33 |
+
"""
|
| 34 |
+
)
|
| 35 |
+
return
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@app.cell(hide_code=True)
|
| 39 |
+
def _(mo):
|
| 40 |
+
mo.md(
|
| 41 |
+
r"""
|
| 42 |
+
## Definition of Variance
|
| 43 |
+
|
| 44 |
+
The variance of a random variable $X$ with expected value $\mu = E[X]$ is defined as:
|
| 45 |
+
|
| 46 |
+
$$\text{Var}(X) = E[(X-\mu)^2]$$
|
| 47 |
+
|
| 48 |
+
This definition captures the average squared deviation from the mean. There's also an equivalent, often more convenient formula:
|
| 49 |
+
|
| 50 |
+
$$\text{Var}(X) = E[X^2] - (E[X])^2$$
|
| 51 |
+
|
| 52 |
+
/// tip
|
| 53 |
+
The second formula is usually easier to compute, as it only requires calculating $E[X^2]$ and $E[X]$, rather than working with deviations from the mean.
|
| 54 |
+
"""
|
| 55 |
+
)
|
| 56 |
+
return
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@app.cell(hide_code=True)
|
| 60 |
+
def _(mo):
|
| 61 |
+
mo.md(
|
| 62 |
+
r"""
|
| 63 |
+
## Intuition Through Example
|
| 64 |
+
|
| 65 |
+
Let's look at a real-world example that illustrates why variance is important. Consider three different groups of graders evaluating assignments in a massive online course. Each grader has their own "grading distribution" - their pattern of assigning scores to work that deserves a 70/100.
|
| 66 |
+
|
| 67 |
+
The visualization below shows the probability distributions for three types of graders. Try clicking and dragging the blue numbers to adjust the parameters and see how they affect the variance.
|
| 68 |
+
"""
|
| 69 |
+
)
|
| 70 |
+
return
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@app.cell(hide_code=True)
|
| 74 |
+
def _(mo):
|
| 75 |
+
mo.md(
|
| 76 |
+
r"""
|
| 77 |
+
/// TIP
|
| 78 |
+
Try adjusting the blue numbers above to see how:
|
| 79 |
+
|
| 80 |
+
- Increasing spread increases variance
|
| 81 |
+
- The mixture ratio affects how many outliers appear in Grader C's distribution
|
| 82 |
+
- Changing the true grade shifts all distributions but maintains their relative variances
|
| 83 |
+
"""
|
| 84 |
+
)
|
| 85 |
+
return
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
@app.cell(hide_code=True)
|
| 89 |
+
def _(controls):
|
| 90 |
+
controls
|
| 91 |
+
return
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
@app.cell(hide_code=True)
def _(
    grader_a_spread,
    grader_b_spread,
    grader_c_mix,
    np,
    plt,
    stats,
    true_grade,
):
    # Render three grader score distributions side by side so the reader can
    # compare spreads that share the same mean (the true grade) but have
    # different variances.  All slider objects expose their value via .amount.

    # Common grade axis for the three probability density curves.
    _grader_x = np.linspace(40, 100, 200)

    # Exact variances for the legend labels.
    var_a = grader_a_spread.amount**2
    var_b = grader_b_spread.amount**2
    # Grader C samples N(mu, 3^2) with probability (1-p) and N(mu, 8^2) with
    # probability p, where mu is the true grade for BOTH components.  Because
    # the component means are equal, the law of total variance reduces to the
    # probability-weighted average of the component variances; the general
    # mixture formula's extra p*(1-p)*(mu1 - mu2)^2 term vanishes.
    var_c = (1 - grader_c_mix.amount) * 3**2 + grader_c_mix.amount * 8**2

    # Grader A: wide spread around the true grade.
    grader_a = stats.norm.pdf(_grader_x, loc=true_grade.amount, scale=grader_a_spread.amount)

    # Grader B: narrow spread around the true grade.
    grader_b = stats.norm.pdf(_grader_x, loc=true_grade.amount, scale=grader_b_spread.amount)

    # Grader C: two-component mixture of normals (mostly tight, sometimes wide).
    grader_c = (1-grader_c_mix.amount) * stats.norm.pdf(_grader_x, loc=true_grade.amount, scale=3) + \
               grader_c_mix.amount * stats.norm.pdf(_grader_x, loc=true_grade.amount, scale=8)

    grader_fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))

    # Plot each distribution with its variance in the legend.
    ax1.fill_between(_grader_x, grader_a, alpha=0.3, color='green', label=f'Var ≈ {var_a:.2f}')
    ax1.axvline(x=true_grade.amount, color='black', linestyle='--', label='True Grade')
    ax1.set_title('Grader A: High Variance')
    ax1.set_xlabel('Grade')
    ax1.set_ylabel('Pr(G = g)')
    ax1.set_ylim(0, max(grader_a)*1.1)

    ax2.fill_between(_grader_x, grader_b, alpha=0.3, color='blue', label=f'Var ≈ {var_b:.2f}')
    ax2.axvline(x=true_grade.amount, color='black', linestyle='--')
    ax2.set_title('Grader B: Low Variance')
    ax2.set_xlabel('Grade')
    ax2.set_ylim(0, max(grader_b)*1.1)

    ax3.fill_between(_grader_x, grader_c, alpha=0.3, color='purple', label=f'Var ≈ {var_c:.2f}')
    ax3.axvline(x=true_grade.amount, color='black', linestyle='--')
    ax3.set_title('Grader C: Mixed Distribution')
    ax3.set_xlabel('Grade')
    ax3.set_ylim(0, max(grader_c)*1.1)

    # Annotations explaining what each panel illustrates.
    ax1.annotate('Wide spread = high variance',
                 xy=(true_grade.amount, max(grader_a)*0.5),
                 xytext=(true_grade.amount-15, max(grader_a)*0.7),
                 arrowprops=dict(facecolor='black', shrink=0.05, width=1))

    ax2.annotate('Narrow spread = low variance',
                 xy=(true_grade.amount, max(grader_b)*0.5),
                 xytext=(true_grade.amount+8, max(grader_b)*0.7),
                 arrowprops=dict(facecolor='black', shrink=0.05, width=1))

    # Anchor the arrow on the curve 15 points above the mean, where the wide
    # mixture component dominates.
    ax3.annotate('Mixture creates outliers',
                 xy=(true_grade.amount+15, grader_c[np.where(_grader_x >= true_grade.amount+15)[0][0]]),
                 xytext=(true_grade.amount+5, max(grader_c)*0.7),
                 arrowprops=dict(facecolor='black', shrink=0.05, width=1))

    # Shared cosmetics.
    for _ax in [ax1, ax2, ax3]:
        _ax.legend()
        _ax.grid(alpha=0.2)

    plt.tight_layout()
    plt.gca()
    return (
        ax1,
        ax2,
        ax3,
        grader_a,
        grader_b,
        grader_c,
        grader_fig,
        var_a,
        var_b,
        var_c,
    )
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
@app.cell(hide_code=True)
|
| 183 |
+
def _(mo):
|
| 184 |
+
mo.md(
|
| 185 |
+
r"""
|
| 186 |
+
/// note
|
| 187 |
+
All three distributions have the same expected value (the true grade), but they differ significantly in their spread:
|
| 188 |
+
|
| 189 |
+
- **Grader A** has high variance - grades vary widely from the true value
|
| 190 |
+
- **Grader B** has low variance - grades consistently stay close to the true value
|
| 191 |
+
- **Grader C** has a mixture distribution - mostly consistent but with occasional extreme values
|
| 192 |
+
|
| 193 |
+
This illustrates why variance is crucial: two distributions can have the same mean but behave very differently in practice.
|
| 194 |
+
"""
|
| 195 |
+
)
|
| 196 |
+
return
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
@app.cell(hide_code=True)
|
| 200 |
+
def _(mo):
|
| 201 |
+
mo.md(
|
| 202 |
+
r"""
|
| 203 |
+
## Computing Variance
|
| 204 |
+
|
| 205 |
+
Let's work through some concrete examples to understand how to calculate variance.
|
| 206 |
+
|
| 207 |
+
### Example 1: Fair Die Roll
|
| 208 |
+
|
| 209 |
+
Consider rolling a fair six-sided die. We'll calculate its variance step by step:
|
| 210 |
+
"""
|
| 211 |
+
)
|
| 212 |
+
return
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
@app.cell
def _(np):
    # Variance of a fair six-sided die, computed from its pmf with the
    # shortcut formula Var(X) = E[X^2] - (E[X])^2.
    die_values = np.arange(1, 7)
    die_probs = np.full(6, 1 / 6)

    # E[X]: dot product of outcomes with their probabilities.
    expected_value = np.sum(die_values * die_probs)

    # E[X^2]: same weighting applied to the squared outcomes.
    expected_square = np.sum(die_values**2 * die_probs)

    # Var(X) = E[X^2] - (E[X])^2, and sigma is its square root.
    variance = expected_square - expected_value**2
    std_dev = np.sqrt(variance)

    print(f"E[X] = {expected_value:.2f}")
    print(f"E[X^2] = {expected_square:.2f}")
    print(f"Var(X) = {variance:.2f}")
    print(f"Standard Deviation = {std_dev:.2f}")
    return (
        die_probs,
        die_values,
        expected_square,
        expected_value,
        std_dev,
        variance,
    )
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
@app.cell(hide_code=True)
|
| 248 |
+
def _(mo):
|
| 249 |
+
mo.md(
|
| 250 |
+
r"""
|
| 251 |
+
/// NOTE
|
| 252 |
+
For a fair die:
|
| 253 |
+
|
| 254 |
+
- The expected value (3.50) tells us the average roll
|
| 255 |
+
- The variance (2.92) tells us how much typical rolls deviate from this average
|
| 256 |
+
- The standard deviation (1.71) gives us this spread in the original units
|
| 257 |
+
"""
|
| 258 |
+
)
|
| 259 |
+
return
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
@app.cell(hide_code=True)
|
| 263 |
+
def _(mo):
|
| 264 |
+
mo.md(
|
| 265 |
+
r"""
|
| 266 |
+
## Properties of Variance
|
| 267 |
+
|
| 268 |
+
Variance has several important properties that make it useful for analyzing random variables:
|
| 269 |
+
|
| 270 |
+
1. **Non-negativity**: $\text{Var}(X) \geq 0$ for any random variable $X$
|
| 271 |
+
2. **Variance of a constant**: $\text{Var}(c) = 0$ for any constant $c$
|
| 272 |
+
3. **Scaling**: $\text{Var}(aX) = a^2\text{Var}(X)$ for any constant $a$
|
| 273 |
+
4. **Translation**: $\text{Var}(X + b) = \text{Var}(X)$ for any constant $b$
|
| 274 |
+
5. **Independence**: If $X$ and $Y$ are independent, then $\text{Var}(X + Y) = \text{Var}(X) + \text{Var}(Y)$
|
| 275 |
+
|
| 276 |
+
Let's verify a property with an example.
|
| 277 |
+
"""
|
| 278 |
+
)
|
| 279 |
+
return
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
@app.cell(hide_code=True)
|
| 283 |
+
def _(mo):
|
| 284 |
+
mo.md(
|
| 285 |
+
r"""
|
| 286 |
+
## Proof of Variance Formula
|
| 287 |
+
|
| 288 |
+
The equivalence of the two variance formulas is a fundamental result in probability theory. Here's the proof:
|
| 289 |
+
|
| 290 |
+
Starting with the definition $\text{Var}(X) = E[(X-\mu)^2]$ where $\mu = E[X]$:
|
| 291 |
+
|
| 292 |
+
\begin{align}
|
| 293 |
+
\text{Var}(X) &= E[(X-\mu)^2] \\
|
| 294 |
+
&= \sum_x(x-\mu)^2P(x) && \text{Definition of Expectation}\\
|
| 295 |
+
&= \sum_x (x^2 -2\mu x + \mu^2)P(x) && \text{Expanding the square}\\
|
| 296 |
+
&= \sum_x x^2P(x)- 2\mu \sum_x xP(x) + \mu^2 \sum_x P(x) && \text{Distributing the sum}\\
|
| 297 |
+
&= E[X^2]- 2\mu E[X] + \mu^2 && \text{Definition of expectation}\\
|
| 298 |
+
&= E[X^2]- 2(E[X])^2 + (E[X])^2 && \text{Since }\mu = E[X]\\
|
| 299 |
+
&= E[X^2]- (E[X])^2 && \text{Simplifying}
|
| 300 |
+
\end{align}
|
| 301 |
+
|
| 302 |
+
/// tip
|
| 303 |
+
This proof shows why the formula $\text{Var}(X) = E[X^2] - (E[X])^2$ is so useful - it's much easier to compute $E[X^2]$ and $E[X]$ separately than to work with deviations directly.
|
| 304 |
+
"""
|
| 305 |
+
)
|
| 306 |
+
return
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
@app.cell
def _(die_probs, die_values, np):
    # Numerically verify the scaling property Var(aX) = a^2 * Var(X)
    # using the fair-die pmf defined in the earlier cell.
    a = 2  # Scale factor

    def _pmf_variance(values):
        # Var(X) = E[X^2] - (E[X])^2 for a finite pmf over `values`.
        return np.sum(values**2 * die_probs) - (np.sum(values * die_probs))**2

    original_var = _pmf_variance(die_values)
    scaled_values = a * die_values
    scaled_var = _pmf_variance(scaled_values)

    print(f"Original Variance: {original_var:.2f}")
    print(f"Scaled Variance (a={a}): {scaled_var:.2f}")
    print(f"a^2 * Original Variance: {a**2 * original_var:.2f}")
    print(f"Property holds: {abs(scaled_var - a**2 * original_var) < 1e-10}")
    return a, original_var, scaled_values, scaled_var
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
@app.cell
|
| 329 |
+
def _():
|
| 330 |
+
# DIY : Prove more properties as shown above
|
| 331 |
+
return
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
@app.cell(hide_code=True)
|
| 335 |
+
def _(mo):
|
| 336 |
+
mo.md(
|
| 337 |
+
r"""
|
| 338 |
+
## Standard Deviation
|
| 339 |
+
|
| 340 |
+
While variance is mathematically convenient, it has one practical drawback: its units are squared. For example, if we're measuring grades (0-100), the variance is in "grade points squared." This makes it hard to interpret intuitively.
|
| 341 |
+
|
| 342 |
+
The **standard deviation**, denoted by $\sigma$ or $\text{SD}(X)$, is the square root of variance:
|
| 343 |
+
|
| 344 |
+
$$\sigma = \sqrt{\text{Var}(X)}$$
|
| 345 |
+
|
| 346 |
+
/// tip
|
| 347 |
+
Standard deviation is often more intuitive because it's in the same units as the original data. For a normal distribution, approximately:
|
| 348 |
+
- 68% of values fall within 1 standard deviation of the mean
|
| 349 |
+
- 95% of values fall within 2 standard deviations
|
| 350 |
+
- 99.7% of values fall within 3 standard deviations
|
| 351 |
+
"""
|
| 352 |
+
)
|
| 353 |
+
return
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
@app.cell(hide_code=True)
|
| 357 |
+
def _(controls1):
|
| 358 |
+
controls1
|
| 359 |
+
return
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
@app.cell(hide_code=True)
|
| 363 |
+
def _(TangleSlider, mo):
|
| 364 |
+
normal_mean = mo.ui.anywidget(TangleSlider(
|
| 365 |
+
amount=0,
|
| 366 |
+
min_value=-5,
|
| 367 |
+
max_value=5,
|
| 368 |
+
step=0.5,
|
| 369 |
+
digits=1,
|
| 370 |
+
suffix=" units"
|
| 371 |
+
))
|
| 372 |
+
|
| 373 |
+
normal_std = mo.ui.anywidget(TangleSlider(
|
| 374 |
+
amount=1,
|
| 375 |
+
min_value=0.1,
|
| 376 |
+
max_value=3,
|
| 377 |
+
step=0.1,
|
| 378 |
+
digits=1,
|
| 379 |
+
suffix=" units"
|
| 380 |
+
))
|
| 381 |
+
|
| 382 |
+
# Create a grid layout for the controls
|
| 383 |
+
controls1 = mo.vstack([
|
| 384 |
+
mo.md("### Interactive Normal Distribution"),
|
| 385 |
+
mo.hstack([
|
| 386 |
+
mo.md("Adjust the parameters to see how standard deviation affects the shape of the distribution:"),
|
| 387 |
+
]),
|
| 388 |
+
mo.hstack([
|
| 389 |
+
mo.md("Mean (μ): "),
|
| 390 |
+
normal_mean,
|
| 391 |
+
mo.md(" Standard deviation (σ): "),
|
| 392 |
+
normal_std
|
| 393 |
+
], justify="start"),
|
| 394 |
+
])
|
| 395 |
+
return controls1, normal_mean, normal_std
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
@app.cell(hide_code=True)
def _(normal_mean, normal_std, np, plt, stats):
    # Interactive illustration of the 68-95-99.7 rule: draw a normal pdf with
    # the slider-chosen mean/std and shade the +-1/2/3 sigma bands.
    # Slider objects expose their current value via .amount.
    _normal_x = np.linspace(-10, 10, 1000)
    _normal_y = stats.norm.pdf(_normal_x, loc=normal_mean.amount, scale=normal_std.amount)

    # Interval endpoints for the one/two/three standard-deviation bands.
    one_sigma_left = normal_mean.amount - normal_std.amount
    one_sigma_right = normal_mean.amount + normal_std.amount
    two_sigma_left = normal_mean.amount - 2 * normal_std.amount
    two_sigma_right = normal_mean.amount + 2 * normal_std.amount
    three_sigma_left = normal_mean.amount - 3 * normal_std.amount
    three_sigma_right = normal_mean.amount + 3 * normal_std.amount

    # Create the plot
    normal_fig, normal_ax = plt.subplots(figsize=(10, 6))

    # Plot the pdf curve itself.
    normal_ax.plot(_normal_x, _normal_y, 'b-', linewidth=2)

    # Shade the sigma bands; alphas decrease so wider bands sit behind
    # narrower ones visually.
    normal_ax.fill_between(_normal_x, 0, _normal_y, where=(_normal_x >= one_sigma_left) & (_normal_x <= one_sigma_right),
                           alpha=0.3, color='red', label='68% (±1σ)')
    normal_ax.fill_between(_normal_x, 0, _normal_y, where=(_normal_x >= two_sigma_left) & (_normal_x <= two_sigma_right),
                           alpha=0.2, color='green', label='95% (±2σ)')
    normal_ax.fill_between(_normal_x, 0, _normal_y, where=(_normal_x >= three_sigma_left) & (_normal_x <= three_sigma_right),
                           alpha=0.1, color='blue', label='99.7% (±3σ)')

    # Vertical guides at the mean and the +-1/2 sigma boundaries.
    normal_ax.axvline(x=normal_mean.amount, color='black', linestyle='-', linewidth=1.5, label='Mean (μ)')
    normal_ax.axvline(x=one_sigma_left, color='red', linestyle='--', linewidth=1)
    normal_ax.axvline(x=one_sigma_right, color='red', linestyle='--', linewidth=1)
    normal_ax.axvline(x=two_sigma_left, color='green', linestyle='--', linewidth=1)
    normal_ax.axvline(x=two_sigma_right, color='green', linestyle='--', linewidth=1)

    # Call out the current mu and sigma values with arrows.
    normal_ax.annotate(f'μ = {normal_mean.amount:.2f}',
                       xy=(normal_mean.amount, max(_normal_y)*0.5),
                       xytext=(normal_mean.amount + 0.5, max(_normal_y)*0.8),
                       arrowprops=dict(facecolor='black', shrink=0.05, width=1))

    normal_ax.annotate(f'σ = {normal_std.amount:.2f}',
                       xy=(one_sigma_right, stats.norm.pdf(one_sigma_right, loc=normal_mean.amount, scale=normal_std.amount)),
                       xytext=(one_sigma_right + 0.5, max(_normal_y)*0.6),
                       arrowprops=dict(facecolor='red', shrink=0.05, width=1))

    # labels and title
    normal_ax.set_xlabel('Value')
    normal_ax.set_ylabel('Probability Density')
    normal_ax.set_title(f'Normal Distribution with μ = {normal_mean.amount:.2f} and σ = {normal_std.amount:.2f}')

    # legend and grid
    normal_ax.legend()
    normal_ax.grid(alpha=0.3)

    plt.tight_layout()
    # Return the current axes so marimo renders the figure as cell output.
    plt.gca()
    return (
        normal_ax,
        normal_fig,
        one_sigma_left,
        one_sigma_right,
        three_sigma_left,
        three_sigma_right,
        two_sigma_left,
        two_sigma_right,
    )
|
| 465 |
+
|
| 466 |
+
|
| 467 |
+
@app.cell(hide_code=True)
|
| 468 |
+
def _(mo):
|
| 469 |
+
mo.md(
|
| 470 |
+
r"""
|
| 471 |
+
/// tip
|
| 472 |
+
The interactive visualization above demonstrates how standard deviation (σ) affects the shape of a normal distribution:
|
| 473 |
+
|
| 474 |
+
- The **red region** covers μ ± 1σ, containing approximately 68% of the probability
|
| 475 |
+
- The **green region** covers μ ± 2σ, containing approximately 95% of the probability
|
| 476 |
+
- The **blue region** covers μ ± 3σ, containing approximately 99.7% of the probability
|
| 477 |
+
|
| 478 |
+
This is known as the "68-95-99.7 rule" or the "empirical rule" and is a useful heuristic for understanding the spread of data.
|
| 479 |
+
"""
|
| 480 |
+
)
|
| 481 |
+
return
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
@app.cell(hide_code=True)
|
| 485 |
+
def _(mo):
|
| 486 |
+
mo.md(
|
| 487 |
+
r"""
|
| 488 |
+
## 🤔 Test Your Understanding
|
| 489 |
+
|
| 490 |
+
Choose what you believe are the correct options in the questions below:
|
| 491 |
+
|
| 492 |
+
<details>
|
| 493 |
+
<summary>The variance of a random variable can be negative.</summary>
|
| 494 |
+
❌ False! Variance is defined as an expected value of squared deviations, and squares are always non-negative.
|
| 495 |
+
</details>
|
| 496 |
+
|
| 497 |
+
<details>
|
| 498 |
+
<summary>If X and Y are independent random variables, then Var(X + Y) = Var(X) + Var(Y).</summary>
|
| 499 |
+
✅ True! This is one of the key properties of variance for independent random variables.
|
| 500 |
+
</details>
|
| 501 |
+
|
| 502 |
+
<details>
|
| 503 |
+
<summary>Multiplying a random variable by 2 multiplies its variance by 2.</summary>
|
| 504 |
+
❌ False! Multiplying a random variable by a constant a multiplies its variance by a². So multiplying by 2 multiplies variance by 4.
|
| 505 |
+
</details>
|
| 506 |
+
|
| 507 |
+
<details>
|
| 508 |
+
<summary>Standard deviation is always equal to the square root of variance.</summary>
|
| 509 |
+
✅ True! By definition, standard deviation σ = √Var(X).
|
| 510 |
+
</details>
|
| 511 |
+
|
| 512 |
+
<details>
|
| 513 |
+
<summary>If Var(X) = 0, then X must be a constant.</summary>
|
| 514 |
+
✅ True! Zero variance means there is no spread around the mean, so X can only take one value.
|
| 515 |
+
</details>
|
| 516 |
+
"""
|
| 517 |
+
)
|
| 518 |
+
return
|
| 519 |
+
|
| 520 |
+
|
| 521 |
+
@app.cell(hide_code=True)
|
| 522 |
+
def _(mo):
|
| 523 |
+
mo.md(
|
| 524 |
+
r"""
|
| 525 |
+
## Key Takeaways
|
| 526 |
+
|
| 527 |
+
Variance gives us a way to measure how spread out a random variable is around its mean. It's like the "uncertainty" in our expectation - a high variance means individual outcomes can differ widely from what we expect on average.
|
| 528 |
+
|
| 529 |
+
Standard deviation brings this measure back to the original units, making it easier to interpret. For grades, a standard deviation of 10 points means typical grades fall within about 10 points of the average.
|
| 530 |
+
|
| 531 |
+
Variance pops up everywhere - from weather forecasts (how reliable is the predicted temperature?) to financial investments (how risky is this stock?) to quality control (how consistent is our manufacturing process?).
|
| 532 |
+
|
| 533 |
+
In our next notebook, we'll explore more properties of random variables and see how they combine to form more complex distributions.
|
| 534 |
+
"""
|
| 535 |
+
)
|
| 536 |
+
return
|
| 537 |
+
|
| 538 |
+
|
| 539 |
+
@app.cell(hide_code=True)
|
| 540 |
+
def _(mo):
|
| 541 |
+
mo.md(r"""Appendix (containing helper code):""")
|
| 542 |
+
return
|
| 543 |
+
|
| 544 |
+
|
| 545 |
+
@app.cell(hide_code=True)
|
| 546 |
+
def _():
|
| 547 |
+
import marimo as mo
|
| 548 |
+
return (mo,)
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
@app.cell(hide_code=True)
def _():
    # Third-party libraries shared by the plotting and widget cells.
    import matplotlib.pyplot as plt
    import numpy as np
    import scipy.stats as stats
    from wigglystuff import TangleSlider
    return TangleSlider, np, plt, stats
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
@app.cell(hide_code=True)
|
| 561 |
+
def _(TangleSlider, mo):
|
| 562 |
+
# Create interactive elements using TangleSlider for a more inline experience
|
| 563 |
+
true_grade = mo.ui.anywidget(TangleSlider(
|
| 564 |
+
amount=70,
|
| 565 |
+
min_value=50,
|
| 566 |
+
max_value=90,
|
| 567 |
+
step=5,
|
| 568 |
+
digits=0,
|
| 569 |
+
suffix=" points"
|
| 570 |
+
))
|
| 571 |
+
|
| 572 |
+
grader_a_spread = mo.ui.anywidget(TangleSlider(
|
| 573 |
+
amount=10,
|
| 574 |
+
min_value=5,
|
| 575 |
+
max_value=20,
|
| 576 |
+
step=1,
|
| 577 |
+
digits=0,
|
| 578 |
+
suffix=" points"
|
| 579 |
+
))
|
| 580 |
+
|
| 581 |
+
grader_b_spread = mo.ui.anywidget(TangleSlider(
|
| 582 |
+
amount=2,
|
| 583 |
+
min_value=1,
|
| 584 |
+
max_value=5,
|
| 585 |
+
step=0.5,
|
| 586 |
+
digits=1,
|
| 587 |
+
suffix=" points"
|
| 588 |
+
))
|
| 589 |
+
|
| 590 |
+
grader_c_mix = mo.ui.anywidget(TangleSlider(
|
| 591 |
+
amount=0.2,
|
| 592 |
+
min_value=0,
|
| 593 |
+
max_value=1,
|
| 594 |
+
step=0.05,
|
| 595 |
+
digits=2,
|
| 596 |
+
suffix=" proportion"
|
| 597 |
+
))
|
| 598 |
+
return grader_a_spread, grader_b_spread, grader_c_mix, true_grade
|
| 599 |
+
|
| 600 |
+
|
| 601 |
+
@app.cell(hide_code=True)
|
| 602 |
+
def _(grader_a_spread, grader_b_spread, grader_c_mix, mo, true_grade):
|
| 603 |
+
# Create a grid layout for the interactive controls
|
| 604 |
+
controls = mo.vstack([
|
| 605 |
+
mo.md("### Adjust Parameters to See How Variance Changes"),
|
| 606 |
+
mo.hstack([
|
| 607 |
+
mo.md("**True grade:** The correct score that should be assigned is "),
|
| 608 |
+
true_grade,
|
| 609 |
+
mo.md(" out of 100.")
|
| 610 |
+
], justify="start"),
|
| 611 |
+
mo.hstack([
|
| 612 |
+
mo.md("**Grader A:** Has a wide spread with standard deviation of "),
|
| 613 |
+
grader_a_spread,
|
| 614 |
+
mo.md(" points.")
|
| 615 |
+
], justify="start"),
|
| 616 |
+
mo.hstack([
|
| 617 |
+
mo.md("**Grader B:** Has a narrow spread with standard deviation of "),
|
| 618 |
+
grader_b_spread,
|
| 619 |
+
mo.md(" points.")
|
| 620 |
+
], justify="start"),
|
| 621 |
+
mo.hstack([
|
| 622 |
+
mo.md("**Grader C:** Has a mixture distribution with "),
|
| 623 |
+
grader_c_mix,
|
| 624 |
+
mo.md(" proportion of outliers.")
|
| 625 |
+
], justify="start"),
|
| 626 |
+
])
|
| 627 |
+
return (controls,)
|
| 628 |
+
|
| 629 |
+
|
| 630 |
+
if __name__ == "__main__":
|
| 631 |
+
app.run()
|