# npb_data_app / plotting.py
# Source snapshot: commit e86b89f by patrickramos,
# "Add team filtering after percentile computation in pitch leaderboard".
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import transforms
from matplotlib.colors import LinearSegmentedColormap
import polars as pl
from pyfonts import load_google_font
from scipy.stats import gaussian_kde
import numpy as np
from types import SimpleNamespace
from datetime import date
from convert import ball_kind_code_to_color, get_text_color_from_color
from stats import get_pitcher_stats
# Select the non-interactive Agg backend so figures are rendered off-screen
# (no GUI/display is needed to build a card).
mpl.use('Agg')
def get_card_data(id, **kwargs):
    """Collect everything needed to draw one pitcher's card.

    ``get_pitcher_stats`` is called three times with the same keyword
    arguments: once with no extra positional argument, once with ``'l'`` and
    once with ``'r'`` (the card labels these splits "LHH" and "RHH").

    Args:
        id: Pitcher identifier, forwarded to ``get_pitcher_stats``.
        **kwargs: Forwarded unchanged to every ``get_pitcher_stats`` call.

    Returns:
        A ``SimpleNamespace`` with:

        * ``pitcher_stats`` - the overall pitcher row joined on ``pitId`` with
          the two splits (split columns get ``_left`` / ``_right`` suffixes).
        * ``pitch_stats`` - per-pitch rows full-joined on ``ballKind_code``
          with the two splits, nulls replaced by 0.
        * ``both_pitch_shapes`` / ``left_pitch_shapes`` /
          ``right_pitch_shapes`` - the ``pitch_shapes`` of each query.
    """
    overall = get_pitcher_stats(id, **kwargs)
    vs_left = get_pitcher_stats(id, 'l', **kwargs)
    vs_right = get_pitcher_stats(id, 'r', **kwargs)
    splits = ((vs_left, '_left'), (vs_right, '_right'))

    pitcher_stats = overall.pitcher_stats
    for split, suffix in splits:
        pitcher_stats = pitcher_stats.join(split.pitcher_stats, on='pitId', suffix=suffix)

    # A full join keeps pitches that only show up in one of the queries; the
    # resulting gaps are then filled with 0.
    pitch_stats = overall.pitch_stats
    for split, suffix in splits:
        pitch_stats = pitch_stats.join(split.pitch_stats, on='ballKind_code', how='full', suffix=suffix)
    pitch_stats = pitch_stats.fill_null(0)

    return SimpleNamespace(
        pitcher_stats=pitcher_stats,
        pitch_stats=pitch_stats,
        both_pitch_shapes=overall.pitch_shapes,
        left_pitch_shapes=vs_left.pitch_shapes,
        right_pitch_shapes=vs_right.pitch_shapes,
    )
def plot_arsenal(ax, pitches):
    """Draw the arsenal as a row of colored dots, each labelled with its pitch code.

    Args:
        ax: Matplotlib axes to draw on; its x-limits are set to (0, 11).
        pitches: Iterable (with a length) of pitch codes. Codes missing from
            ``ball_kind_code_to_color`` fall back to the color ``'C0'``.
    """
    ax.set_xlim(0, 11)

    n_pitches = len(pitches)
    centers = np.arange(n_pitches) + 0.5
    dot_colors = [ball_kind_code_to_color.get(code, 'C0') for code in pitches]
    ax.scatter(centers, np.zeros(n_pitches), c=dot_colors, s=170)

    # Write each code on top of its dot, in a text color chosen from the dot color.
    for idx, (code, dot_color) in enumerate(zip(pitches, dot_colors)):
        ax.text(
            x=idx+0.5,
            y=0,
            s=code,
            horizontalalignment='center',
            verticalalignment='center',
            font=font,
            color=get_text_color_from_color(dot_color),
        )
def plot_usage(ax, usages):
    """Draw pitch usage as a single stacked horizontal bar.

    Args:
        ax: Matplotlib axes to draw on.
        usages: Two-column frame whose rows are ``(pitch code, usage)``;
            usage is treated as a fraction of 1 (the x-axis spans 0 to 1).

    Segments are stacked left to right in row order. A segment gets a
    percentage label only when its usage exceeds 10%.
    """
    left = 0
    height = 0.8
    for pitch, usage in usages.iter_rows():
        # Same fallback as plot_arsenal / plot_pitch_stats, so a pitch code
        # without a registered color is still drawn instead of raising KeyError.
        color = ball_kind_code_to_color.get(pitch, 'C0')
        ax.barh(0, usage, height=height, left=left, color=color)
        if usage > 0.1:
            ax.text(
                left+usage/2,
                0,
                f'{usage:.0%}',
                horizontalalignment='center',
                verticalalignment='center',
                size=8,
                font=font,
                color=get_text_color_from_color(color),
            )
        left += usage
    ax.set_xlim(0, 1)
    # The upper limit leaves empty headroom above the bar.
    ax.set_ylim(-height/2, height/2*2.75)
# Evaluation grid for location densities: every integer x in [-100, 100] and
# every integer y in [0, 250]. X and Y both have shape (251, 201).
x_range = np.arange(-100, 100+1)
y_range = np.arange(0, 250+1)
X, Y = np.meshgrid(x_range, y_range)
# Grid coordinates as a (2, n_points) array (row 0 = x, row 1 = y), built once
# instead of on every fit_pred_kde call. np.vstack is used rather than
# np.concat because the latter only exists in NumPy >= 2.0.
_GRID_POINTS = np.vstack((X.ravel(), Y.ravel()))


def fit_pred_kde(data):
    """Fit a Gaussian KDE to ``data`` and evaluate it on the module grid.

    Args:
        data: Array of shape (2, n_samples): x coordinates in row 0 and
            y coordinates in row 1.

    Returns:
        Array with the same shape as ``X`` / ``Y`` holding the estimated
        density at every grid point.

    Raises:
        Whatever ``scipy.stats.gaussian_kde`` raises for unusable input
        (e.g. too few or degenerate samples).
    """
    kde = gaussian_kde(data)
    return kde(_GRID_POINTS).reshape(X.shape)
def plot_loc(ax, locs):
    """Draw a location backdrop and one density contour per pitch type.

    Args:
        ax: Matplotlib axes to draw on.
        locs: Polars frame with columns ``general_ballKind_code``, ``x`` and
            ``y``; one row per pitch.

    For each pitch type (most frequent first) a KDE is evaluated on the
    module grid and the highest-density region holding roughly 68% of the
    grid's probability mass is filled and outlined in the pitch's color.
    Pitch types with two or fewer rows, or whose locations are degenerate
    for the KDE, are skipped.
    """
    ax.set_aspect('equal', adjustable='datalim')
    ax.set_ylim(-52, 252)

    # Backdrop: nested rectangles plus a five-sided shape underneath them
    # (presumably the zone and home plate - confirm against the data source).
    ax.add_patch(plt.Rectangle((-100, 0), width=200, height=250, facecolor='darkgray', edgecolor='dimgray'))
    ax.add_patch(plt.Rectangle((-80, 25), width=160, height=200, facecolor='gainsboro', edgecolor='dimgray'))
    ax.add_patch(plt.Rectangle((-60, 50), width=120, height=150, fill=False, edgecolor='yellowgreen', linestyle=':'))
    ax.add_patch(plt.Rectangle((-40, 75), width=80, height=100, facecolor='ivory', edgecolor='darkgray'))
    ax.add_patch(plt.Polygon([(0, -10), (45, -30), (51, -50), (-51, -50), (-45, -30), (0, -10)], facecolor='snow', edgecolor='darkgray'))

    # Sort so the most frequent pitch type is drawn first (and so ends up
    # underneath the rarer ones).
    by_frequency = locs.sort(pl.len().over('general_ballKind_code'), descending=True)
    for (pitch,), _locs in by_frequency.group_by('general_ballKind_code', maintain_order=True):
        if len(_locs) <= 2:
            continue
        try:
            Z = fit_pred_kde(_locs[['x', 'y']].to_numpy().T)
        except np.linalg.LinAlgError:
            # Degenerate locations (e.g. all points on one line) give a
            # singular covariance; skip the pitch like the too-few-points case.
            continue
        Z = Z / Z.sum()

        # Density threshold t: the cells with density below t together hold
        # about 32% of the mass, so the region at or above t holds about 68%.
        ascending = np.sort(Z.ravel())
        t = ascending[np.argmin(np.abs(ascending.cumsum() - (1-0.68)))]

        # Same fallback color as plot_arsenal / plot_pitch_stats for codes
        # without a registered color.
        color = ball_kind_code_to_color.get(pitch, 'C0')
        ax.contourf(X, Y, Z, levels=[t, 1], colors=color, alpha=0.5)
        ax.contour(X, Y, Z, levels=t.reshape(1), colors=color, alpha=0.75)
def plot_velo(ax, velos):
    """Draw one horizontal half-violin of pitch speed per pitch type.

    Args:
        ax: Matplotlib axes to draw on.
        velos: Polars frame with columns ``general_ballKind_code`` and
            ``ballSpeed``; one row per pitch.

    Each violin is filled with the pitch's color and annotated with its
    rounded mean speed. Pitch types with a single row (or none) are skipped.
    """
    # x in data coordinates (speed), y as a fraction of the axes height, so
    # the mean labels sit at mid-height regardless of the y data range.
    trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)

    # maintain_order keeps the groups in first-appearance order, so the
    # stacking of overlapping violins is the same on every run.
    for (pitch,), _velos in velos.group_by('general_ballKind_code', maintain_order=True):
        if len(_velos) <= 1:
            continue
        speeds = _velos['ballSpeed']
        violin = ax.violinplot(speeds, orientation='horizontal', side='high', showextrema=False)

        # Same fallback color as plot_arsenal / plot_pitch_stats for codes
        # without a registered color.
        color = ball_kind_code_to_color.get(pitch, 'C0')
        for body in violin['bodies']:
            body.set_facecolor(color)

        mean = speeds.mean()
        ax.text(
            mean,
            0.5,
            round(mean),
            horizontalalignment='center',
            verticalalignment='center',
            color='gray',
            alpha=0.75,
            font=font,
            transform=trans,
        )
# Colormap used to shade stat cells by percentile: 0 maps to 'dodgerblue',
# the midpoint to 'snow' and 1 to 'crimson'.
stat_cmap = LinearSegmentedColormap.from_list('stat', colors=['dodgerblue', 'snow', 'crimson'])
def plot_pitch_stats(ax, stats, stat_names):
    """Draw a table of per-pitch stats: one column per pitch, one row per stat.

    Args:
        ax: Matplotlib axes to draw on.
        stats: Polars frame with one row per pitch and the columns
            ``ballKind_code``, ``qualified``, and for every name in
            ``stat_names`` both ``<name>`` and ``<name>_pctl``.
        stat_names: Stat column names to show, top to bottom.

    The header row holds the pitch codes on their pitch colors and the first
    column holds the stat names. Value cells are formatted as whole
    percentages and shaded by percentile through ``stat_cmap``; for pitches
    that are not qualified the cell is 'gainsboro' with gray text instead.
    """
    ax.set_aspect('equal', adjustable='datalim')
    table = mpl.table.Table(ax)

    n_rows = len(stat_names) + 1
    n_cols = len(stats) + 1
    # Keyword arguments shared by every cell; the grid fills the unit square.
    common = dict(
        width=1/n_cols,
        height=1/n_rows,
        loc='center',
        fontproperties=font,
        edgecolor='white',
    )

    # First column: stat names.
    for row, stat_name in enumerate(stat_names, start=1):
        table.add_cell(row=row, col=0, text=stat_name, **common)

    # One column per pitch: colored header cell, then one value cell per stat.
    for col, pitch in enumerate(stats['ballKind_code'], start=1):
        pitch_color = ball_kind_code_to_color.get(pitch, 'C0')
        header = table.add_cell(row=0, col=col, text=pitch, facecolor=pitch_color, **common)
        header.get_text().set_color(get_text_color_from_color(pitch_color))

        pitch_row = stats.filter(pl.col('ballKind_code') == pitch)
        qualified = pitch_row['qualified'].item()
        for row, stat_name in enumerate(stat_names, start=1):
            value = pitch_row[stat_name].item()
            pctl = pitch_row[f'{stat_name}_pctl'].item()
            label = f'{value:.0%}'
            if qualified:
                # The percentile is looked up inside a list next to 0 and 1,
                # and the middle color is taken.
                shade = stat_cmap([0, pctl, 1])[1]
            else:
                shade = 'gainsboro'
            value_cell = table.add_cell(row=row, col=col, text=label, facecolor=shade, **common)
            if not qualified:
                value_cell.get_text().set_color('gray')

    ax.add_artist(table)
def plot_pitcher_stats(ax, stats, stat_names):
    """Draw a single-row table of pitcher-level stats as name/value cell pairs.

    Args:
        ax: Matplotlib axes to draw on.
        stats: Single-row Polars frame with the column ``qualified`` and, for
            every name in ``stat_names``, both ``<name>`` and ``<name>_pctl``.
        stat_names: Stat column names to show, left to right.

    Each stat takes two cells: its name, then its value formatted as a whole
    percentage. The value cell is shaded by percentile through ``stat_cmap``;
    when the pitcher is not qualified it is 'gainsboro' with gray text.
    """
    ax.set_aspect('equal', adjustable='datalim')
    table = mpl.table.Table(ax)

    # Keyword arguments shared by every cell; two cells per stat span the
    # full width in one full-height row.
    common = dict(
        width=1/(len(stat_names)*2),
        height=1,
        loc='center',
        fontproperties=font,
        edgecolor='white',
    )
    qualified = stats['qualified'].item()

    for idx, stat_name in enumerate(stat_names):
        value = stats[stat_name].item()
        pctl = stats[f'{stat_name}_pctl'].item()
        name_col = idx*2

        table.add_cell(row=0, col=name_col, text=stat_name, **common)

        label = f'{value:.0%}'
        if qualified:
            shade = stat_cmap([0, pctl, 1])[1]
        else:
            shade = 'gainsboro'
        value_cell = table.add_cell(row=0, col=name_col+1, text=label, facecolor=shade, **common)
        if not qualified:
            value_cell.get_text().set_color('gray')

    ax.add_artist(table)
# Font shared by all text drawn on the card; loaded once when the module is
# imported (presumably fetched from Google Fonts by pyfonts - confirm whether
# this needs network access).
font = load_google_font('Saira Extra Condensed', weight='medium')
def create_pitcher_overview_card(id, season, dpi=300):
    """Build the overview card figure for one pitcher's regular season.

    Args:
        id: Pitcher identifier, forwarded to ``get_card_data``.
        season: Year as an int; data is requested for January 1 through
            December 31 of that year.
        dpi: Figure resolution. The figure size is fixed at 1080/300 by
            1350/300 inches, i.e. 1080x1350 pixels at the default dpi.

    Returns:
        The matplotlib figure. Top to bottom it holds: title, arsenal,
        usage split (LHH | RHH), location split (LHH | RHH), velocity,
        per-pitch stat table, pitcher stat table and credits.

    Note:
        The figure is created through ``plt.figure``; callers producing many
        cards should close it (``plt.close(fig)``) when finished with it.
    """
    data = get_card_data(
        id,
        start_date=date(season, 1, 1),
        end_date=date(season, 12, 31),
        game_kind='Regular Season',
        min_pitches=100,
        pitch_class_type='general',
    )

    fig = plt.figure(figsize=(1080/300, 1350/300), dpi=dpi)
    gs = fig.add_gridspec(8, 6, height_ratios=[1, 1, 1.5, 6, 1, 3, 1, 0.5])
    panels = []

    def new_panel(spec):
        # Create a subplot and remember it for the shared clean-up at the end.
        panel = fig.add_subplot(spec)
        panels.append(panel)
        return panel

    def corner_label(panel, text, **extra):
        # Gray caption anchored to the top-left corner of the panel.
        panel.text(
            0,
            1,
            text,
            horizontalalignment='left',
            verticalalignment='top',
            color='gray',
            font=font,
            size=10,
            transform=panel.transAxes,
            **extra,
        )

    # Column spans of the left-hand and right-hand halves of the grid.
    left_half, right_half = slice(None, 3), slice(3, None)

    # Title row: pitcher name, season and a vertical 'REG' tag.
    title_ax = new_panel(gs[0, :])
    pitcher_name = data.pitcher_stats['pitcher_name'].item().upper()
    title_ax.text(x=0, y=0, s=pitcher_name, verticalalignment='baseline', font=font, size=20)
    title_ax.text(x=0.95, y=0, s=season, horizontalalignment='right', verticalalignment='baseline', font=font, size=20)
    title_ax.text(x=1, y=0.5, s='REG', horizontalalignment='right', verticalalignment='center', font=font, size=10, rotation='vertical')

    arsenal_ax = new_panel(gs[1, :])
    plot_arsenal(arsenal_ax, data.pitch_stats['ballKind_code'])

    # Usage bars, left split then right split.
    usage_panels = (
        (left_half, 'usage_left', 'LHH usage'),
        (right_half, 'usage_right', 'RHH usage'),
    )
    for cols, usage_column, caption in usage_panels:
        usage_ax = new_panel(gs[2, cols])
        plot_usage(usage_ax, data.pitch_stats[['ballKind_code', usage_column]])
        corner_label(usage_ax, caption, linespacing=0.5)

    # Location contours, left split then right split.
    loc_panels = (
        (left_half, data.left_pitch_shapes, 'LHH\nloc'),
        (right_half, data.right_pitch_shapes, 'RHH\nloc'),
    )
    for cols, shapes, caption in loc_panels:
        loc_ax = new_panel(gs[3, cols])
        corner_label(loc_ax, caption)
        plot_loc(loc_ax, shapes)

    velo_ax = new_panel(gs[4, :])
    plot_velo(velo_ax, data.both_pitch_shapes)
    corner_label(velo_ax, 'Velo')

    pitch_stats_ax = new_panel(gs[5, :])
    plot_pitch_stats(pitch_stats_ax, data.pitch_stats, ['CSW%', 'GB%'])

    pitcher_stats_ax = new_panel(gs[6, :])
    plot_pitcher_stats(pitcher_stats_ax, data.pitcher_stats, ['CSW%', 'K%', 'BB%', 'GB%'])

    credits_ax = new_panel(gs[7, :])
    credits_ax.text(x=0, y=0.5, s='Data: SPAIA, Sanspo', verticalalignment='center', font=font, size=7)
    credits_ax.text(x=1, y=0.5, s='@yakyucosmo', horizontalalignment='right', verticalalignment='center', font=font, size=7)

    # Hide frames, ticks and tick labels on every panel.
    for panel in panels:
        panel.axis('off')
        panel.tick_params(
            axis='both',
            which='both',
            length=0,
            labelbottom=False,
            labelleft=False,
        )

    return fig
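# Example usage ('Agg' is a non-interactive backend, so write the card to a file):
# fig = create_pitcher_overview_card('1600153', season=2023, dpi=300)
# fig.savefig('card.png')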