MAKE_BOOK_FIGURES = True

import numpy as np
import scipy.stats as st
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
import seaborn as sns
sns.set_context("paper")
sns.set_style("ticks")

def set_book_style():
    plt.style.use('seaborn-v0_8-white')
    sns.set_style("ticks")
    sns.set_palette("deep")
    mpl.rcParams.update({
        # Font settings
        'font.family': 'serif',   # For academic publishing
        'font.size': 8,           # Base font size
        'axes.labelsize': 8,
        'axes.titlesize': 8,
        'xtick.labelsize': 7,     # Slightly smaller for better readability
        'ytick.labelsize': 7,
        'legend.fontsize': 7,
        # Line and marker settings for consistency
        'axes.linewidth': 0.5,
        'grid.linewidth': 0.5,
        'lines.linewidth': 1.0,
        'lines.markersize': 4,
        # Layout to prevent clipped labels
        'figure.constrained_layout.use': True,
        # Default DPI (will override when saving)
        'figure.dpi': 600,
        'savefig.dpi': 600,
        # Despine - remove top and right spines
        'axes.spines.top': False,
        'axes.spines.right': False,
        # Remove legend frame
        'legend.frameon': False,
        # Additional trim settings
        'figure.autolayout': True,   # Alternative to constrained_layout
        'savefig.bbox': 'tight',     # Trim when saving
        'savefig.pad_inches': 0.1    # Small padding to ensure nothing gets cut off
    })

def set_notebook_style():
    plt.style.use('seaborn-v0_8-white')
    sns.set_style("ticks")
    sns.set_palette("deep")
    mpl.rcParams.update({
        # Font settings - using default sizes
        'font.family': 'serif',
        'axes.labelsize': 10,
        'axes.titlesize': 10,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'legend.fontsize': 9,
        # Line and marker settings
        'axes.linewidth': 0.5,
        'grid.linewidth': 0.5,
        'lines.linewidth': 1.0,
        'lines.markersize': 4,
        # Layout settings
        'figure.constrained_layout.use': True,
        # Remove only top and right spines
        'axes.spines.top': False,
        'axes.spines.right': False,
        # Remove legend frame
        'legend.frameon': False,
        # Additional settings
        'figure.autolayout': True,
        'savefig.bbox': 'tight',
        'savefig.pad_inches': 0.1
    })

def save_for_book(fig, filename, is_vector=True, **kwargs):
    """
    Save a figure with book-optimized settings.

    Parameters
    ----------
    fig : matplotlib figure
        The figure to save.
    filename : str
        Filename without extension.
    is_vector : bool
        If True, saves as vector at 1000 dpi. If False, saves as raster at 600 dpi.
    **kwargs : dict
        Additional kwargs to pass to savefig.
    """
    # Set appropriate DPI and format based on figure type
    if is_vector:
        dpi = 1000
        ext = '.pdf'
    else:
        dpi = 600
        ext = '.tif'
    # Save the figure with book settings
    fig.savefig(f"{filename}{ext}", dpi=dpi, **kwargs)

def make_full_width_fig():
    return plt.subplots(figsize=(4.7, 2.9), constrained_layout=True)

def make_half_width_fig():
    return plt.subplots(figsize=(2.35, 1.45), constrained_layout=True)

if MAKE_BOOK_FIGURES:
    set_book_style()
else:
    set_notebook_style()

make_full_width_fig = make_full_width_fig if MAKE_BOOK_FIGURES else lambda: plt.subplots()
make_half_width_fig = make_half_width_fig if MAKE_BOOK_FIGURES else lambda: plt.subplots()
Decision Making#
So, we learned about credible intervals. But what if someone asks you to report a single value for \(\theta\) in the coin toss example? What is the correct way of doing this?
You have to make a decision. If you make the wrong decision, you incur a cost. The best decision is the one that minimizes the cost. Another name for this cost is *loss*.
To formalize this concept, let \(\ell(\theta', \theta)\) be the cost we incur when we guess \(\theta'\) and the actual value is \(\theta\). This cost is inherently subjective; different people have different costs. However, some common choices are:
The 0-1 loss:

\[
\ell_{01}(\theta', \theta) = \begin{cases} 0, & \text{if}\ \theta' = \theta, \\ 1, & \text{otherwise}. \end{cases}
\]

The square loss:

\[
\ell_2(\theta', \theta) = (\theta' - \theta)^2.
\]

The absolute loss:

\[
\ell_1(\theta', \theta) = |\theta' - \theta|.
\]
When choosing a value for \(\theta\), the rational thing to do is to minimize our expected loss, where the expectation is taken over our posterior state of knowledge about \(\theta\). That is, we make our choice by solving this problem:

\[
\theta^* = \arg\min_{\theta'} \mathbb{E}\left[\ell(\theta', \theta) \mid x_{1:N}\right] = \arg\min_{\theta'} \int \ell(\theta', \theta)\, p(\theta|x_{1:N})\, d\theta.
\]
For the special loss functions above, the answer is known analytically. (A numerical verification follows the list.)

The choice that minimizes the 0-1 loss is the one maximizing the posterior, i.e., the maximum a posteriori (MAP) estimate:

\[
\theta^*_{01} = \arg\max_{\theta'} p(\theta'|x_{1:N}).
\]

The choice that minimizes the square loss is the expectation of the random variable, i.e., the posterior mean:

\[
\theta^*_{2} = \mathbb{E}[\theta|x_{1:N}].
\]

The choice that minimizes the absolute loss is the median of the posterior, i.e., the value \(\theta^*_{1}\) such that:

\[
P(\theta \le \theta^*_{1}|x_{1:N}) = \frac{1}{2}.
\]
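You do not have to take these facts on faith. Here is a minimal numerical sketch (our addition, not part of the original notebook): we put \(\theta\) on a grid, approximate the expected loss of every candidate \(\theta'\) by a Riemann sum, and minimize by brute force. The \(\text{Beta}(3, 2)\) posterior is an arbitrary stand-in. The 0-1 loss is omitted because, for a continuous \(\theta\), it requires a limiting argument (a tolerance \(\epsilon \to 0\)) that picks out the mode.

# Numerical sanity check of the loss-minimizer identities
# (arbitrary stand-in posterior; not part of the original analysis)
post_check = st.beta(3.0, 2.0)
grid = np.linspace(1e-3, 1.0 - 1e-3, 2000)
weights = post_check.pdf(grid)
weights /= weights.sum()   # quadrature weights on a uniform grid

def argmin_expected_loss(loss):
    # Expected loss of each candidate theta', minimized by brute force
    exp_loss = [np.sum(loss(tp, grid) * weights) for tp in grid]
    return grid[int(np.argmin(exp_loss))]

theta_sq = argmin_expected_loss(lambda tp, t: (tp - t) ** 2)
theta_abs = argmin_expected_loss(lambda tp, t: np.abs(tp - t))

print(f'square loss minimizer:   {theta_sq:.3f} (posterior mean:   {post_check.mean():.3f})')
print(f'absolute loss minimizer: {theta_abs:.3f} (posterior median: {post_check.median():.3f})')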
Let’s reintroduce our coin toss example so that we have something to work with:
import scipy.stats as st

# Sample N coin tosses from the true (but unknown to us) coin
theta_true = 0.8
X = st.bernoulli(theta_true)
N = 5
data = X.rvs(size=N)

# The posterior is Beta(alpha, beta) under a uniform Beta(1, 1) prior
alpha = 1.0 + data.sum()
beta = 1.0 + N - data.sum()
Theta_post = st.beta(alpha, beta)
fig, ax = plt.subplots()
thetas = np.linspace(0, 1, 100)
ax.plot(
[theta_true],
[0.0],
'o',
markeredgewidth=2,
markersize=10,
label='True value')
ax.plot(
thetas,
Theta_post.pdf(thetas),
label=r'$p(\theta|x_{1:N})$'
)
ax.set_xlabel(r'$\theta$')
ax.set_ylabel('Probability density')
ax.set_title(f'$N={N}$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
For the 0-1 loss, we need to find the maximum of the posterior, i.e., the MAP estimate. This is an optimization problem with an analytical solution (for a \(\text{Beta}(\alpha, \beta)\) posterior with \(\alpha, \beta > 1\), the mode is \((\alpha - 1)/(\alpha + \beta - 2)\)), but we will do it differently. We will solve the problem using a grid search:
idx = np.argmax(Theta_post.pdf(thetas))
theta_star_01 = thetas[idx]
print(f'theta_star_01 = {theta_star_01:.2f}')
theta_star_01 = 0.80
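As a quick sanity check (our addition), the grid-search result should match the analytical mode of the \(\text{Beta}(\alpha, \beta)\) posterior up to the grid spacing:

# Analytical mode of Beta(alpha, beta), valid for alpha, beta > 1
theta_mode = (alpha - 1.0) / (alpha + beta - 2.0)
print(f'analytical mode = {theta_mode:.2f}')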
Now, let’s find the value of \(\theta\) that minimizes the square loss. We have to find the expectation of the posterior \(p(\theta|x_{1:N})\), which is just a \(\text{Beta}(\alpha, \beta)\). It is:

\[
\theta^*_{2} = \mathbb{E}[\theta|x_{1:N}] = \frac{\alpha}{\alpha + \beta}.
\]
# In the example we had above:
theta_star_2 = Theta_post.expect()
print(f'theta_star_2 = {theta_star_2:.2f}')
theta_star_2 = 0.71
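You can confirm this against the closed-form Beta mean (again, our addition):

# Closed-form mean of Beta(alpha, beta)
print(f'alpha / (alpha + beta) = {alpha / (alpha + beta):.2f}')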
And finally, here is the median, which minimizes the absolute loss:
# In the example we had above:
theta_star_1 = Theta_post.median()
print(f'theta_star_1 = {theta_star_1:.2f}')
theta_star_1 = 0.74
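Equivalently (our addition), the median is the 50% quantile of the posterior, so the inverse CDF gives the same value:

# The median is the 0.5 quantile (inverse CDF)
print(f'Theta_post.ppf(0.5) = {Theta_post.ppf(0.5):.2f}')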
See them all together in the same plot:
fig, ax = plt.subplots()
ax.plot(
[theta_true],
[0.0],
'o',
markeredgewidth=2,
markersize=10,
label='True value'
)
ax.plot(
thetas,
Theta_post.pdf(thetas),
label=r'$p(\theta|x_{1:N})$'
)
ax.plot(
theta_star_01,
0,
'x',
markeredgewidth=2,
label=r'$\theta^*_{01}$'
)
ax.plot(
theta_star_2,
0,
's',
markeredgewidth=2,
label=r'$\theta^*_{2}$'
)
ax.plot(
theta_star_1,
0,
'd',
markeredgewidth=2,
label=r'$\theta^*_{1}$'
)
ax.set_xlabel(r'$\theta$')
ax.set_title(f'$N={N}$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
Questions#
Repeat the analysis for \(N=0, 5, 10, 100\). Do these estimates converge to the actual value?
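If you want a starting point, here is a minimal sketch of how one could organize this experiment (the variable names and looping structure are ours, one choice among many). Note that for \(N=0\) the posterior is uniform, so the MAP is not unique and the grid search simply returns the first grid point.

# Sketch: repeat the analysis for several sample sizes
for N in [0, 5, 10, 100]:
    data = X.rvs(size=N)        # fresh tosses from the true coin
    alpha = 1.0 + data.sum()    # Beta posterior under a uniform prior
    beta = 1.0 + N - data.sum()
    post = st.beta(alpha, beta)
    thetas = np.linspace(0, 1, 100)
    theta_01 = thetas[np.argmax(post.pdf(thetas))]   # MAP by grid search
    theta_2 = post.expect()                          # posterior mean
    theta_1 = post.median()                          # posterior median
    print(f'N={N:3d}: MAP={theta_01:.2f}, mean={theta_2:.2f}, median={theta_1:.2f}')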