Chapter3-Section1-Worksheet-C-Demo
In [1]:
# !pip install seaborn==0.9.0
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
sns.set()
import warnings
warnings.simplefilter('ignore',FutureWarning)

Demonstrate how to simulate the deviation of the sample mean from the true mean.

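The true mean is 0: each observation is drawn from a normal distribution with mean 0 and standard deviation 10. We draw N observations and compute their mean, repeat this many times, and then plot the distribution of the resulting sample means. The results are most interesting for small N, such as N=2, N=3, or N=4.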

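Because the observations are normal, the mean of N of them is itself normally distributed, with mean 0 and standard deviation 10/√N (about 7.07 for N=2). The closely related Student t-distribution arises when this deviation is divided by a standard error estimated from the sample itself rather than by the true value.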

In [2]:
# Population distribution used by the simulation: normal, mean 0, std 10.
d = scipy.stats.norm(scale=10)


def one_sample(N=2, random_state=None):
    """Return the mean of ``N`` independent draws from ``d``.

    Parameters
    ----------
    N : int, optional
        Number of draws to average (default 2).
    random_state : None, int or numpy.random.RandomState, optional
        Forwarded to ``d.rvs``. With the default ``None`` scipy falls
        back to its default random state, exactly as when the argument
        is omitted.

    Returns
    -------
    float
        The sample mean of the ``N`` draws.
    """
    return np.mean(d.rvs(size=N, random_state=random_state))


def lots_of_samples(N=2, M=1000, random_state=None):
    """Return ``M`` independent sample means, each over ``N`` draws from ``d``.

    All ``M * N`` values are drawn in a single call and averaged row-wise,
    which avoids ``M`` separate Python-level calls into scipy.

    Parameters
    ----------
    N : int, optional
        Number of draws averaged for each sample mean (default 2).
    M : int, optional
        Number of sample means to generate (default 1000).
    random_state : None, int or numpy.random.RandomState, optional
        Forwarded to ``d.rvs``; pass a seed or generator to make the
        simulation reproducible.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of length ``M``.
    """
    # One row per repetition, one column per draw within that repetition.
    draws = d.rvs(size=(M, N), random_state=random_state)
    return draws.mean(axis=1)
In [30]:
# Histogram of simulated sample means, overlaid with the theoretical
# density of the sample mean.
n_draws = 2  # number of draws averaged per sample mean
ax = sns.distplot(lots_of_samples(N=n_draws, M=10000), kde=False, norm_hist=True)
# The mean of n independent draws has standard deviation sigma / sqrt(n)
# (10 / sqrt(2) ~= 7.07 here), so derive the scale instead of using a
# rounded constant that would go stale if n_draws changes.
ndist = scipy.stats.norm(scale=d.std() / np.sqrt(n_draws))
xs = np.linspace(-20, 20, 1000)
ys = ndist.pdf(xs)
#YOU::
#ys = 3 * xs + 5 + ndist.rvs(len(xs))
# Draw on the histogram's axes explicitly rather than relying on the
# pyplot "current axes" state.
sns.lineplot(x=xs, y=ys, ax=ax)
ax.set(xlabel='sample mean', ylabel='density');
In [40]:
# Noisy linear data: the true line is y = 8x + 6, observed with
# additive logistic noise (scale 50) at 100 evenly spaced x values.
xs = np.linspace(-10, 10, num=100)
trueYs = 6 + 8 * xs
noise = scipy.stats.logistic.rvs(size=100, scale=50)
ys = noise + trueYs
df = pd.DataFrame(dict(x=xs, y=ys))
# Scatter plus fitted regression line, then the true line in orange.
sns.lmplot(data=df, x='x', y='y');
sns.lineplot(x=xs, y=trueYs, color='Orange');
In [43]:
def one_estimate():
    """Simulate one noisy data set around y = 8x + 6 and plot it with lmplot.

    Uses 100 evenly spaced x values on [-10, 10] and adds logistic noise
    (scale 50) to the true line. The data are handed to ``sns.lmplot``;
    nothing is returned and the simulated data are not kept.
    """
    grid_x = np.linspace(-10, 10, num=100)
    line_y = 6 + 8 * grid_x
    jitter = scipy.stats.logistic.rvs(size=100, scale=50)
    observed = pd.DataFrame(dict(x=grid_x, y=line_y + jitter))
    sns.lmplot(data=observed, x='x', y='y')

# Plot one simulated data set together with its fitted regression line.
one_estimate()


# Rebuild the data at module level. NOTE: this noise draw is separate from
# the one made inside one_estimate(), so `ys` and `df` here are not the
# points shown in the scatter; only the noise-free line is plotted below.
xs = np.linspace(-10, 10, num=100)
trueYs = 6 + 8 * xs
noise = scipy.stats.logistic.rvs(size=100, scale=50)
ys = noise + trueYs
df = pd.DataFrame(dict(x=xs, y=ys))
# No `ax` is passed, so this draws on the current axes (presumably the
# lmplot figure created by one_estimate() above).
sns.lineplot(x=xs, y=trueYs, color='Orange');