Optional: try Matplotlib's low-level plot command to add points or lines to an existing graph.
# !pip install seaborn==0.9.0
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
sns.set()
import warnings
warnings.simplefilter('ignore',FutureWarning)
# Load the MASS "geyser" dataset from the Rdatasets CSV mirror.
geyser = pd.read_csv('https://vincentarelbundock.github.io/Rdatasets/csv/MASS/geyser.csv')
# Discard the exported R row-label column. pandas names it 'Unnamed: 0' when
# the CSV header cell is blank; newer Rdatasets exports appear to label it
# 'rownames' instead (TODO confirm against the live file). Listing both with
# errors='ignore' removes whichever is present rather than raising KeyError.
geyser.drop(columns=['Unnamed: 0', 'rownames'], errors='ignore', inplace=True)
Useful statsmodels names: smf.ols, fit, .params, pvalues, summary(), get_prediction(), conf_int(). Use alpha to change the confidence level.
lm = smf.ols('duration ~ waiting', data=geyser).fit()
# Rebuild the fitted line by hand from the two OLS coefficients
# (intercept first, then the slope on `waiting`) and plot what is left over.
b0, b1 = lm.params
yfitted = b0 + b1 * geyser['waiting']
resid = geyser['duration'] - yfitted
# Trailing semicolons are kept on the plotting calls: they silence the echoed
# return value when the statement is the last one in a notebook cell.
sns.scatterplot(x=geyser['waiting'], y=resid);

# Flag eruptions shorter than 3 and store the flag as a column so it can be
# used as a `hue` variable below.
short = geyser['duration'] < 3
geyser['short'] = short

# Raw relationship between waiting time and eruption duration.
sns.scatterplot(x='waiting', y='duration', data=geyser);

# Count of rows whose recorded duration is exactly 4.
sum(geyser['duration'] == 4)

# Residuals of a single straight-line fit over all observations.
sns.residplot(x='waiting', y='duration', data=geyser);

# Same data split by the `short` flag: one regression line per group, then
# the plain scatter coloured by group.
# NOTE(review): outside a notebook, consecutive axes-level calls draw onto the
# current axes rather than a fresh figure -- confirm that is intended.
sns.lmplot(x='waiting', y='duration', hue='short', data=geyser);
sns.scatterplot(x='waiting', y='duration', hue='short', data=geyser);

# Residual plots fitted separately within each group.
sns.residplot(x='waiting', y='duration', data=geyser.loc[short]);
sns.residplot(x='waiting', y='duration', data=geyser.loc[~short]);