# !pip install seaborn==0.9.0
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
sns.set()
import warnings
warnings.simplefilter('ignore',FutureWarning)
# Load the Advertising CSV and discard its 'Unnamed: 0' column
# (presumably a saved row index -- confirm against the file).
advertising = pd.read_csv(
    'http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv'
).drop(columns='Unnamed: 0')

# Work on a copy so the freshly loaded frame stays untouched.
df = advertising.copy()

# Response column and the candidate predictor columns.
df_dependent = 'sales'
df_independent = ['TV', 'radio', 'newspaper']

# Bare expression: shows the column index when run as a notebook cell.
df.columns

# Fit one simple OLS regression per predictor, then the multiple regression
# that uses all three predictors together.
lm_tv, lm_radio, lm_newspaper, lm_all = [
    smf.ols(formula, data=df).fit()
    for formula in (
        'sales ~ TV',
        'sales ~ radio',
        'sales ~ newspaper',
        'sales ~ TV + radio + newspaper',
    )
]

# Bare expressions: regression summaries for the newspaper-only model and
# the full model, followed by the pairwise correlations of the predictors.
lm_newspaper.summary()
lm_all.summary()
df[df_independent].corr()
# An easier way to think about the F-value: F = (mean explained sum of squares) / (mean residual sum of squares).
# F-statistic of the full model (sales ~ TV + radio + newspaper) as reported
# by the fitted results object.
lm_all.fvalue
# The same ratio computed by hand: mean squared error of the model divided by
# mean squared error of the residuals. NOTE(review): expected to match
# `fvalue` above for the default (nonrobust) covariance -- confirm.
lm_all.mse_model / lm_all.mse_resid