In [1]:
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter('ignore',FutureWarning)
In [2]:
# Remote location of College.csv; the file is fetched over HTTP on every run.
COLLEGE_CSV_URL = 'http://www-bcf.usc.edu/~gareth/ISL/College.csv'

# Parse the CSV into the DataFrame that the remaining cells work on.
df = pd.read_csv(COLLEGE_CSV_URL)
In [16]:
# `college` is a second name for the same DataFrame object as `df` (no copy is
# made), so columns added through either name show up under both.
college = df
In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) for the Accept column.
df.loc[:, 'Accept'].describe()
Out[14]:
count      777.000000
mean      2018.804376
std       2451.113971
min         72.000000
25%        604.000000
50%       1110.000000
75%       2424.000000
max      26330.000000
Name: Accept, dtype: float64
In [15]:
# Summary statistics for Accept and F.Undergrad, one output column per variable.
df.loc[:, ['Accept', 'F.Undergrad']].describe()
Out[15]:
Accept F.Undergrad
count 777.000000 777.000000
mean 2018.804376 3699.907336
std 2451.113971 4850.420531
min 72.000000 139.000000
25% 604.000000 992.000000
50% 1110.000000 1707.000000
75% 2424.000000 4005.000000
max 26330.000000 31643.000000
In [12]:
# Duplicate the first column of the frame under the label 'Name'.
# The first column is looked up by its label, which is taken from position 0.
df['Name'] = df[df.columns[0]]
In [23]:
# Keep a handle on the full column Index (no cell visible here reads it).
interested = df.columns

# Pairwise-relationship grid for Accept and F.Undergrad. The call is the last
# expression and has no trailing semicolon, so the PairGrid repr is echoed as
# the cell output.
sns.pairplot(data=college.loc[:, ['Accept', 'F.Undergrad']])
Out[23]:
<seaborn.axisgrid.PairGrid at 0x7f6b20303cf8>
In [24]:
# Display the column labels currently present on `college`.
college.columns
Out[24]:
Index(['Unnamed: 0', 'Private', 'Apps', 'Accept', 'Enroll', 'Top10perc',
       'Top25perc', 'F.Undergrad', 'P.Undergrad', 'Outstate', 'Room.Board',
       'Books', 'Personal', 'PhD', 'Terminal', 'S.F.Ratio', 'perc.alumni',
       'Expend', 'Grad.Rate', 'Name'],
      dtype='object')
In [26]:
# Pairwise-relationship grid for three columns; the trailing semicolon
# suppresses the PairGrid repr in the cell output.
sns.pairplot(
    data=college.loc[:, ['Top10perc', 'Top25perc', 'Room.Board']],
);
In [28]:
# Scatter of Room.Board (y) against Top10perc (x). The frame handed to seaborn
# is the same three-column selection as before, although only two are plotted.
sns.scatterplot(
    data=college.loc[:, ['Top10perc', 'Top25perc', 'Room.Board']],
    x='Top10perc',
    y='Room.Board',
);
In [32]:
# Box plot with Outstate on the x axis, grouped by Private on the y axis.
sns.boxplot(data=college, x="Outstate", y="Private");
In [36]:
# Boolean flag: True for rows whose Top10perc is strictly greater than 50.
college['Elite'] = college['Top10perc'].gt(50)

# Row-wise ratio of Accept to Apps.
college['Acceptance.Rate'] = college['Accept'].div(college['Apps'])
In [37]:
# Distribution plot of the derived Acceptance.Rate column (created by the cell
# that assigns college['Acceptance.Rate'], which must have run first).
# NOTE(review): distplot is deprecated in newer seaborn releases — confirm the
# installed version before re-running, or plan a move to histplot/displot.
sns.distplot(a=college.loc[:, 'Acceptance.Rate']);
In [41]:
# Box plot of Outstate (y axis), one box per value of the Elite flag (x axis).
sns.boxplot(data=college, x="Elite", y="Outstate");
In [47]:
# Histogram of Outstate with 6 bins; the KDE curve is switched off and a rug
# of individual observations is drawn along the axis.
sns.distplot(
    college['Outstate'],
    bins=6,
    kde=False,
    rug=True,
);