In [6]:
import pandas as pd
# Load the salary records from Salaries.csv (path is relative to the working directory).
sal = pd.read_csv('Salaries.csv')
What was the average (mean) BasePay of all employees per year (2011-2014)?
`sal.groupby('Year').mean()['BasePay']`
sal['BasePay']
In [12]:
# Mean BasePay for each Year.
# Select the column *before* aggregating: only BasePay is averaged, and the
# non-numeric columns (e.g. JobTitle) never reach mean(), which raises
# TypeError on them in pandas >= 2.0 where numeric_only defaults to False.
sal.groupby('Year')['BasePay'].mean()
Out[12]:
In [16]:
# Number of distinct values in the JobTitle column (size of the array of uniques).
sal['JobTitle'].unique().size
Out[16]:
In [17]:
# Same count through the dedicated method; dropna=True is the default and is
# spelled out here because it means missing titles are not counted.
sal['JobTitle'].nunique(dropna=True)
Out[17]:
In [19]:
# Five most frequent job titles among the rows whose Year is 2013.
# value_counts() sorts by descending frequency, so head(5) is the top five.
sal[sal['Year'] == 2013]['JobTitle'].value_counts().head(5)
Out[19]:
How many job titles were represented by only one person in 2013 (i.e., job titles with only one occurrence in 2013)?
##### `sum(sal[sal['Year']==2013]['JobTitle'].value_counts() == 1)`
In [22]:
# Count the job titles that occur exactly once among the 2013 rows:
# compare each title's frequency with 1, then add up the True values.
(sal.loc[sal['Year'] == 2013, 'JobTitle'].value_counts() == 1).sum()
Out[22]:
How many people have the word Chief in their job title? (This is pretty tricky)
In [26]:
def chie(title):
    """Tell whether a job title mentions "chief", ignoring letter case.

    Args:
        title: Job title string to inspect.

    Returns:
        bool: True when the lowercased title contains the substring 'chief',
        False otherwise.
    """
    # The membership test already yields a bool, so no if/else is needed.
    return 'chief' in title.lower()
# Number of rows whose JobTitle contains "chief" (case-insensitive, via chie).
sal['JobTitle'].apply(chie).sum()
Out[26]:
In [27]:
Out[27]:
In [28]:
# NOTE(review): `chie` is also defined in an earlier cell of this notebook;
# this definition shadows it with the same behaviour. Consider keeping only one.
def chie(title):
    """Tell whether a job title mentions "chief", ignoring letter case.

    Args:
        title: Job title string to inspect.

    Returns:
        bool: True when the lowercased title contains the substring 'chief',
        False otherwise.
    """
    # The membership test already yields a bool, so no if/else is needed.
    return 'chief' in title.lower()
def ram(x):
    """Tell whether the string ``x`` contains "chief", ignoring letter case.

    Args:
        x: String to inspect (a job title in this notebook).

    Returns:
        bool: True when the lowercased string contains the substring 'chief',
        False otherwise.
    """
    # Lowercase first so that "Chief", "CHIEF", "ChIef", ... all match.
    return 'chief' in x.lower()
In [33]:
# Spot-check ram() on a mixed-case title: the "ChIef" spelling exercises the lowercasing step.
ram("kumar ChIef ramesh")
Out[33]:
In [35]:
# Number of rows whose JobTitle contains "chief" (case-insensitive, via ram).
sal['JobTitle'].apply(ram).sum()
Out[35]:
In [37]:
# Character length of each job title, stored as a new column.
# The vectorised .str accessor replaces a Python-level apply(len) and yields
# NaN for a missing title instead of raising TypeError.
sal['title_len'] = sal['JobTitle'].str.len()
In [76]:
# Pairwise correlation matrix between job-title length and total pay with benefits.
sal.loc[:, ['title_len', 'TotalPayBenefits']].corr()  # No correlation.
Out[76]:
No comments:
Post a Comment