/ Python And R Data science skills: 51 Groupby In pandas

Monday 5 February 2018

51 Groupby In pandas

51 Groupby In pandas
In [1]:
import pandas as pd
In [4]:
# Sales records stored as parallel column lists: position i across the three
# lists describes one record (where, who, how much).
dmc = dict(
    Company=['Hyderabad', 'Hyderabad', 'Pune', 'Pune', 'Chennai', 'Chennai'],
    Person=['ramesh', 'Venkat', 'Praveen', 'Prasad', 'Nani', 'Kiran'],
    Sales=[500, 1520, 440, 334, 273, 850],
)
In [5]:
# Echo the raw dict to check the three column lists before building a DataFrame.
dmc
Out[5]:
{'Company': ['Hyderabad', 'Hyderabad', 'Pune', 'Pune', 'Chennai', 'Chennai'],
 'Person': ['ramesh', 'Venkat', 'Praveen', 'Prasad', 'Nani', 'Kiran'],
 'Sales': [500, 1520, 440, 334, 273, 850]}
In [6]:
# Build the sales table: each dict key becomes a column, each list its values.
df = pd.DataFrame(data=dmc)
In [7]:
# Display the sales frame: six rows with Company, Person and Sales columns.
df
Out[7]:
Company Person Sales
0 Hyderabad ramesh 500
1 Hyderabad Venkat 1520
2 Pune Praveen 440
3 Pune Prasad 334
4 Chennai Nani 273
5 Chennai Kiran 850
In [8]:
# Mean of the numeric columns (here only 'Sales') for each company.
# numeric_only=True is required because the frame also holds the string column
# 'Person': older pandas silently dropped such columns, but pandas >= 2.0
# raises TypeError when asked to average them.
df.groupby('Company').mean(numeric_only=True)
Out[8]:
Sales
Company
Chennai 561.5
Hyderabad 1010.0
Pune 387.0
In [9]:
# groupby on its own returns a DataFrameGroupBy object, not a table; an
# aggregation such as .mean() or .count() must be called on it to get values.
df.groupby('Company')
Out[9]:
<pandas.core.groupby.DataFrameGroupBy object at 0x0000000007F7AF28>
In [10]:
# Keep the grouped object so several aggregations can reuse the same grouping.
gr = df.groupby(by='Company')
In [11]:
# Per-company mean of the numeric columns (only 'Sales' here).
# numeric_only=True skips the string column 'Person'; without it pandas >= 2.0
# raises TypeError instead of silently dropping the column as older versions did.
gr.mean(numeric_only=True)
Out[11]:
Sales
Company
Chennai 561.5
Hyderabad 1010.0
Pune 387.0
In [12]:
# max() is taken per column, independently. 'Person' is the name that sorts
# last (string comparison is case-sensitive, so lowercase 'ramesh' sorts after
# 'Venkat') and 'Sales' is the largest sale. The values in one output row need
# not come from the same record: Chennai shows Nani with 850, but 850 is
# Kiran's sale.
gr.max()
Out[12]:
Person Sales
Company
Chennai Nani 850
Hyderabad ramesh 1520
Pune Praveen 440
In [13]:
# min() is taken per column, independently: 'Person' is the name that sorts
# first and 'Sales' is the smallest sale. The values in one output row need not
# come from the same record: Chennai shows Kiran with 273, but 273 is Nani's
# sale.
gr.min()
Out[13]:
Person Sales
Company
Chennai Kiran 273
Hyderabad Venkat 500
Pune Prasad 334
In [14]:
# Number of non-null entries per column within each company (2 everywhere here,
# since every company has two complete rows).
gr.count()
Out[14]:
Person Sales
Company
Chennai 2 2
Hyderabad 2 2
Pune 2 2
In [22]:
# Summary statistics of 'Sales' per company: count, mean, std, min, quartiles
# and max, one row per company.
gr.describe()
Out[22]:
Sales
count mean std min 25% 50% 75% max
Company
Chennai 2.0 561.5 408.000613 273.0 417.25 561.5 705.75 850.0
Hyderabad 2.0 1010.0 721.248917 500.0 755.00 1010.0 1265.00 1520.0
Pune 2.0 387.0 74.953319 334.0 360.50 387.0 413.50 440.0
In [16]:
# Same summary, transposed: statistics become rows and companies become
# columns, which makes the groups easier to compare side by side.
gr.describe().transpose()
Out[16]:
Company Chennai Hyderabad Pune
Sales count 2.000000 2.000000 2.000000
mean 561.500000 1010.000000 387.000000
std 408.000613 721.248917 74.953319
min 273.000000 500.000000 334.000000
25% 417.250000 755.000000 360.500000
50% 561.500000 1010.000000 387.000000
75% 705.750000 1265.000000 413.500000
max 850.000000 1520.000000 440.000000
In [23]:
# NOTE(review): this repeats the previous gr.describe().transpose() cell and
# produces identical output; it looks like a leftover re-run and can probably
# be removed.
gr.describe().transpose()
Out[23]:
Company Chennai Hyderabad Pune
Sales count 2.000000 2.000000 2.000000
mean 561.500000 1010.000000 387.000000
std 408.000613 721.248917 74.953319
min 273.000000 500.000000 334.000000
25% 417.250000 755.000000 360.500000
50% 561.500000 1010.000000 387.000000
75% 705.750000 1265.000000 413.500000
max 850.000000 1520.000000 440.000000
In [3]:
import pandas as pd

# Second example data set: one row per Team/Year pair with that season's rank
# and points.
# NOTE(review): the entry at index 5 is spelled 'kings' (lowercase) while the
# other rows use 'Kings'. Grouping is case-sensitive, so that row ends up in a
# group of its own -- confirm whether this is intentional.
ipl_data = {
    'Team': [
        'Riders', 'Riders', 'Devils', 'Devils', 'Kings', 'kings',
        'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders',
    ],
    'Rank': [1, 2, 2, 3, 3, 4, 1, 1, 2, 4, 1, 2],
    'Year': [2014, 2015, 2014, 2015, 2014, 2015,
             2016, 2017, 2016, 2014, 2015, 2017],
    'Points': [876, 789, 863, 673, 741, 812,
               756, 788, 694, 701, 804, 690],
}
# NOTE: this rebinds `df`; from here on it refers to the IPL table.
df = pd.DataFrame(data=ipl_data)

print(df)
    Points  Rank    Team  Year
0      876     1  Riders  2014
1      789     2  Riders  2015
2      863     2  Devils  2014
3      673     3  Devils  2015
4      741     3   Kings  2014
5      812     4   kings  2015
6      756     1   Kings  2016
7      788     1   Kings  2017
8      694     2  Riders  2016
9      701     4  Royals  2014
10     804     1  Royals  2015
11     690     2  Riders  2017
In [4]:
# Rich display of the IPL frame (df was rebound to the ipl_data table in the
# previous cell, replacing the sales table).
df
Out[4]:
Points Rank Team Year
0 876 1 Riders 2014
1 789 2 Riders 2015
2 863 2 Devils 2014
3 673 3 Devils 2015
4 741 3 Kings 2014
5 812 4 kings 2015
6 756 1 Kings 2016
7 788 1 Kings 2017
8 694 2 Riders 2016
9 701 4 Royals 2014
10 804 1 Royals 2015
11 690 2 Riders 2017
In [5]:
# Grouping alone returns a DataFrameGroupBy object; inspect it via attributes
# such as .groups or by calling an aggregation on it.
df.groupby('Team')
Out[5]:
<pandas.core.groupby.DataFrameGroupBy object at 0x0000000005AFF400>
In [6]:
 # .groups maps each distinct Team label to the index labels of its rows.
 # Labels are compared case-sensitively, so 'Kings' and 'kings' appear as two
 # separate groups in the result.
 df.groupby('Team').groups
Out[6]:
{'Devils': Int64Index([2, 3], dtype='int64'),
 'Kings': Int64Index([4, 6, 7], dtype='int64'),
 'Riders': Int64Index([0, 1, 8, 11], dtype='int64'),
 'Royals': Int64Index([9, 10], dtype='int64'),
 'kings': Int64Index([5], dtype='int64')}

No comments:

Post a Comment