Python And R Data science skills: 49 multi index data frames

Monday 5 February 2018

49 multi index data frames

49 multi index data frames
In [14]:
import pandas as pd
import numpy as np
from numpy.random import randn
# Seed NumPy's global random generator so the sample values shown below are
# reproducible, then draw a 6x2 array of random samples for the DataFrame body.
np.random.seed(3)
m1 = np.random.randn(6, 2)
In [6]:
# Outer level: the group label, repeated once for every row in that group.
outside = ['G1'] * 3 + ['G2'] * 3
# Inner level: the row number, restarting at 1 inside each group.
inside = [1, 2, 3] * 2
# Pair the two levels positionally into (group, number) index tuples.
hier_index = [(group, number) for group, number in zip(outside, inside)]
In [7]:
# Display the (group, number) tuples that will become the two-level row index.
hier_index
Out[7]:
[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]
In [9]:
# Convert the list of (group, number) tuples into a two-level pandas MultiIndex.
hier_index1 = pd.MultiIndex.from_tuples(hier_index)
In [19]:
# Inspect the MultiIndex built from the tuples.
hier_index1
Out[19]:
MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
In [15]:
# Build the DataFrame from the 6x2 array m1, using the MultiIndex for the rows
# and 'A'/'B' as the column labels.
df = pd.DataFrame(m1,index=hier_index1,columns=['A','B'])
In [20]:
# Display the whole frame; the outer label (G1/G2) is printed once per group.
df
Out[20]:
A B
G1 1 1.788628 0.436510
2 0.096497 -1.863493
3 -0.277388 -0.354759
G2 1 -0.082741 -0.627001
2 -0.043818 -0.477218
3 -1.313865 0.884622
In [21]:
# Select by the outer level only: returns the 'G1' rows, indexed by the inner level.
df.loc['G1']
Out[21]:
A B
1 1.788628 0.436510
2 0.096497 -1.863493
3 -0.277388 -0.354759
In [18]:
# Two-step lookup: take the 'G1' sub-frame, then the row labelled 1 inside it.
# The result is a Series holding that row's 'A' and 'B' values.
df.loc['G1'].loc[1]
Out[18]:
A    1.788628
B    0.436510
Name: 1, dtype: float64
In [23]:
# Name the two index levels: "gr" for the outer (group) level and "nm" for the
# inner (number) level. This assigns onto df's existing index in place, so any
# later display of df shows the level names.
df.index.names=["gr","nm"]
In [24]:
# Display df again to show the "gr"/"nm" level names above the index columns.
df
Out[24]:
A B
gr nm
G1 1 1.788628 0.436510
2 0.096497 -1.863493
3 -0.277388 -0.354759
G2 1 -0.082741 -0.627001
2 -0.043818 -0.477218
3 -1.313865 0.884622
In [27]:
# Cross-section for one exact (gr, nm) pair. The key is a tuple with one label
# per index level: pandas deprecated list keys for xs() in 1.4 and raises a
# TypeError for them from 2.0 onwards.
df.xs(("G1", 1))
Out[27]:
A    1.788628
B    0.436510
Name: (G1, 1), dtype: float64
In [30]:
# Cross-section on the inner level: pick, from every group, the row whose "nm"
# label is 1. The result is indexed by "gr" only.
df.xs(1,level='nm')
Out[30]:
A B
gr
G1 1.788628 0.436510
G2 -0.082741 -0.627001

No comments:

Post a Comment