/ Python And R Data science skills: 53 Python Pandas Merging

Monday, 5 February 2018

53 Python Pandas Merging

53 Python Pandas - Merging
In [1]:
import pandas as pd
In [22]:
# import the pandas library
import pandas as pd
left1 = pd.DataFrame({
         'id':[1,2,3,4,5],
         'Name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
         'subject_id':['sub1','sub2','sub4','sub6','sub5']})
right1 = pd.DataFrame(
         {'id':[1,2,3,4,5],
         'Name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
         'subject_id':['sub2','sub4','sub3','sub6','sub5']})
print (left)
print (right)
     Name  id subject_id
0    Alex   1       sub1
1     Amy   2       sub2
2   Allen   3       sub4
3   Alice   4       sub6
4  Ayoung   5       sub5
    Name  id subject_id
0  Billy   1       sub2
1  Brian   2       sub4
2   Bran   3       sub3
3  Bryce   4       sub6
4  Betty   5       sub5
In [13]:
#left
In [14]:
#right
In [23]:
pd.merge(left1,right1,on='id')
Out[23]:
Name_x id subject_id_x Name_y subject_id_y
0 Alex 1 sub1 Billy sub2
1 Amy 2 sub2 Brian sub4
2 Allen 3 sub4 Bran sub3
3 Alice 4 sub6 Bryce sub6
4 Ayoung 5 sub5 Betty sub5
In [24]:
pd.merge(left1,right1,on=['id','subject_id'])
Out[24]:
Name_x id subject_id Name_y
0 Alice 4 sub6 Bryce
1 Ayoung 5 sub5 Betty
In [25]:
pd.merge(left1, right1, on='subject_id', how='left')
Out[25]:
Name_x id_x subject_id Name_y id_y
0 Alex 1 sub1 NaN NaN
1 Amy 2 sub2 Billy 1.0
2 Allen 3 sub4 Brian 2.0
3 Alice 4 sub6 Bryce 4.0
4 Ayoung 5 sub5 Betty 5.0
In [19]:
pd.merge(left, right, on='subject_id', how='right')
Out[19]:
Name_x id_x subject_id Name_y id_y
0 Amy 2.0 sub2 Billy 1
1 Allen 3.0 sub4 Brian 2
2 Alice 4.0 sub6 Bryce 4
3 Ayoung 5.0 sub5 Betty 5
4 NaN NaN sub3 Bran 3
In [26]:
pd.merge(left, right, how='outer', on='subject_id',sort=True)
Out[26]:
Name_x id_x subject_id Name_y id_y
0 Alex 1.0 sub1 NaN NaN
1 Amy 2.0 sub2 Billy 1.0
2 NaN NaN sub3 Bran 3.0
3 Allen 3.0 sub4 Brian 2.0
4 Ayoung 5.0 sub5 Betty 5.0
5 Alice 4.0 sub6 Bryce 4.0
In [21]:
pd.merge(left, right, on='subject_id', how='inner')
Out[21]:
Name_x id_x subject_id Name_y id_y
0 Amy 2 sub2 Billy 1
1 Allen 3 sub4 Brian 2
2 Alice 4 sub6 Bryce 4
3 Ayoung 5 sub5 Betty 5

No comments:

Post a Comment