# 结果演示
>>> df.set_index('one')
           two     three
one
-1.183802  1.0  zhangsan
 1.426724  2.0        李四
-1.530958  3.0       999
-0.939147  4.0       0.1
>>> df.set_index(['one'],drop=False)
                one  two     three
one
-1.183802 -1.183802  1.0  zhangsan
 1.426724  1.426724  2.0        李四
-1.530958 -1.530958  3.0       999
-0.939147 -0.939147  4.0       0.1
# 结果演示
>>> df
                  A         B         C         D
1         -0.173690  0.004300 -0.896126  0.360287
2          1.493732  0.784469 -0.799769 -1.828341
a         -0.678492  0.613644  1.835787  0.252200
b         -1.347327  0.134869 -0.595432 -0.533671
2006-10-1  0.178216 -0.690090 -0.625436  1.377065
第六行       -1.534302 -0.147462  0.703847  0.221454
>>> data=np.random.randn(6,4)
>>> df=pd.DataFrame(data,columns=list('ABCD'),index=[1,2,'a','b','2006-10-1','第六行'])
>>> df
                  A         B         C         D
1         -0.173690  0.004300 -0.896126  0.360287
2          1.493732  0.784469 -0.799769 -1.828341
a         -0.678492  0.613644  1.835787  0.252200
b         -1.347327  0.134869 -0.595432 -0.533671
2006-10-1  0.178216 -0.690090 -0.625436  1.377065
第六行       -1.534302 -0.147462  0.703847  0.221454
2.1 按列读取
2.1.1 df.列名
# 该方法每次只能读取一列。 df.A
# 结果演示
>>> df.A
1            1.787797
2            0.098504
a           -0.361166
b            0.337533
2006-10-1   -0.628970
第六行          3.356526
Name: A, dtype: float64
# 结果演示
>>> df['A']
1            1.787797
2            0.098504
a           -0.361166
b            0.337533
2006-10-1   -0.628970
第六行          3.356526
Name: A, dtype: float64
>>> df[['A','C','D']]
                  A         C         D
1          1.787797 -0.668013  0.554594
2          0.098504 -0.420558  0.508395
a         -0.361166  0.423340 -2.039099
b          0.337533  0.378315  0.485731
2006-10-1 -0.628970  1.152818 -0.671454
第六行        3.356526  0.854735 -0.768296
# 结果演示
>>> df.loc[[1]]
          A         B         C         D
1  1.787797  0.366138 -0.668013  0.554594
>>> df.loc[[1,'a','2006-10-1']]
                  A         B         C         D
1          1.787797  0.366138 -0.668013  0.554594
a         -0.361166 -0.427358  0.423340 -2.039099
2006-10-1 -0.628970 -1.219419  1.152818 -0.671454
>>> df.iloc[[1,2,5]]
            A         B         C         D
2    0.098504 -1.332709 -0.420558  0.508395
a   -0.361166 -0.427358  0.423340 -2.039099
第六行  3.356526 -0.945234  0.854735 -0.768296
>>> df.iloc[1:5]
                  A         B         C         D
2          0.098504 -1.332709 -0.420558  0.508395
a         -0.361166 -0.427358  0.423340 -2.039099
b          0.337533  1.770279  0.378315  0.485731
2006-10-1 -0.628970 -1.219419  1.152818 -0.671454
2.2.3 .ix
.ix 自 pandas 0.20 起已被弃用, 并已在 pandas 1.0 中被移除, 请改用 .loc (按标签) 或 .iloc (按位置), 所以这里就不举例分析了.
# 结果演示
>>> df.ix[[1]]
__main__:1: DeprecationWarning: .ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing
# 结果演示
>>> df.loc[1][['A','B']]
A    1.787797
B    0.366138
Name: 1, dtype: float64
>>> df.loc[1,['A','B']]
A    1.787797
B    0.366138
Name: 1, dtype: float64
>>> df.loc[1]['A':'C']
A    1.787797
B    0.366138
C   -0.668013
Name: 1, dtype: float64
>>> df.loc[1,'A':'C']
A    1.787797
B    0.366138
C   -0.668013
Name: 1, dtype: float64
# 结果演示
>>> df.loc[[1,2]][['A','C']]
          A         C
1  1.787797 -0.668013
2  0.098504 -0.420558
>>> df.loc[[1,'b'],['A','B']]
          A         B
1  1.787797  0.366138
b  0.337533  1.770279
>>> df.loc[[1,'b'],'A':'C']
          A         B         C
1  1.787797  0.366138 -0.668013
b  0.337533  1.770279  0.378315
# 结果演示
>>> df.iloc[0,1:3]
B    0.366138
C   -0.668013
Name: 1, dtype: float64
>>> df.iloc[0][['A','B']]
A    1.787797
B    0.366138
Name: 1, dtype: float64
>>> df.iloc[0]['A':'C']
A    1.787797
B    0.366138
C   -0.668013
Name: 1, dtype: float64
# 结果演示
>>> df.iloc[0:3,1:4]
          B         C         D
1  0.366138 -0.668013  0.554594
2 -1.332709 -0.420558  0.508395
a -0.427358  0.423340 -2.039099
>>> df.iloc[0:3, [1,2,3]]
          B         C         D
1  0.366138 -0.668013  0.554594
2 -1.332709 -0.420558  0.508395
a -0.427358  0.423340 -2.039099
>>> df.iloc[1:3][['A','B','C']]
          A         B         C
2  0.098504 -1.332709 -0.420558
a -0.361166 -0.427358  0.423340
# 结果演示
>>> df.insert(2,'four',[11,22,33,44])
>>> df
        one  two  four     three
1 -1.417222  1.0    11  zhangsan
2  1.251673  2.0    22        李四
3 -0.103710  3.0    33       999
4 -1.237722  4.0    44       0.1
# 插入一行
# 注意: 若行标签 1 已存在, 下面的赋值会覆盖该行; 只有使用尚不存在的行标签时, .loc 才会新增一行.
# .iloc 只能覆盖已有位置上的行, 不能新增行; .ix 已被弃用.
row={'one':111,'two':222,'three':333}
df.loc[1]=row
# or df.iloc[1]=row
# or df.ix[1]=row
>>> pd.concat([df,df1],axis=1)
        one  two  four     three      four   five
1 -1.417222  1.0    11  zhangsan -0.119780    1.0
2  1.251673  2.0    22        李四 -0.290202   34.0
3 -0.103710  3.0    33       999  1.209032   67.0
4 -1.237722  4.0    44       0.1  0.997500  100.0
# 若 df1 和 df 的行索引不完全相同, 空缺的内容会被填充为 NaN
# 按列合并
# 初始化列表df, df2, 用于列合并
>>> data={"one":np.random.randn(4),"two":np.linspace(1,4,4),"three":['zhangsan','李四',999,0.1]}
>>> df=pd.DataFrame(data,index=[1,2,3,4])
>>> data={"one":[222,214],"two":np.linspace(11,22,2), "three":['AAA', 'BBB']}
>>> df2=pd.DataFrame(data)
>>> df2
   one   two three
0  222  11.0   AAA
1  214  22.0   BBB
>>> pd.concat([df,df2],axis=0)
          one   two     three
1    0.034967   1.0  zhangsan
2   -1.888495   2.0        李四
3    1.045013   3.0       999
4   -0.313027   4.0       0.1
0  222.000000  11.0       AAA
1  214.000000  22.0       BBB
# 若 df2 和 df 的列名不完全相同, 空缺的列内容会被填充为 NaN