# Count duplicated rows in a pandas DataFrame and remove them.
from pandas import DataFrame
# Demo: find, count and drop fully-duplicated rows of a small DataFrame.
df = DataFrame(
    {
        "key1": ["a", "a", "b", "b", "a", "a"],
        "key2": ["one", "two", "one", "two", "one", "one"],
        "data1": [1, 2, 3, 2, 1, 1],
    }
)

# Show the whole frame. The sample outputs below assume a pandas version
# that keeps dict insertion order for the columns.
print(df)
#   key1 key2  data1
# 0    a  one      1
# 1    a  two      2
# 2    b  one      3
# 3    b  two      2
# 4    a  one      1
# 5    a  one      1

# Boolean mask: True for every row that repeats an earlier row
# (the first occurrence is not flagged). Computed once and reused.
dup_mask = df.duplicated()

# Rows that are duplicates of an earlier row.
print(df[dup_mask])
#   key1 key2  data1
# 4    a  one      1
# 5    a  one      1

# Count the duplicated rows per column. `count()` tallies non-null cells,
# so each entry equals the number of duplicate rows only when the data has
# no missing values (true for this sample).
dup = df[dup_mask].count()
print(dup)  # the last two rows are duplicates
# key1     2
# key2     2
# data1    2
# dtype: int64

# Drop the duplicates, keeping the first occurrence of each row. This is
# equivalent to `df[~dup_mask]`; note that a boolean mask is inverted with
# `~`, not unary minus (NumPy rejects `-` on boolean arrays).
nodup = df.drop_duplicates()
print(nodup)
#   key1 key2  data1
# 0    a  one      1
# 1    a  two      2
# 2    b  one      3
# 3    b  two      2