我想使用格式删除一些列.(想要删除列:new_cost0,new_0_quantity,new_2_cost和new_2_quantity)但不是每个列都被删除.以下是数据框和代码.
数据帧
|new_0_cost|new_0_quantity|new_2_cost|new_2_quantity|quality|weights|
0| 10 | 20 | 10 | 20 | good | 40 |
功能
def drop_cost_and_quan(data):
# data is a dataframe described above
# try to drop new_cost0, new_0_quantity, new_2_cost, and new_2_quantity
data3 = data.copy()
for i, item in enumerate(data3.columns):
if item == 'new_{0}_cost'.format(i):
data3 = data3.drop(item, axis=1)
print('cost:',item == 'new_{0}_cost'.format(i))
for i, item in enumerate(data3.columns):
if item == 'new_{0}_quantity'.format(i):
data3 = data3.drop(item, axis=1)
print(item == 'item_{0}_quantity'.format(i))
return data3
Outptut:
data3 = drop_cost_and_quan(data):
cost: True
cost: False
cost: True
cost: False
cost: False
cost: False
quntity: True
quntity: False
quntity: False
quntity: False
data3
|new_2_quantity|quality| weights|
0| 20 |good |40
最佳答案 或者@ vinod的方法你也可以这样做:
In [148]: df
Out[148]:
new_0_cost new_0_quantity new_2_cost new_2_quantity new_0_total_cost new_2_total_cost quality weights
0 10 20 10 20 1111 2222 good 40
In [151]: df.drop(df.columns[df.columns.str.contains(r'^new_\d+_(?:quantity|cost)')], 1, inplace=True)
In [152]: df
Out[152]:
new_0_total_cost new_2_total_cost quality weights
0 1111 2222 good 40
说明:
In [148]: df
Out[148]:
new_0_cost new_0_quantity new_2_cost new_2_quantity new_0_total_cost new_2_total_cost quality weights
0 10 20 10 20 1111 2222 good 40
In [149]: df.columns.str.contains(r'^new_\d+_(?:quantity|cost)')
Out[149]: array([ True, True, True, True, False, False, False, False], dtype=bool)
In [150]: df.columns[df.columns.str.contains(r'^new_\d+_(?:quantity|cost)')]
Out[150]: Index(['new_0_cost', 'new_0_quantity', 'new_2_cost', 'new_2_quantity'], dtype='object')