python – Pandas SQL相当于update by group by

尽管如此,我找不到正确的方法来获得在pandas中运行的等效查询.

update product
  set maxrating = (select max(rating)
                   from rating
                   where source = 'customer'
                     and product.sku = rating.sku
                   group by sku)
  where maxrating is null;

熊猫

product = pd.DataFrame({'sku':[1,2,3],'maxrating':[0,0,1]})
rating = pd.DataFrame({'sku':[1,1,2,3,3],'rating':[2,5,3,5,4],'source':['retailer','customer','customer','retailer','customer']})
expected_result = pd.DataFrame({'sku':[1,2,3],'maxrating':[5,3,1]})

SQL

drop table if exists product;
create table product(sku integer primary key, maxrating int);
insert into product(maxrating) values(null),(null),(1);
drop table if exists rating; create table rating(sku int, rating int, source text);
insert into rating values(1,2,'retailer'),(1,5,'customer'),(2,3,'customer'),(2,5,'retailer'),(3,3,'retailer'),(3,4,'customer');
update product
  set maxrating = (select max(rating)
                   from rating
                   where source = 'customer'
                     and product.sku = rating.sku
                   group by sku)
  where maxrating is null;
select *
from product;

怎么做到呢?

最佳答案 试试这个:

In [220]: product.ix[product.maxrating == 0, 'maxrating'] = product.sku.map(rating.groupby('sku')['rating'].max())

In [221]: product
Out[221]:
   maxrating  sku
0          5    1
1          3    2
2          1    3

或使用普通面具:

In [222]: mask = (product.maxrating == 0)

In [223]: product.ix[mask, 'maxrating'] = product.ix[mask, 'maxrating'].map(rating.groupby('sku')['rating'].max())

In [224]: product
Out[224]:
   maxrating  sku
0          5    1
1          3    2
2          1    3
点赞