统计每个分组里的最大最小记录
mongo实现
{
"_id" : "01001",
"city" : "AGAWAM",
"pop" : 15338,
"state" : "MA"
}
完整json见: http://media.mongodb.org/zips…
需求
得到每个state拥有最多人口的城市和拥有最小人口的城市以及对应的人口数
db.zipcodes.aggregate(
{$group: {_id:{state:"$state",city:"$city"}, popPerCity:{$sum:"$pop"} } },
{$sort: {popPerCity:1} },
{$group: {
_id:"$_id.state",
biggestCity:{$last:"$_id.city"},
biggestPop: {$last:"$popPerCity"},
smallestCity: {$first:"$_id.city"},
smallestPop: {$first:"$popPerCity"}
}}
)
效果
{ "_id" : "DE", "biggestCity" : "NEWARK", "biggestPop" : 111674, "smallestCity" : "BETHEL", "smallestPop" : 108 }
{ "_id" : "MS", "biggestCity" : "JACKSON", "biggestPop" : 204788, "smallestCity" : "CHUNKY", "smallestPop" : 79 }
...
见: https://docs.mongodb.com/manu…
Mysql实现
相比mongo的直观 就要绕很多了
方案一
# 需要设置一个较大值 默认的1024还不够用
SET SESSION group_concat_max_len = 20480;
select state, substring_index(group_concat(city order by pop ),",",1) smallestCity, min(pop),substring_index(group_concat(city order by pop ),",",-1) biggestCity, max(pop) from (select state, city, sum(pop) pop from zipcode group by state, city) a group by state ;
参考
https://dev.mysql.com/doc/ref…
https://dev.mysql.com/doc/ref…
方案二
# 每个state分组里面分别按pop升序、降序排序 人为分配一个序号 均取序号一就得到了该分组的起止记录
select b.state, b.city smallestCity, b.pop smallestPop, c.city biggestCity, c.pop biggestPop from
( select state,city,pop,@rank:=if(@current_state=state, @rank+1, 1) rank, @current_state:=state from
(select state, city, sum(pop) pop from zipcode group by state, city) a,
(select @current_state:=NULL, @rank:=NULL) vars
order by a.state,a.pop
) b ,
( select state,city,pop,@rank:=if(@current_state=state, @rank+1, 1) rank, @current_state:=state from
(select state, city, sum(pop) pop from zipcode group by state, city) a,
(select @current_state:=NULL, @rank:=NULL) vars
order by a.state,a.pop desc
) c
where b.state = c.state and b.rank = 1 and c.rank = 1
补充
建表语句
CREATE TABLE `zipcode` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`zipcode` varchar(10) NOT NULL,
`city` varchar(30) NOT NULL,
`pop` int(11) NOT NULL,
`state` varchar(5) NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `zipcode` (`zipcode`),
KEY `idx_state_city` (`state`,`city`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
json ==> batch insert sql
jq -c '[._id, .city, .pop, .state]' zips.json | sed 's/\[\(.*\)\]$/\1/' | awk -F, '{print "insert into zipcode select null," $1"," $2","$3","$4";"}'