使用 https://github.com/taowen/es-monitor 可以用 SQL 进行 elasticsearch 的查询。今天需要做一些最简单的聚合查询
COUNT(*)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select count(*) from quote
EOF
{"count(*)": 20994400}
Elasticsearch
{
"aggs": {},
"size": 0
}
{
"hits": {
"hits": [],
"total": 20994400,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 26,
"timed_out": false
}
这个就不算聚合,只是看了一下最终满足过滤条件的 total hits count。
COUNT(ipo_year)
这个和 COUNT(*) 的区别是 COUNT(ipo_year) 要求字段必须有值才算一个。
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select count(ipo_year) from symbol
EOF
{"count(ipo_year)": 2898}
Elasticsearch
{
"aggs": {
"count(ipo_year)": {
"value_count": {
"field": "ipo_year"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 55,
"aggregations": {
"count(ipo_year)": {
"value": 2898
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.3204170000ms",
"breakdown": {
"score": 0,
"create_weight": 10688,
"next_doc": 278660,
"match": 0,
"build_scorer": 31069,
"advance": 0
}
}
],
"rewrite_time": 2279,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.957183000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2319240000ms"
},
{
"name": "ValueCountAggregator: [count(ipo_year)]",
"reason": "aggregation",
"time": "1.999916000ms"
}
]
}
]
}
]
这是我们的第一个聚合例子。可以从profile结果看出来,其实现方式在采集文档的时候加上了ValueCountAggregator统计了字段非空的文档数量。
COUNT(DISTINCT ipo_year)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select count(distinct ipo_year) from symbol
EOF
{"count(distinct ipo_year)": 39}
Elasticsearch
{
"aggs": {
"count(distinct ipo_year)": {
"cardinality": {
"field": "ipo_year"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 24,
"aggregations": {
"count(distinct ipo_year)": {
"value": 39
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.2033600000ms",
"breakdown": {
"score": 0,
"create_weight": 7501,
"next_doc": 162905,
"match": 0,
"build_scorer": 32954,
"advance": 0
}
}
],
"rewrite_time": 2300,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.438386000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2240230000ms"
},
{
"name": "CardinalityAggregator: [count(distinct ipo_year)]",
"reason": "aggregation",
"time": "1.471620000ms"
}
]
}
]
}
]
这个例子里 ValueCountAggregator 变成了 CardinalityAggregator
SUM(market_cap)
MIN/MAX/AVG/SUM 这几个简单的聚合也是支持的
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select sum(market_cap) from symbol
EOF
{"sum(market_cap)": 11454155180142.0}
Elasticsearch
{
"aggs": {
"sum(market_cap)": {
"sum": {
"field": "market_cap"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 15,
"aggregations": {
"sum(market_cap)": {
"value": 11454155180142.0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.2026870000ms",
"breakdown": {
"score": 0,
"create_weight": 8097,
"next_doc": 163069,
"match": 0,
"build_scorer": 31521,
"advance": 0
}
}
],
"rewrite_time": 2151,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.461247000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.3302140000ms"
},
{
"name": "SumAggregator: [sum(market_cap)]",
"reason": "aggregation",
"time": "1.102363000ms"
}
]
}
]
}
]
过滤 + 聚合
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select sum(market_cap) from symbol where ipo_year=1998
EOF
{"sum(market_cap)": 107049150786.0}
Elasticsearch
{
"query": {
"term": {
"ipo_year": 1998
}
},
"aggs": {
"sum(market_cap)": {
"sum": {
"field": "market_cap"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 56,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 11,
"aggregations": {
"sum(market_cap)": {
"value": 107049150786.0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "TermQuery",
"lucene": "ipo_year:`N",
"time": "0.4526400000ms",
"breakdown": {
"score": 0,
"create_weight": 220579,
"next_doc": 159412,
"match": 0,
"build_scorer": 72649,
"advance": 0
}
}
],
"rewrite_time": 3750,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "0.2203470000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.009478000000ms"
},
{
"name": "SumAggregator: [sum(market_cap)]",
"reason": "aggregation",
"time": "0.1557820000ms"
}
]
}
]
}
]
query 过滤完,然后再计算 aggs