使用 https://github.com/taowen/es-monitor 可以用 SQL 进行 elasticsearch 的查询。下钻未必一定是GROUP BY,每记过一层GROUP BY,桶的数量就会增加一层。还有一种下钻是用 filter 来下钻,这种下钻方式会使下一层的 match的文档数量变少,但是桶的数量不变。
SELECT INSIDE WHERE ipo_year=2000
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
WITH all_symbols AS (SELECT MAX(market_cap) AS max_all_times FROM symbol);
WITH year_2000 AS (SELECT MAX(market_cap) AS max_at_2000 FROM all_symbols
WHERE ipo_year=2000)
EOF
{"max_at_2000": 20310000000.0, "max_all_times": 522690000000.0}
可以看到桶的数量还是同一个,只是增加了一个max_at_2000结果字段。
Elasticsearch
{
"aggs": {
"year_2000": {
"filter": {
"term": {
"ipo_year": 2000
}
},
"aggs": {
"max_at_2000": {
"max": {
"field": "market_cap"
}
}
}
},
"max_all_times": {
"max": {
"field": "market_cap"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 3,
"aggregations": {
"year_2000": {
"max_at_2000": {
"value": 20310000000.0
},
"doc_count": 58
},
"max_all_times": {
"value": 522690000000.0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "TermQuery",
"lucene": "ipo_year:`P",
"time": "0.3492430000ms",
"breakdown": {
"score": 0,
"create_weight": 220149,
"next_doc": 0,
"match": 0,
"build_scorer": 95037,
"advance": 34057
}
},
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.1883710000ms",
"breakdown": {
"score": 0,
"create_weight": 3980,
"next_doc": 169730,
"match": 0,
"build_scorer": 14661,
"advance": 0
}
}
],
"rewrite_time": 3158,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "8.789594000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2768050000ms"
},
{
"name": "BucketCollector: [[year_2000, max_all_times]]",
"reason": "aggregation",
"time": "7.667765000ms"
}
]
}
]
}
]
从Profile的结果来看,其实是一次性做了两个查询,一个是全部文档,一个是ipo_year=2000的文档,然后统一聚合。
SELECT INSIDE 往不同方向过滤下钻
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
WITH all_symbols AS (SELECT MAX(market_cap) AS max_all_times FROM symbol);
WITH year_2000 AS (SELECT MAX(market_cap) AS max_at_2000 FROM all_symbols
WHERE ipo_year=2000);
WITH year_2001 AS (SELECT MAX(market_cap) AS max_at_2001 FROM all_symbols
WHERE ipo_year=2001)
EOF
这个写法其实和 CASE WHEN 很类似,但是其表达能力更强大,更灵活。
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
SELECT per_ipo_year, MAX(market_cap) AS max_all_times FROM symbol
GROUP BY CASE
WHEN ipo_year=2000 THEN 'year_2000'
WHEN ipo_year=2001 THEN 'year_2001'
END AS per_ipo_year
EOF
上面的sql查询结果是
{"max_at_2000": 20310000000.0, "max_all_times": 522690000000.0, "max_at_2001": 8762940000.0}
Elasticsearch
{
"aggs": {
"year_2001": {
"filter": {
"term": {
"ipo_year": 2001
}
},
"aggs": {
"max_at_2001": {
"max": {
"field": "market_cap"
}
}
}
},
"year_2000": {
"filter": {
"term": {
"ipo_year": 2000
}
},
"aggs": {
"max_at_2000": {
"max": {
"field": "market_cap"
}
}
}
},
"max_all_times": {
"max": {
"field": "market_cap"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 2,
"aggregations": {
"year_2001": {
"max_at_2001": {
"value": 8762940000.0
},
"doc_count": 38
},
"year_2000": {
"max_at_2000": {
"value": 20310000000.0
},
"doc_count": 58
},
"max_all_times": {
"value": 522690000000.0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "TermQuery",
"lucene": "ipo_year:`Q",
"time": "0.2518270000ms",
"breakdown": {
"score": 0,
"create_weight": 186032,
"next_doc": 0,
"match": 0,
"build_scorer": 48664,
"advance": 17131
}
},
{
"query_type": "TermQuery",
"lucene": "ipo_year:`P",
"time": "0.1200760000ms",
"breakdown": {
"score": 0,
"create_weight": 77254,
"next_doc": 0,
"match": 0,
"build_scorer": 25184,
"advance": 17638
}
},
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.1968800000ms",
"breakdown": {
"score": 0,
"create_weight": 3573,
"next_doc": 180136,
"match": 0,
"build_scorer": 13171,
"advance": 0
}
}
],
"rewrite_time": 4250,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.459413000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2160950000ms"
},
{
"name": "BucketCollector: [[year_2001, year_2000, max_all_times]]",
"reason": "aggregation",
"time": "1.455703000ms"
}
]
}
]
}
]
SELECT INSIDE 往同一方向连续过滤下钻
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
WITH SELECT MAX(market_cap) AS max_all_times FROM symbol AS all_symbols;
WITH SELECT MAX(market_cap) AS max_at_2000 FROM all_symbols
WHERE ipo_year=2000 AS year_2000;
WITH SELECT MAX(market_cap) AS max_at_2001_finance FROM year_2000
WHERE sector='Finance' AS year_2000_finance
EOF
{"max_at_2000": 20310000000.0, "max_all_times": 522690000000.0, "max_at_2001_finance": 985668354.0}
Elasticsearch
{
"aggs": {
"year_2000": {
"filter": {
"term": {
"ipo_year": 2000
}
},
"aggs": {
"max_at_2000": {
"max": {
"field": "market_cap"
}
},
"year_2000_finance": {
"filter": {
"term": {
"sector": "Finance"
}
},
"aggs": {
"max_at_2001_finance": {
"max": {
"field": "market_cap"
}
}
}
}
}
},
"max_all_times": {
"max": {
"field": "market_cap"
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 2,
"aggregations": {
"year_2000": {
"max_at_2000": {
"value": 20310000000.0
},
"year_2000_finance": {
"max_at_2001_finance": {
"value": 985668354.0
},
"doc_count": 2
},
"doc_count": 58
},
"max_all_times": {
"value": 522690000000.0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "TermQuery",
"lucene": "ipo_year:`P",
"time": "0.1897790000ms",
"breakdown": {
"score": 0,
"create_weight": 145762,
"next_doc": 0,
"match": 0,
"build_scorer": 26216,
"advance": 17801
}
},
{
"query_type": "TermQuery",
"lucene": "sector:Finance",
"time": "0.2380290000ms",
"breakdown": {
"score": 0,
"create_weight": 57770,
"next_doc": 0,
"match": 0,
"build_scorer": 55497,
"advance": 124762
}
},
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.1965630000ms",
"breakdown": {
"score": 0,
"create_weight": 3500,
"next_doc": 178347,
"match": 0,
"build_scorer": 14716,
"advance": 0
}
}
],
"rewrite_time": 4190,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.466917000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2712430000ms"
},
{
"name": "BucketCollector: [[year_2000, max_all_times]]",
"reason": "aggregation",
"time": "1.370663000ms"
}
]
}
]
}
]
有了 GROUP BY 下钻和 FILTER 下钻,很多复杂的查询可以一条就查询出来。而这种边下钻边聚合指标的查询能力甚至是传统SQL都不具备的。而且稍微训练一下,就会非常习惯这种下钻的思维方式,写查询也会很自然。