使用 https://github.com/taowen/es-… 可以用 SQL 进行 elasticsearch 的查询。前面我们使用sum或者avg的输入都是document的字段。如果需要对聚合的结果再次进行sum或者avg也是支持的。
特别注意,这里的语法和常规的SQL聚合的语法是反着来的。SQL的语义是从行,到组,再到更大的组。而Elasticsearch的bucket的概念,是从整体,到细分,再到更细分。所以es-monitor的表达方式是按照elasticsearch的习惯来搞的,bucket不断被子sql细分。然后再被select的projection给聚合回来。
MAX(sum_this_year)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
WITH all_symbols AS (SELECT MAX(sum_this_year) AS max_all_times FROM symbol);
SELECT ipo_year, SUM(market_cap) AS sum_this_year FROM all_symbols
GROUP BY ipo_year LIMIT 5
EOF
{"_bucket_path": ["level2"], "max_all_times": 353502983870.0, "ipo_year": 2014, "sum_this_year": 212327524694.0}
{"_bucket_path": ["level2"], "max_all_times": 353502983870.0, "ipo_year": 2015, "sum_this_year": 87950852154.0}
{"_bucket_path": ["level2"], "max_all_times": 353502983870.0, "ipo_year": 2013, "sum_this_year": 138713332228.0}
{"_bucket_path": ["level2"], "max_all_times": 353502983870.0, "ipo_year": 2012, "sum_this_year": 353502983870.0}
{"_bucket_path": ["level2"], "max_all_times": 353502983870.0, "ipo_year": 2011, "sum_this_year": 84547012061.0}
Elasticsearch
{
"aggs": {
"max_all_times": {
"max_bucket": {
"buckets_path": "ipo_year.sum_this_year"
}
},
"ipo_year": {
"terms": {
"field": "ipo_year",
"size": 5
},
"aggs": {
"sum_this_year": {
"sum": {
"field": "market_cap"
}
}
}
}
},
"size": 0
}
注意这里 max 翻译出来的计算方式是 max_bucket 而不是 max
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 9,
"aggregations": {
"max_all_times": {
"keys": [
"2012"
],
"value": 353502983870.0
},
"ipo_year": {
"buckets": [
{
"sum_this_year": {
"value": 212327524694.0
},
"key": 2014,
"doc_count": 390
},
{
"sum_this_year": {
"value": 87950852154.0
},
"key": 2015,
"doc_count": 334
},
{
"sum_this_year": {
"value": 138713332228.0
},
"key": 2013,
"doc_count": 253
},
{
"sum_this_year": {
"value": 353502983870.0
},
"key": 2012,
"doc_count": 147
},
{
"sum_this_year": {
"value": 84547012061.0
},
"key": 2011,
"doc_count": 144
}
],
"sum_other_doc_count": 1630,
"doc_count_error_upper_bound": 0
}
},
"timed_out": false
}
CSUM(max_adj_close)
累加求和,这个只能在histogram聚合后面使用
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
SELECT year, MAX(adj_close) AS max_adj_close, CSUM(max_adj_close) FROM quote
WHERE symbol='AAPL' AND "date" < TIMESTAMP '1985-01-01 00:00:00'
GROUP BY date_trunc('year', "date") AS year LIMIT
EOF
{"max_adj_close": 50.0, "CSUM(max_adj_close)": 50.0, "year": "1981-01-01T00:00:00.000+08:00"}
{"max_adj_close": 51.0, "CSUM(max_adj_close)": 101.0, "year": "1982-01-01T00:00:00.000+08:00"}
{"max_adj_close": 94.0, "CSUM(max_adj_close)": 195.0, "year": "1983-01-01T00:00:00.000+08:00"}
{"max_adj_close": 50.0, "CSUM(max_adj_close)": 245.0, "year": "1984-01-01T00:00:00.000+08:00"}
Elasticsearch
{
"query": {
"bool": {
"filter": [
{
"term": {
"symbol": "AAPL"
}
},
{
"range": {
"date": {
"lt": 473356800000
}
}
}
]
}
},
"aggs": {
"year": {
"date_histogram": {
"field": "date",
"interval": "year",
"time_zone": "+08:00"
},
"aggs": {
"max_adj_close": {
"max": {
"field": "adj_close"
}
},
"CSUM(max_adj_close)": {
"cumulative_sum": {
"buckets_path": "max_adj_close"
}
}
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 943,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 32,
"aggregations": {
"year": {
"buckets": [
{
"max_adj_close": {
"value": 50.0
},
"CSUM(max_adj_close)": {
"value": 50.0
},
"key_as_string": "1981-01-01T00:00:00.000+08:00",
"key": 347126400000,
"doc_count": 185
},
{
"max_adj_close": {
"value": 51.0
},
"CSUM(max_adj_close)": {
"value": 101.0
},
"key_as_string": "1982-01-01T00:00:00.000+08:00",
"key": 378662400000,
"doc_count": 253
},
{
"max_adj_close": {
"value": 94.0
},
"CSUM(max_adj_close)": {
"value": 195.0
},
"key_as_string": "1983-01-01T00:00:00.000+08:00",
"key": 410198400000,
"doc_count": 252
},
{
"max_adj_close": {
"value": 50.0
},
"CSUM(max_adj_close)": {
"value": 245.0
},
"key_as_string": "1984-01-01T00:00:00.000+08:00",
"key": 441734400000,
"doc_count": 253
}
]
}
},
"timed_out": false
}
DERIVATIVE(max_adj_close)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
SELECT year, MAX(adj_close) AS max_adj_close, DERIVATIVE(max_adj_close) FROM quote
WHERE symbol='AAPL' AND "date" < TIMESTAMP '1985-01-01 00:00:00'
GROUP BY date_trunc('year', "date") AS year
EOF
{"max_adj_close": 50.0, "DERIVATIVE(max_adj_close)": null, "year": "1981-01-01T00:00:00.000+08:00"}
{"max_adj_close": 51.0, "DERIVATIVE(max_adj_close)": 1.0, "year": "1982-01-01T00:00:00.000+08:00"}
{"max_adj_close": 94.0, "DERIVATIVE(max_adj_close)": 43.0, "year": "1983-01-01T00:00:00.000+08:00"}
{"max_adj_close": 50.0, "DERIVATIVE(max_adj_close)": -44.0, "year": "1984-01-01T00:00:00.000+08:00"}
Elasticsearch
{
"query": {
"bool": {
"filter": [
{
"term": {
"symbol": "AAPL"
}
},
{
"range": {
"date": {
"lt": 473356800000
}
}
}
]
}
},
"aggs": {
"year": {
"date_histogram": {
"field": "date",
"interval": "year",
"time_zone": "+08:00"
},
"aggs": {
"max_adj_close": {
"max": {
"field": "adj_close"
}
},
"DERIVATIVE(max_adj_close)": {
"derivative": {
"buckets_path": "max_adj_close"
}
}
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 943,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 4,
"aggregations": {
"year": {
"buckets": [
{
"max_adj_close": {
"value": 50.0
},
"key_as_string": "1981-01-01T00:00:00.000+08:00",
"key": 347126400000,
"doc_count": 185
},
{
"max_adj_close": {
"value": 51.0
},
"DERIVATIVE(max_adj_close)": {
"value": 1.0
},
"key_as_string": "1982-01-01T00:00:00.000+08:00",
"key": 378662400000,
"doc_count": 253
},
{
"max_adj_close": {
"value": 94.0
},
"DERIVATIVE(max_adj_close)": {
"value": 43.0
},
"key_as_string": "1983-01-01T00:00:00.000+08:00",
"key": 410198400000,
"doc_count": 252
},
{
"max_adj_close": {
"value": 50.0
},
"DERIVATIVE(max_adj_close)": {
"value": -44.0
},
"key_as_string": "1984-01-01T00:00:00.000+08:00",
"key": 441734400000,
"doc_count": 253
}
]
}
},
"timed_out": false
}