使用 https://github.com/taowen/es-monitor 可以用 SQL 进行 elasticsearch 的查询。按已有字段来分桶是最简单的一种分桶的方式。很多时候我们希望用于分桶的key是需要先经过计算而来的。其中最简单的一种计算方式是按区间段来算histogram。用于计算的字段可以是时间戳,也可能是数值。
GROUP BY DATE_TRUNC(‘year’,”date”)
SQL(其中”date”是指一个列的名字,之所以要加引号是因为date是关键字)
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select year, max(adj_close) from quote where symbol='AAPL' group by date_trunc('year',"date") as year
EOF
{"max(adj_close)": 50.0, "year": "1981-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 51.0, "year": "1982-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 94.0, "year": "1983-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 50.0, "year": "1984-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 46.0, "year": "1985-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 66.0, "year": "1986-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 179.0, "year": "1987-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 143.0, "year": "1988-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 152.0, "year": "1989-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 147.0, "year": "1990-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 228.0, "year": "1991-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 221.0, "year": "1992-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 207.0, "year": "1993-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 141.0, "year": "1994-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 162.0, "year": "1995-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 115.0, "year": "1996-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 96.0, "year": "1997-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 142.0, "year": "1998-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 389.0, "year": "1999-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 476.0, "year": "2000-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 175.0, "year": "2001-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 172.0, "year": "2002-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 164.0, "year": "2003-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 452.0, "year": "2004-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 991.0, "year": "2005-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 1214.0, "year": "2006-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 2643.0, "year": "2007-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 2578.0, "year": "2008-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 2799.0, "year": "2009-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 4305.0, "year": "2010-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 5586.0, "year": "2011-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 9328.0, "year": "2012-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 7800.0, "year": "2013-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 11637.0, "year": "2014-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 13067.0, "year": "2015-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 10478.0, "year": "2016-01-01T00:00:00.000+08:00"}
Elasticsearch
{
"query": {
"term": {
"symbol": "AAPL"
}
},
"aggs": {
"year": {
"date_histogram": {
"field": "date",
"interval": "year",
"time_zone": "+08:00"
},
"aggs": {
"max(adj_close)": {
"max": {
"field": "adj_close"
}
}
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 8790,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 6,
"aggregations": {
"year": {
"buckets": [
{
"max(adj_close)": {
"value": 50.0
},
"key_as_string": "1981-01-01T00:00:00.000+08:00",
"key": 347126400000,
"doc_count": 185
},
{
"max(adj_close)": {
"value": 51.0
},
"key_as_string": "1982-01-01T00:00:00.000+08:00",
"key": 378662400000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 94.0
},
"key_as_string": "1983-01-01T00:00:00.000+08:00",
"key": 410198400000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 50.0
},
"key_as_string": "1984-01-01T00:00:00.000+08:00",
"key": 441734400000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 46.0
},
"key_as_string": "1985-01-01T00:00:00.000+08:00",
"key": 473356800000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 66.0
},
"key_as_string": "1986-01-01T00:00:00.000+08:00",
"key": 504892800000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 179.0
},
"key_as_string": "1987-01-01T00:00:00.000+08:00",
"key": 536428800000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 143.0
},
"key_as_string": "1988-01-01T00:00:00.000+08:00",
"key": 567964800000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 152.0
},
"key_as_string": "1989-01-01T00:00:00.000+08:00",
"key": 599587200000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 147.0
},
"key_as_string": "1990-01-01T00:00:00.000+08:00",
"key": 631123200000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 228.0
},
"key_as_string": "1991-01-01T00:00:00.000+08:00",
"key": 662659200000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 221.0
},
"key_as_string": "1992-01-01T00:00:00.000+08:00",
"key": 694195200000,
"doc_count": 254
},
{
"max(adj_close)": {
"value": 207.0
},
"key_as_string": "1993-01-01T00:00:00.000+08:00",
"key": 725817600000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 141.0
},
"key_as_string": "1994-01-01T00:00:00.000+08:00",
"key": 757353600000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 162.0
},
"key_as_string": "1995-01-01T00:00:00.000+08:00",
"key": 788889600000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 115.0
},
"key_as_string": "1996-01-01T00:00:00.000+08:00",
"key": 820425600000,
"doc_count": 254
},
{
"max(adj_close)": {
"value": 96.0
},
"key_as_string": "1997-01-01T00:00:00.000+08:00",
"key": 852048000000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 142.0
},
"key_as_string": "1998-01-01T00:00:00.000+08:00",
"key": 883584000000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 389.0
},
"key_as_string": "1999-01-01T00:00:00.000+08:00",
"key": 915120000000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 476.0
},
"key_as_string": "2000-01-01T00:00:00.000+08:00",
"key": 946656000000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 175.0
},
"key_as_string": "2001-01-01T00:00:00.000+08:00",
"key": 978278400000,
"doc_count": 248
},
{
"max(adj_close)": {
"value": 172.0
},
"key_as_string": "2002-01-01T00:00:00.000+08:00",
"key": 1009814400000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 164.0
},
"key_as_string": "2003-01-01T00:00:00.000+08:00",
"key": 1041350400000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 452.0
},
"key_as_string": "2004-01-01T00:00:00.000+08:00",
"key": 1072886400000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 991.0
},
"key_as_string": "2005-01-01T00:00:00.000+08:00",
"key": 1104508800000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 1214.0
},
"key_as_string": "2006-01-01T00:00:00.000+08:00",
"key": 1136044800000,
"doc_count": 251
},
{
"max(adj_close)": {
"value": 2643.0
},
"key_as_string": "2007-01-01T00:00:00.000+08:00",
"key": 1167580800000,
"doc_count": 251
},
{
"max(adj_close)": {
"value": 2578.0
},
"key_as_string": "2008-01-01T00:00:00.000+08:00",
"key": 1199116800000,
"doc_count": 253
},
{
"max(adj_close)": {
"value": 2799.0
},
"key_as_string": "2009-01-01T00:00:00.000+08:00",
"key": 1230739200000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 4305.0
},
"key_as_string": "2010-01-01T00:00:00.000+08:00",
"key": 1262275200000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 5586.0
},
"key_as_string": "2011-01-01T00:00:00.000+08:00",
"key": 1293811200000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 9328.0
},
"key_as_string": "2012-01-01T00:00:00.000+08:00",
"key": 1325347200000,
"doc_count": 250
},
{
"max(adj_close)": {
"value": 7800.0
},
"key_as_string": "2013-01-01T00:00:00.000+08:00",
"key": 1356969600000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 11637.0
},
"key_as_string": "2014-01-01T00:00:00.000+08:00",
"key": 1388505600000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 13067.0
},
"key_as_string": "2015-01-01T00:00:00.000+08:00",
"key": 1420041600000,
"doc_count": 252
},
{
"max(adj_close)": {
"value": 10478.0
},
"key_as_string": "2016-01-01T00:00:00.000+08:00",
"key": 1451577600000,
"doc_count": 30
}
]
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "TermQuery",
"lucene": "symbol:AAPL",
"time": "1.496726000ms",
"breakdown": {
"score": 0,
"create_weight": 725453,
"next_doc": 380669,
"match": 0,
"build_scorer": 390604,
"advance": 0
}
}
],
"rewrite_time": 2705,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "6.185877000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.4641090000ms"
},
{
"name": "HistogramAggregator: [year]",
"reason": "aggregation",
"time": "4.456138000ms"
}
]
}
]
}
]
TO_CHAR(DATE_TRUNC(‘year’,”date”), ‘yyyy’)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select year, max(adj_close) from quote where symbol='AAPL' group by to_char(date_trunc('year',"date"), 'yyyy') as year
EOF
{"max(adj_close)": 50.0, "year": "1981"}
{"max(adj_close)": 51.0, "year": "1982"}
{"max(adj_close)": 94.0, "year": "1983"}
{"max(adj_close)": 50.0, "year": "1984"}
{"max(adj_close)": 46.0, "year": "1985"}
{"max(adj_close)": 66.0, "year": "1986"}
{"max(adj_close)": 179.0, "year": "1987"}
{"max(adj_close)": 143.0, "year": "1988"}
{"max(adj_close)": 152.0, "year": "1989"}
{"max(adj_close)": 147.0, "year": "1990"}
{"max(adj_close)": 228.0, "year": "1991"}
{"max(adj_close)": 221.0, "year": "1992"}
{"max(adj_close)": 207.0, "year": "1993"}
{"max(adj_close)": 141.0, "year": "1994"}
{"max(adj_close)": 162.0, "year": "1995"}
{"max(adj_close)": 115.0, "year": "1996"}
{"max(adj_close)": 96.0, "year": "1997"}
{"max(adj_close)": 142.0, "year": "1998"}
{"max(adj_close)": 389.0, "year": "1999"}
{"max(adj_close)": 476.0, "year": "2000"}
{"max(adj_close)": 175.0, "year": "2001"}
{"max(adj_close)": 172.0, "year": "2002"}
{"max(adj_close)": 164.0, "year": "2003"}
{"max(adj_close)": 452.0, "year": "2004"}
{"max(adj_close)": 991.0, "year": "2005"}
{"max(adj_close)": 1214.0, "year": "2006"}
{"max(adj_close)": 2643.0, "year": "2007"}
{"max(adj_close)": 2578.0, "year": "2008"}
{"max(adj_close)": 2799.0, "year": "2009"}
{"max(adj_close)": 4305.0, "year": "2010"}
{"max(adj_close)": 5586.0, "year": "2011"}
{"max(adj_close)": 9328.0, "year": "2012"}
{"max(adj_close)": 7800.0, "year": "2013"}
{"max(adj_close)": 11637.0, "year": "2014"}
{"max(adj_close)": 13067.0, "year": "2015"}
{"max(adj_close)": 10478.0, "year": "2016"}
Elasticsearch
{
"query": {
"term": {
"symbol": "AAPL"
}
},
"aggs": {
"year": {
"date_histogram": {
"field": "date",
"interval": "year",
"time_zone": "+08:00",
"format": "yyyy"
},
"aggs": {
"max(adj_close)": {
"max": {
"field": "adj_close"
}
}
}
}
},
"size": 0
}
GROUP BY HISTOGRAM(ipo_year, 5)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select ipo_year_range, count(*) from symbol group by histogram(ipo_year, 5) as ipo_year_range
EOF
{"ipo_year_range": 1970, "count(*)": 5}
{"ipo_year_range": 1975, "count(*)": 0}
{"ipo_year_range": 1980, "count(*)": 31}
{"ipo_year_range": 1985, "count(*)": 124}
{"ipo_year_range": 1990, "count(*)": 283}
{"ipo_year_range": 1995, "count(*)": 315}
{"ipo_year_range": 2000, "count(*)": 358}
{"ipo_year_range": 2005, "count(*)": 387}
{"ipo_year_range": 2010, "count(*)": 1055}
{"ipo_year_range": 2015, "count(*)": 340}
Elasticsearch
{
"aggs": {
"ipo_year_range": {
"aggs": {},
"histogram": {
"field": "ipo_year",
"interval": 5
}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 2,
"aggregations": {
"ipo_year_range": {
"buckets": [
{
"key": 1970,
"doc_count": 5
},
{
"key": 1975,
"doc_count": 0
},
{
"key": 1980,
"doc_count": 31
},
{
"key": 1985,
"doc_count": 124
},
{
"key": 1990,
"doc_count": 283
},
{
"key": 1995,
"doc_count": 315
},
{
"key": 2000,
"doc_count": 358
},
{
"key": 2005,
"doc_count": 387
},
{
"key": 2010,
"doc_count": 1055
},
{
"key": 2015,
"doc_count": 340
}
]
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.2565110000ms",
"breakdown": {
"score": 0,
"create_weight": 7913,
"next_doc": 220517,
"match": 0,
"build_scorer": 28081,
"advance": 0
}
}
],
"rewrite_time": 2524,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.350590000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.2413570000ms"
},
{
"name": "HistogramAggregator: [ipo_year_range]",
"reason": "aggregation",
"time": "1.112294000ms"
}
]
}
]
}
]
从profile结果可以看出来,两种histogram在底层实现是一样的。