【05】把 Elasticsearch 当数据库使:HISTOGRAM聚合

使用 https://github.com/taowen/es-monitor 可以用 SQL 进行 elasticsearch 的查询。按已有字段来分桶是最简单的一种分桶的方式。很多时候我们希望用于分桶的key是需要先经过计算而来的。其中最简单的一种计算方式是按区间段来算histogram。用于计算的字段可以是时间戳,也可能是数值。

GROUP BY DATE_TRUNC(‘year’,”date”)

SQL(其中”date”是指一个列的名字,之所以要加引号是因为date是关键字)

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select year, max(adj_close) from quote where symbol='AAPL' group by date_trunc('year',"date") as year
EOF
{"max(adj_close)": 50.0, "year": "1981-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 51.0, "year": "1982-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 94.0, "year": "1983-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 50.0, "year": "1984-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 46.0, "year": "1985-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 66.0, "year": "1986-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 179.0, "year": "1987-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 143.0, "year": "1988-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 152.0, "year": "1989-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 147.0, "year": "1990-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 228.0, "year": "1991-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 221.0, "year": "1992-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 207.0, "year": "1993-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 141.0, "year": "1994-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 162.0, "year": "1995-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 115.0, "year": "1996-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 96.0, "year": "1997-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 142.0, "year": "1998-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 389.0, "year": "1999-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 476.0, "year": "2000-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 175.0, "year": "2001-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 172.0, "year": "2002-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 164.0, "year": "2003-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 452.0, "year": "2004-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 991.0, "year": "2005-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 1214.0, "year": "2006-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 2643.0, "year": "2007-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 2578.0, "year": "2008-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 2799.0, "year": "2009-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 4305.0, "year": "2010-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 5586.0, "year": "2011-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 9328.0, "year": "2012-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 7800.0, "year": "2013-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 11637.0, "year": "2014-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 13067.0, "year": "2015-01-01T00:00:00.000+08:00"}
{"max(adj_close)": 10478.0, "year": "2016-01-01T00:00:00.000+08:00"}

Elasticsearch

{
  "query": {
    "term": {
      "symbol": "AAPL"
    }
  }, 
  "aggs": {
    "year": {
      "date_histogram": {
        "field": "date", 
        "interval": "year", 
        "time_zone": "+08:00"
      }, 
      "aggs": {
        "max(adj_close)": {
          "max": {
            "field": "adj_close"
          }
        }
      }
    }
  }, 
  "size": 0
}
{
  "hits": {
    "hits": [], 
    "total": 8790, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 6, 
  "aggregations": {
    "year": {
      "buckets": [
        {
          "max(adj_close)": {
            "value": 50.0
          }, 
          "key_as_string": "1981-01-01T00:00:00.000+08:00", 
          "key": 347126400000, 
          "doc_count": 185
        }, 
        {
          "max(adj_close)": {
            "value": 51.0
          }, 
          "key_as_string": "1982-01-01T00:00:00.000+08:00", 
          "key": 378662400000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 94.0
          }, 
          "key_as_string": "1983-01-01T00:00:00.000+08:00", 
          "key": 410198400000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 50.0
          }, 
          "key_as_string": "1984-01-01T00:00:00.000+08:00", 
          "key": 441734400000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 46.0
          }, 
          "key_as_string": "1985-01-01T00:00:00.000+08:00", 
          "key": 473356800000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 66.0
          }, 
          "key_as_string": "1986-01-01T00:00:00.000+08:00", 
          "key": 504892800000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 179.0
          }, 
          "key_as_string": "1987-01-01T00:00:00.000+08:00", 
          "key": 536428800000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 143.0
          }, 
          "key_as_string": "1988-01-01T00:00:00.000+08:00", 
          "key": 567964800000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 152.0
          }, 
          "key_as_string": "1989-01-01T00:00:00.000+08:00", 
          "key": 599587200000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 147.0
          }, 
          "key_as_string": "1990-01-01T00:00:00.000+08:00", 
          "key": 631123200000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 228.0
          }, 
          "key_as_string": "1991-01-01T00:00:00.000+08:00", 
          "key": 662659200000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 221.0
          }, 
          "key_as_string": "1992-01-01T00:00:00.000+08:00", 
          "key": 694195200000, 
          "doc_count": 254
        }, 
        {
          "max(adj_close)": {
            "value": 207.0
          }, 
          "key_as_string": "1993-01-01T00:00:00.000+08:00", 
          "key": 725817600000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 141.0
          }, 
          "key_as_string": "1994-01-01T00:00:00.000+08:00", 
          "key": 757353600000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 162.0
          }, 
          "key_as_string": "1995-01-01T00:00:00.000+08:00", 
          "key": 788889600000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 115.0
          }, 
          "key_as_string": "1996-01-01T00:00:00.000+08:00", 
          "key": 820425600000, 
          "doc_count": 254
        }, 
        {
          "max(adj_close)": {
            "value": 96.0
          }, 
          "key_as_string": "1997-01-01T00:00:00.000+08:00", 
          "key": 852048000000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 142.0
          }, 
          "key_as_string": "1998-01-01T00:00:00.000+08:00", 
          "key": 883584000000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 389.0
          }, 
          "key_as_string": "1999-01-01T00:00:00.000+08:00", 
          "key": 915120000000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 476.0
          }, 
          "key_as_string": "2000-01-01T00:00:00.000+08:00", 
          "key": 946656000000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 175.0
          }, 
          "key_as_string": "2001-01-01T00:00:00.000+08:00", 
          "key": 978278400000, 
          "doc_count": 248
        }, 
        {
          "max(adj_close)": {
            "value": 172.0
          }, 
          "key_as_string": "2002-01-01T00:00:00.000+08:00", 
          "key": 1009814400000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 164.0
          }, 
          "key_as_string": "2003-01-01T00:00:00.000+08:00", 
          "key": 1041350400000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 452.0
          }, 
          "key_as_string": "2004-01-01T00:00:00.000+08:00", 
          "key": 1072886400000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 991.0
          }, 
          "key_as_string": "2005-01-01T00:00:00.000+08:00", 
          "key": 1104508800000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 1214.0
          }, 
          "key_as_string": "2006-01-01T00:00:00.000+08:00", 
          "key": 1136044800000, 
          "doc_count": 251
        }, 
        {
          "max(adj_close)": {
            "value": 2643.0
          }, 
          "key_as_string": "2007-01-01T00:00:00.000+08:00", 
          "key": 1167580800000, 
          "doc_count": 251
        }, 
        {
          "max(adj_close)": {
            "value": 2578.0
          }, 
          "key_as_string": "2008-01-01T00:00:00.000+08:00", 
          "key": 1199116800000, 
          "doc_count": 253
        }, 
        {
          "max(adj_close)": {
            "value": 2799.0
          }, 
          "key_as_string": "2009-01-01T00:00:00.000+08:00", 
          "key": 1230739200000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 4305.0
          }, 
          "key_as_string": "2010-01-01T00:00:00.000+08:00", 
          "key": 1262275200000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 5586.0
          }, 
          "key_as_string": "2011-01-01T00:00:00.000+08:00", 
          "key": 1293811200000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 9328.0
          }, 
          "key_as_string": "2012-01-01T00:00:00.000+08:00", 
          "key": 1325347200000, 
          "doc_count": 250
        }, 
        {
          "max(adj_close)": {
            "value": 7800.0
          }, 
          "key_as_string": "2013-01-01T00:00:00.000+08:00", 
          "key": 1356969600000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 11637.0
          }, 
          "key_as_string": "2014-01-01T00:00:00.000+08:00", 
          "key": 1388505600000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 13067.0
          }, 
          "key_as_string": "2015-01-01T00:00:00.000+08:00", 
          "key": 1420041600000, 
          "doc_count": 252
        }, 
        {
          "max(adj_close)": {
            "value": 10478.0
          }, 
          "key_as_string": "2016-01-01T00:00:00.000+08:00", 
          "key": 1451577600000, 
          "doc_count": 30
        }
      ]
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "TermQuery",
        "lucene": "symbol:AAPL",
        "time": "1.496726000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 725453,
          "next_doc": 380669,
          "match": 0,
          "build_scorer": 390604,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 2705,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "6.185877000ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.4641090000ms"
          },
          {
            "name": "HistogramAggregator: [year]",
            "reason": "aggregation",
            "time": "4.456138000ms"
          }
        ]
      }
    ]
  }
]

TO_CHAR(DATE_TRUNC(‘year’,”date”), ‘yyyy’)

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select year, max(adj_close) from quote where symbol='AAPL' group by to_char(date_trunc('year',"date"), 'yyyy') as year
EOF
{"max(adj_close)": 50.0, "year": "1981"}
{"max(adj_close)": 51.0, "year": "1982"}
{"max(adj_close)": 94.0, "year": "1983"}
{"max(adj_close)": 50.0, "year": "1984"}
{"max(adj_close)": 46.0, "year": "1985"}
{"max(adj_close)": 66.0, "year": "1986"}
{"max(adj_close)": 179.0, "year": "1987"}
{"max(adj_close)": 143.0, "year": "1988"}
{"max(adj_close)": 152.0, "year": "1989"}
{"max(adj_close)": 147.0, "year": "1990"}
{"max(adj_close)": 228.0, "year": "1991"}
{"max(adj_close)": 221.0, "year": "1992"}
{"max(adj_close)": 207.0, "year": "1993"}
{"max(adj_close)": 141.0, "year": "1994"}
{"max(adj_close)": 162.0, "year": "1995"}
{"max(adj_close)": 115.0, "year": "1996"}
{"max(adj_close)": 96.0, "year": "1997"}
{"max(adj_close)": 142.0, "year": "1998"}
{"max(adj_close)": 389.0, "year": "1999"}
{"max(adj_close)": 476.0, "year": "2000"}
{"max(adj_close)": 175.0, "year": "2001"}
{"max(adj_close)": 172.0, "year": "2002"}
{"max(adj_close)": 164.0, "year": "2003"}
{"max(adj_close)": 452.0, "year": "2004"}
{"max(adj_close)": 991.0, "year": "2005"}
{"max(adj_close)": 1214.0, "year": "2006"}
{"max(adj_close)": 2643.0, "year": "2007"}
{"max(adj_close)": 2578.0, "year": "2008"}
{"max(adj_close)": 2799.0, "year": "2009"}
{"max(adj_close)": 4305.0, "year": "2010"}
{"max(adj_close)": 5586.0, "year": "2011"}
{"max(adj_close)": 9328.0, "year": "2012"}
{"max(adj_close)": 7800.0, "year": "2013"}
{"max(adj_close)": 11637.0, "year": "2014"}
{"max(adj_close)": 13067.0, "year": "2015"}
{"max(adj_close)": 10478.0, "year": "2016"}

Elasticsearch

{
  "query": {
    "term": {
      "symbol": "AAPL"
    }
  }, 
  "aggs": {
    "year": {
      "date_histogram": {
        "field": "date", 
        "interval": "year", 
        "time_zone": "+08:00", 
        "format": "yyyy"
      }, 
      "aggs": {
        "max(adj_close)": {
          "max": {
            "field": "adj_close"
          }
        }
      }
    }
  }, 
  "size": 0
}

GROUP BY HISTOGRAM(ipo_year, 5)

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select ipo_year_range, count(*) from symbol group by histogram(ipo_year, 5) as ipo_year_range
EOF
{"ipo_year_range": 1970, "count(*)": 5}
{"ipo_year_range": 1975, "count(*)": 0}
{"ipo_year_range": 1980, "count(*)": 31}
{"ipo_year_range": 1985, "count(*)": 124}
{"ipo_year_range": 1990, "count(*)": 283}
{"ipo_year_range": 1995, "count(*)": 315}
{"ipo_year_range": 2000, "count(*)": 358}
{"ipo_year_range": 2005, "count(*)": 387}
{"ipo_year_range": 2010, "count(*)": 1055}
{"ipo_year_range": 2015, "count(*)": 340}

Elasticsearch

{
  "aggs": {
    "ipo_year_range": {
      "aggs": {}, 
      "histogram": {
        "field": "ipo_year", 
        "interval": 5
      }
    }
  }, 
  "size": 0
}
{
  "hits": {
    "hits": [], 
    "total": 6714, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 2, 
  "aggregations": {
    "ipo_year_range": {
      "buckets": [
        {
          "key": 1970, 
          "doc_count": 5
        }, 
        {
          "key": 1975, 
          "doc_count": 0
        }, 
        {
          "key": 1980, 
          "doc_count": 31
        }, 
        {
          "key": 1985, 
          "doc_count": 124
        }, 
        {
          "key": 1990, 
          "doc_count": 283
        }, 
        {
          "key": 1995, 
          "doc_count": 315
        }, 
        {
          "key": 2000, 
          "doc_count": 358
        }, 
        {
          "key": 2005, 
          "doc_count": 387
        }, 
        {
          "key": 2010, 
          "doc_count": 1055
        }, 
        {
          "key": 2015, 
          "doc_count": 340
        }
      ]
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MatchAllDocsQuery",
        "lucene": "*:*",
        "time": "0.2565110000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 7913,
          "next_doc": 220517,
          "match": 0,
          "build_scorer": 28081,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 2524,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "2.350590000ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.2413570000ms"
          },
          {
            "name": "HistogramAggregator: [ipo_year_range]",
            "reason": "aggregation",
            "time": "1.112294000ms"
          }
        ]
      }
    ]
  }
]

从profile结果可以看出来,两种histogram在底层实现是一样的。

    原文作者:taowen
    原文地址: https://segmentfault.com/a/1190000004454721
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞