【03】把 Elasticsearch 当数据库使：简单指标

2024年2月16日 192次阅读来源: taowen

使用 https://github.com/taowen/es-monitor 可以用 SQL 进行 elasticsearch 的查询。今天需要做一些最简单的聚合查询

COUNT(*)

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select count(*) from quote                                           
EOF

{"count(*)": 20994400}

Elasticsearch

{
  "aggs": {}, 
  "size": 0
}

{
  "hits": {
    "hits": [], 
    "total": 20994400, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 26, 
  "timed_out": false
}

这个就不算聚合，只是看了一下最终满足过滤条件的 total hits count。

COUNT(ipo_year)

这个和 COUNT(*) 的区别是 COUNT(ipo_year) 要求字段必须有值才算一个。

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select count(ipo_year) from symbol
EOF

{"count(ipo_year)": 2898}

Elasticsearch

{
  "aggs": {
    "count(ipo_year)": {
      "value_count": {
        "field": "ipo_year"
      }
    }
  }, 
  "size": 0
}

{
  "hits": {
    "hits": [], 
    "total": 6714, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 55, 
  "aggregations": {
    "count(ipo_year)": {
      "value": 2898
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MatchAllDocsQuery",
        "lucene": "*:*",
        "time": "0.3204170000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 10688,
          "next_doc": 278660,
          "match": 0,
          "build_scorer": 31069,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 2279,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "2.957183000ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.2319240000ms"
          },
          {
            "name": "ValueCountAggregator: [count(ipo_year)]",
            "reason": "aggregation",
            "time": "1.999916000ms"
          }
        ]
      }
    ]
  }
]

这是我们的第一个聚合例子。可以从profile结果看出来，其实现方式在采集文档的时候加上了ValueCountAggregator统计了字段非空的文档数量。

COUNT(DISTINCT ipo_year)

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select count(distinct ipo_year) from symbol
EOF

{"count(distinct ipo_year)": 39}

Elasticsearch

{
  "aggs": {
    "count(distinct ipo_year)": {
      "cardinality": {
        "field": "ipo_year"
      }
    }
  }, 
  "size": 0
}

{
  "hits": {
    "hits": [], 
    "total": 6714, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 24, 
  "aggregations": {
    "count(distinct ipo_year)": {
      "value": 39
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MatchAllDocsQuery",
        "lucene": "*:*",
        "time": "0.2033600000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 7501,
          "next_doc": 162905,
          "match": 0,
          "build_scorer": 32954,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 2300,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "2.438386000ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.2240230000ms"
          },
          {
            "name": "CardinalityAggregator: [count(distinct ipo_year)]",
            "reason": "aggregation",
            "time": "1.471620000ms"
          }
        ]
      }
    ]
  }
]

这个例子里 ValueCountAggregator 变成了 CardinalityAggregator

SUM(market_cap)

MIN/MAX/AVG/SUM 这几个简单的聚合也是支持的

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select sum(market_cap) from symbol         
EOF

{"sum(market_cap)": 11454155180142.0}

Elasticsearch

{
  "aggs": {
    "sum(market_cap)": {
      "sum": {
        "field": "market_cap"
      }
    }
  }, 
  "size": 0
}

{
  "hits": {
    "hits": [], 
    "total": 6714, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 15, 
  "aggregations": {
    "sum(market_cap)": {
      "value": 11454155180142.0
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MatchAllDocsQuery",
        "lucene": "*:*",
        "time": "0.2026870000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 8097,
          "next_doc": 163069,
          "match": 0,
          "build_scorer": 31521,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 2151,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "2.461247000ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.3302140000ms"
          },
          {
            "name": "SumAggregator: [sum(market_cap)]",
            "reason": "aggregation",
            "time": "1.102363000ms"
          }
        ]
      }
    ]
  }
]

过滤 + 聚合

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select sum(market_cap) from symbol where ipo_year=1998
EOF

{"sum(market_cap)": 107049150786.0}

Elasticsearch

{
  "query": {
    "term": {
      "ipo_year": 1998
    }
  }, 
  "aggs": {
    "sum(market_cap)": {
      "sum": {
        "field": "market_cap"
      }
    }
  }, 
  "size": 0
}

{
  "hits": {
    "hits": [], 
    "total": 56, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 11, 
  "aggregations": {
    "sum(market_cap)": {
      "value": 107049150786.0
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "TermQuery",
        "lucene": "ipo_year:`N",
        "time": "0.4526400000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 220579,
          "next_doc": 159412,
          "match": 0,
          "build_scorer": 72649,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 3750,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "0.2203470000ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.009478000000ms"
          },
          {
            "name": "SumAggregator: [sum(market_cap)]",
            "reason": "aggregation",
            "time": "0.1557820000ms"
          }
        ]
      }
    ]
  }
]

query 过滤完，然后再计算 aggs

    原文作者：taowen
    原文地址: https://segmentfault.com/a/1190000004454399
    本文转自网络文章，转载此文章仅为分享知识，如有侵权，请联系博主进行删除。