每天有多少用户首次流失
select t4.aa,count(*)
from
(
select t3.role_id role_id,min(t3.date1) aa
from
(
select t2.role_id role_id,t2.date1 date1,t2.date2 date2
from
(
select t1.role_id role_id,t1.date date1,lag(t1.date,1) over (partition by role_id) date2
from
(
select *,row_number() over (partition by role_id order by ta.date asc) as rn
from
(select distinct role_id,split(time," ")[0] date
from g37.logoutrole
where date between 20160902 and 20161219) ta ) t1) t2
where int(datediff(t2.date2,t2.date1)) > 7 ) t3
group by t3.role_id
) t4
group by t4.aa
分位数
from(
select t1.date a,percentile(t1.a, array(0,0.25,0.5,0.75,1)) over (partition by t1.date) b
from
(select date, role_id,sum(cast(int(fn.json(source, "$.num")) as bigint)) a
from g37.currency_xunzhang
where date between 20161108 and 20161115
and int(fn.json(source,"$.code_op"))= 1
group by date, role_id) t1
) t2
每天和每个月的去重登录id量
SELECT
month(time),
date,
COUNT(DISTINCT role_id) AS uv,
GROUPING__ID
FROM g17_loginrole_utf8_log
where date >= 20161001
GROUP BY month(time), date
GROUPING SETS (month(time), date)
正则提取
select regexp_extract(source, "level=(\\d+)", 1),count(regexp_extract(source, "level=(\\d+)", 1))
from xxx
where date between 20160902 and 20160925
and key rlike "xxx"
group by regexp_extract(source, "level=(\\d+)", 1)
转换时间格式
select sum(t1.pj), sum(t1.hp)
from
(
select fn.change_date_format(time, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH"),regexp_extract(source, "game_server=(\\d+)", 1), regexp_extract(source, "game_uid=(\\d+)", 1), count(pingjia) as pj, sum(pingjia) as hp
from xxx
where date between 20160825 and 20160926
group by
fn.change_date_format(time, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH"),regexp_extract(source, "game_server=(\\d+)", 1), regexp_extract(source, "game_uid=(\\d+)",1)
) as t1
where t1.pj - t1.hp >= 10
连接字符串
select role_id,split(max(concat(time,"|",get_json_object(source,"$.left_count"))) ,"|")[1]
from xxx as aa
where date between 20160925 and 20160927
group by role_id
最值
select role_id,max(get_json_object(source,"$.left_count"))
from xxx
where get_json_object(source,"$.item_name") rlike "aaa"
and date between 20160925 and 20160926
group by role_id
时间排序,取最后一条
select t3.server, t3.role_id, t3.cou, t2.cou
from
(
select server,
role_id,
max(int(get_json_object(source,"$.left_count"))) as cou
from xxx
where date between 20160915 and 20160926
and get_json_object(source,"$.item_name") rlike "aaa"
group by server,
role_id
) as t3
join
(
select t1.server, t1.role_id, int(get_json_object(t1.source,"$.left_count")) as cou from
(
select * , ROW_NUMBER() over (partition by server, role_id ORDER BY time desc) as rn
from xxx
where date between 20160925 and 20160927
and get_json_object(source,"$.item_name") rlike "aaa"
) as t1 where t1.rn = 1 ) as t2
on t3.server = t2.server and t3.role_id = t2.role_id
where t3.cou = t2.cou
rlike正则(或)
select player
from xxx
where player rlike "重叠|叠加|冲突|触发"
and date between 20161001 and 20161008
创建外部表
create external table table_name (
name string,
urs string,
ccid int,
uid int,
hn int,
role_id int
)
row format delimited fields terminated by "\t"
stored as textfile
location '/path/file/'
文件引用为表
insert overwrite directory 'hdfs:///path/path/sjsj'
select a.role_id,time,fn.json(source,"$.role_level")
from xxx as a where date >=20160401 and date <=20160430
and fn.json(source,"$.reason") ="player-upgrade"
and fn.json(source,"$.type") ="lv"
and fn.json(source,"$.role_level") in ('20','60')
and a.role_id in
(select server from xxx)
===
and a.role_id in
(select server from xxx)
流失天数
select t1.server,t1.account_id,t1.liushi,t1.cost,t2.role_id,t2.udid
from
(select server,account_id,(20161013-max(date)) liushi,max(int(fn.json(source, "$.total_cost"))) cost
from loginrole
where date between 20161006 and 20161013
group by server,account_id
) t1
join
(
select fn.json(source, "$.account_id") account_id,fn.json(source, "$.role_id") role_id,fn.json(source, "$.udid") udid,fn.json(source, "$.server") server
from createrole
where date between 20161006 and 20161012) t2
on t1.server = t2.server
where t1.account_id = t2.account_id
实际登录天数
select t4.server,t4.account_id,count(cc)
from
(
select server,account_id,split(time, " ")[0] cc
from loginrole
where date between 20161006 and 20161015
group by server,account_id,split(time, " ")[0]
) t4
group by t4.server,t4.account_id
key是数组
select role_id,count(*)
from xxx
where fn.to_array(fn.json(source, "$.cost[0]"))[0] in ("490001", "490003")
and date between 20160902 and 20160916
group by role_id
SUBSTRING ( expression, start, length )
select distinct aa.account_id
from xxx aa
where date between 20161006 and 20161017
and int(aa.server)<=20000
and int(fn.json(source,"$.level"))>5
and concat(aa.account_id,"|",concat(substring(aa.date,1,4),"-",substring(aa.date,5,2),"-",substring(aa.date,7,2)))
in (
select concat(account_id,"|",date_add(concat(substring(date,1,4),"-",substring(date,5,2),"-",substring(date,7,2)),3)) host_iid_seven
from xxx
where date between 20161006 and 20161017
and int(server)<=20000
)