题目
今天做了两道蚂蚁金服的题目,题目蛮不错的,记录下吧
背景说明:
以下表记录了用户每天的蚂蚁森林低碳生活领取的记录流水。
table_name:user_low_carbon
user_id data_dt low_carbon
用户 日期 减少碳排放(g)
蚂蚁森林植物换购表,用于记录申领环保植物所需要减少的碳排放量
table_name: plant_carbon
plant_id plant_name low_carbon
植物编号 植物名 换购植物所需要的碳
—-题目
1.蚂蚁森林植物申领统计
问题:假设2017年1月1日开始记录低碳数据(user_low_carbon),
假设2017年10月1日之前满足申领条件的用户都申领了一颗p004-胡杨,
剩余的能量全部用来领取“p002-沙柳” 。
统计在10月1日累计申领“p002-沙柳” 排名前10的用户信息;以及他比后一名多领了几颗沙柳。
得到的统计结果如下表样式:
user_id plant_count less_count(比后一名多领了几颗沙柳)
u_101 1000 100
u_088 900 400
u_103 500 …
2、蚂蚁森林低碳用户排名分析
问题:查询user_low_carbon表中每日流水记录,条件为:
用户在2017年,连续三天(或以上)的天数里,
每天减少碳排放(low_carbon)都超过100g的用户低碳流水。
需要查询返回满足以上条件的user_low_carbon表中的记录流水。
例如用户u_002符合条件的记录如下,因为2017/1/2~2017/1/5连续四天的碳排放量之和都大于等于100g:
seq(key) user_id data_dt low_carbon
xxxxx10 u_002 2017/1/2 150
xxxxx11 u_002 2017/1/2 70
xxxxx12 u_002 2017/1/3 30
xxxxx13 u_002 2017/1/3 80
xxxxx14 u_002 2017/1/4 150
xxxxx14 u_002 2017/1/5 101
备注:统计方法不限于sql、procedure、python,java等
第一题答案:
-- 第一题
-- t3
select t3.user_id,
t3.num_p002,
nvl(t3.num_p002 - lead(t3.num_p002,1) over(order by num_p002 desc),'后面没有了')
from(
select t2.user_id,
cast((t2.summer_low_carbon - pc.low_carbon) / pc2.low_carbon as int) num_p002
from(
select t1.user_id,sum(t1.low_carbon) summer_low_carbon
from(
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyyy-MM-dd') date_d,
low_carbon
from user_low_carbon
)t1
where datediff('2017-10-1',t1.date_d) > 0
group by user_id
order by user_id
)t2
left join plant_carbon pc
on pc.plant_id <=> 'p004'
left join plant_carbon pc2
on pc2.plant_id <=> 'p002'
)t3
解题过程
-- t0
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyyy-MM-dd') date_d,
low_carbon
from user_low_carbon
-- t1
select t1.user_id,sum(t1.low_carbon) summer_low_carbon
from(
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyyy-MM-dd') date_d,
low_carbon
from user_low_carbon
)t1
where datediff('2017-10-1',t1.date_d) > 0
group by user_id
order by user_id
-- t2
select t2.user_id,
cast((t2.summer_low_carbon - pc.low_carbon) / pc2.low_carbon as int) num_p002
from(
select t1.user_id,sum(t1.low_carbon) summer_low_carbon
from(
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyyy-MM-dd') date_d,
low_carbon
from user_low_carbon
)t1
where datediff('2017-10-1',t1.date_d) > 0
group by user_id
order by user_id
)t2
left join plant_carbon pc
on pc.plant_id <=> 'p004'
left join plant_carbon pc2
on pc2.plant_id <=> 'p002'
第二题答案
select t6.user_id,tmp2.key,tmp2.value
from(
select t5.user_id,
tmp.ex_date
from(
select t4.user_id,
t4.date_tong,
collect_list(map(t4.f_date,t4.sum_low_carbon)) list_map_d
from(
select t3.user_id,
t3.f_date,
t3.sum_low_carbon,
date_add(t3.f_date,t3.rank_num) date_tong
from(
select t2.user_id,
t2.f_date,
t2.sum_low_carbon,
rank() over (distribute by t2.user_id sort by t2.f_date desc) rank_num
from(
select t1.user_id,t1.f_date,
sum(t1.low_carbon) sum_low_carbon
from(
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyy-MM-dd') f_date,
low_carbon
from user_low_carbon
)t1
group by t1.user_id,t1.f_date
)t2
where t2.sum_low_carbon > 100
)t3
)t4
group by user_id,t4.date_tong
having count(*) >= 3
)t5 lateral view explode(t5.list_map_d) tmp as ex_date
)t6 lateral view explode(t6.ex_date) tmp2 as key,value
order by t6.user_id,tmp2.key;
解题过程
-- 第二题
-- t0
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyy-MM-dd') f_date,
low_carbon
from user_low_carbon
-- t1
select t1.user_id,t1.f_date,sum(t1.low_carbon)
from(
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyy-MM-dd') f_date,
low_carbon
from user_low_carbon
)t1
group by t1.user_id,t1.f_date
order by t1.user_id asc,t1.f_date desc
-- t2
select t2.user_id,
t2.f_date,
t2.sum_low_carbon,
rank() over (distribute by t2.user_id sort by t2.f_date desc) rank_num
from(
select t1.user_id,t1.f_date,sum(t1.low_carbon) sum_low_carbon
from(
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyy-MM-dd') f_date,
low_carbon
from user_low_carbon
)t1
group by t1.user_id,t1.f_date
)t2
where t2.sum_low_carbon > 100
-- order by t2.user_id asc,t2.f_date desc;
-- t3
select t3.user_id,
t3.f_date,
t3.sum_low_carbon,
date_add(t3.f_date,t3.rank_num) date_tong
from(
select t2.user_id,
t2.f_date,
t2.sum_low_carbon,
rank() over (distribute by t2.user_id sort by t2.f_date desc) rank_num
from(
select t1.user_id,t1.f_date,sum(t1.low_carbon) sum_low_carbon
from(
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyy-MM-dd') f_date,
low_carbon
from user_low_carbon
)t1
group by t1.user_id,t1.f_date
)t2
where t2.sum_low_carbon > 100
)t3
where user_id = 'u_010'
order by t3.user_id,t3.f_date
-- t4
select t4.user_id,
t4.date_tong,
count(*),
/* collect_list(t4.f_date) date_list,
collect_list(cast(t4.sum_low_carbon as string)) l_c_list,*/ collect_list(map(t4.f_date,t4.sum_low_carbon)) list_map_d
-- `map`(t4.f_date,cast(t4.sum_low_carbon as string)) over(partition by t4.user_id,t4.sum_low_carbon)
-- concat_ws(',',collect_list(t4.f_date)) date_list, -- concat_ws(',',collect_list(cast(t4.sum_low_carbon as string))) l_c_listfrom(
select t3.user_id,
t3.f_date,
t3.sum_low_carbon,
date_add(t3.f_date,t3.rank_num) date_tong
from(
select t2.user_id,
t2.f_date,
t2.sum_low_carbon,
rank() over (distribute by t2.user_id sort by t2.f_date desc) rank_num
from(
select t1.user_id,t1.f_date,
sum(t1.low_carbon) sum_low_carbon
/* concat_ws('|',collect_list(t1.f_date)),
concat_ws('|',collect_list(cast(t1.low_carbon as string)))*/from(
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyy-MM-dd') f_date,
low_carbon
from user_low_carbon
)t1
group by t1.user_id,t1.f_date
)t2
where t2.sum_low_carbon > 100
)t3
)t4
group by user_id,t4.date_tong
having count(*) >= 3
-- t5
select t5.user_id,
tmp.ex_date
from(
select t4.user_id,
t4.date_tong,
count(*),
/* collect_list(t4.f_date) date_list,
collect_list(cast(t4.sum_low_carbon as string)) l_c_list,*/ collect_list(map(t4.f_date,t4.sum_low_carbon)) list_map_d
-- `map`(t4.f_date,cast(t4.sum_low_carbon as string)) over(partition by t4.user_id,t4.sum_low_carbon)
-- concat_ws(',',collect_list(t4.f_date)) date_list, -- concat_ws(',',collect_list(cast(t4.sum_low_carbon as string))) l_c_listfrom(
select t3.user_id,
t3.f_date,
t3.sum_low_carbon,
date_add(t3.f_date,t3.rank_num) date_tong
from(
select t2.user_id,
t2.f_date,
t2.sum_low_carbon,
rank() over (distribute by t2.user_id sort by t2.f_date desc) rank_num
from(
select t1.user_id,t1.f_date,
sum(t1.low_carbon) sum_low_carbon
/* concat_ws('|',collect_list(t1.f_date)),
concat_ws('|',collect_list(cast(t1.low_carbon as string)))*/from(
select user_id,
date_format(concat_ws('-',substring(date_dt,1,4),substring(date_dt,6,1),substring(date_dt,8,1)),'yyy-MM-dd') f_date,
low_carbon
from user_low_carbon
)t1
group by t1.user_id,t1.f_date
)t2
where t2.sum_low_carbon > 100
)t3
)t4
group by user_id,t4.date_tong
having count(*) >= 3
)t5 lateral view explode(t5.list_map_d) tmp as ex_date
where t5.user_id = 'u_002'
order by t5.user_id,tmp.ex_date