判空
nvl
select nvl(salary,0) from emp;
coalesce
coalesce(a,b,c,’ddd’)
解析:
// scala
if(a != null){
a
}else if(b != null ){
b
}else if(c != null){
c
}else 'ddd'
对集合的操作
将String 转换成Map
str_to_map()
str_to_map(text,delimited1,delimited2)
-- 解析:
-- delimited1是kv kv kv 之间的分隔符
-- delimited2是k与v之间的分隔符
判断集合中是否包含某元素
array_contains()
-- 这是数仓项目中,对于DWS层访客用户表的装载语句中的一个条件判断
-- 判断是否为新增访客
-- 将is_new字段放入set中去重,然后判断是否存在'0',如果存在则返回true,否者返回false
if(array_contains(collect_set(is_new),'0'),'0','1')
将数据放入结构体中
named_struct
with t1 as
(select mid_id,
page_id,
count(*) page_count,
sum(during_time) during_time
from dwd_page_log
where dt = '2020-06-14'
group by mid_id, page_id)
select mid_id,
collect_set(named_struct('page_id', page_id, 'page_count', page_count, 'during_time', during_time))
from t1;
时间函数
- select date_format(‘2017-3-4’,’yyyy-mm-dd HH:mm:ss’);
- select date_add(‘2017-6-29’,4);
terminal: 2017-7-3 - select date_add(‘2017-6-27’,-4);
terminal: 2017-6-23 - select datediff(‘2017-7-3’,’2017-6-29’);
terminal: 4 - select datediff(‘2017-6-29’,’2017-7-3’);
terminal: -4 - select next_day(‘2020-06-14’,’MO’)
返回下一周周一的日期 - select date_add(next_day(‘2020-06-14’,’MO’),-7);
当前周周一的日期 - select regexp_replace(‘2019/07/03’,’/‘,’-‘);
terminal: 2019-07-03
条件判断
case语句
case when then else end
-- 格式:
case
when finish_time is not null and refund_time is not null and date_add(date_format(finish_time,'yyyy-MM-dd'),7) <= '2020-6-14' then date_format(finish_time,'yyyy-MM-dd')
when refund_finish_time is not null then date_format(refund_finish_time,'yyyy-MM-dd')
when cancel_time is not null then date_format(cancel_time,'yyyy-MM-dd')
when expire_time is not null then date_format(expire_time,'yyyy-MM-dd')
else '9999-99-99'
end;
select sum(case sex when '男' then 1 else 0 end) male_count,
sum(case sex when '女' then 1 else 0 end) female_count
from emp group by deptno order by deptno;
if语句
-- 这是数仓项目中,对于DWS层访客用户表的装载语句中的一个条件判断
-- 判断是否为新增访客
-- 将is_new字段放入set中去重,然后判断是否存在'0',如果存在则返回true,否者返回false
-- 然后放入if中进行判断给与相应返回值
if(array_contains(collect_set(is_new),'0'),'0','1')
with as
with
tmp1 as
(
select * from t1
)
tmp2 as
(
select * from t2
)
tmp3 as
(
select * from t3
)
insert overwirte table t4
select *
from tmp1
left join tmp2
on tmp1.id = tmp2.id
left join tmp3
on coalesce(tmp1.id,tmp2.id) = tmp3.id
-- on nvl(tmp1.id,tmp2.id) = tmp3.id
concat//行转列
第一步 t1
select concat_sw(',',xz,xx) xz_xx
from
emp;
第二步t2
select
t1.xz_xx, concat_sw('|',collect_set(t1.name))
from t1
group by t1.xz_xx;
第三步:整合
select
t1.xz_xx, concat_sw('|',collect_set(t1.name))
from
(
select concat_sw(',',xz,xx) xz_xx
from
emp;
) t1
group by t1.xz_xx;
列转行
explode 将hive中复杂的array 或 map 拆分
example
create table if not exists movie_info(
movie_name string,
category array<string>
)
row format delimited fields terminated by '\t'
collection items terminated by ','
load data local inpath '/opt/module/datas/movie.txt' into table into movie_info;
select
movie,
category_name
from
movie_info lateral view explode(category) table_tmp as category_name;
select
movie_name,
category_name
from
movie_info cateral view explode(category) table_tmp as catgory_name;
窗口函数
over():
指定分析函数工作窗口大小,随着行变化
current row
n preceding: 向前移动n行
n following: 向后移动N行
unbounded 启点
unbounded preceding
unbounded following
lag(col,n);
lead(col,n);
ntile(n) 把有序分区的行分发到指定数据的组中,各个组有编号,编号从1开始。对于每一个ntile返回此行所属的组的编号。
create table if not exists business(
name string,
order_date date,
cost double
)
row format delimited fields terminated by ','
load data local inpath '/opt/module/data/business'
into table business;
查询2017-04购买过商品的顾客及总人数
select name,count(*) over()
from business
where substring(orderdate,1,7) = '2017-04'
group by name;
查询顾客购买明细,及月购买总额。
select *,sum(cost) over(partitioned by month(order_date))
from
business;