hive，

和通数据库htsjk.Com2019-07-13 04:56 来源:未知阅读:14349 评论 357 热度2

标签：

hive，

hive的基本操作

创建表

create table city( province_code INT, province_name string, city_code INT, city_name string ) row FORMAT delimited fields terminated by ',' lines terminated by '\n';

导入本地源表

load data local inpath '/home/bigdata/hive/city.txt' into table city;

注意
sql语句不能用tab键修改格式，用两个空格即可。

设置表头列名显示

set hive.cli.print.header=true;

显示拥有最多市的10个省份

select province_name,count(*) as count from city group by province_name order by count desc limit 10;

查看省的数目

select count(distinct province_name) from city;

查出只有一个市的省份

select
  province_name,cnt
from
  (
  select
    province_name,count(*)
  as
    cnt
  from
    city
  group by
    province_name
  )a
where
 cnt = 1;

外表和内表

外表

CREATE EXTERNAL TABLE city_ex(
  province_code int,
  province_name string,
  city_code int,
  city_name string)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n'
location '/user/dang/city';

内表

create table city_in(
  province_code int,
  province_name string,
  city_code int,
  city_name string)
row format delimited
fields terminated by ','
lines terminated by '\n'
location '/user/dang/city';

注意

分区表

静态分区

create table user_daily(
  uid int,
  city_code int,
  model string,
  access string
)
partitioned by (p_date string);

insert overwrite table user_daily partition (p_date = '2107-01-01')
select * from user;

设置动态分区

set hive.exec.dynamic.partition.mode=nonstrict;

INSERT OVERWRITE TABLE user_daily PARTITION (p_date) 
SELECT *,'2017-09-02' FROM user
UNION ALL
SELECT *,'2017-09-03' FROM user
;

显示所有的分区表

show partitions user_daily;

修改或删除分区

ALTER TABLE user_daily PARTITION (p_date='2017-01-01') RENAME TO PARTITION (p_date='20170101');

DROP TABLE user_daily partition (p_date ='20170101');

select sum(if(access ='2G',1,0))/count(1) from user;

select
  case
    when uid % 10 in (0,1,2,3) then '0-3'
    when uid % 10 in (4,5,6,7) then '4-7'
    else '8-9'
  end as interval,
  count(*) as cnt
from user
group by
  case
    when uid % 10 in (0,1,2,3) then '0-3'
    when uid % 10 in (4,5,6,7) then '4-7'
    else '8-9'
  end;

select collect_set(access) from user;

select collect_list(access) from user;

左连接
select user.uid, user.city_code, city.city_name
from
(select * from user where uid <= 100) user
left join
(select * from city where province_code <= 30) city
on (user.city_code = city.city_code)
limit 20;


右连接
select user.uid, user.city_code, city.city_name
from
(select * from user where uid <= 100) user
right join
(select * from city where province_code <= 30) city
on (user.city_code = city.city_code)
limit 20;


内链接
select user.uid, user.city_code, city.city_name
from
(select * from user where uid <= 100) user
inner join
(select * from city where province_code <= 30) city
on (user.city_code = city.city_code)
limit 20;


完全外连接
select user.uid, user.city_code, city.city_name
from
(select * from user where uid <= 100) user
full join
(select * from city where province_code <= 30) city
on (user.city_code = city.city_code)
limit 200;

分组topN（窗口函数）

1）分组
（2）排序
（3）top1

select access,city_code,uid
from
(
  select uid,access,city_code,
  row_number() over (partition by access order by city_code desc) as row_num
  from user
)a
where row_num = 1;

累计

unbounded preceding 第一行
current row 当前行

select p_date,
  sum(cnt) over (order by p_date asc rows between unbounded preceding and current row)
from
(
  select p_date, count(*) as cnt
  from user_daily
  where p_date between '2017-09-01' and '2017-09-30'
  group by p_date
)a;