hive操作总结
时间:2014-11-04 18:15 来源:linux.it.net.cn 作者:IT
1 将排序结果插入到新文件中
hive> insert overwrite table re_table1 select * from table1 cluster by id;
2 在shell中使用hive
$HIVE_HOME/bin/hive -S -e "select * from table1 cluster by id" > /home/hadoop/hadoop/hadoop-1.2.1/test/re_s.txt
3 hive中操作hadoop命令
hive> dfs -ls /data;
4 hive> select * from table1 cluster by school limit 10;
不能写成hive> select * from table1 limit 10 cluster by school;
5 hive中打印出字段名
a 可以直接在hive中添加命令:set hive.cli.print.header=true
b 在$HOME/.hiverc文件中添加配置:set hive.cli.print.header=true;(注意:.hiverc中的每一行都必须是完整的hive语句,需以set开头并以分号结尾;若直接写成 hive.cli.print.header=true 这种形式,启动hive时会报错并退出)
6 hive去重
hive> select id, concat_ws(',', collect_set(school)) from table1 group by id;
在这里CONCAT_WS是UDF,COLLECT_SET是UDAF,COLLECT_SET将group by id后每个分组内的school值去重,并收集为array,方便后续UDF(如CONCAT_WS)使用。
7 查看数据库信息
hive> describe database default;
8 显示是哪个数据库
hive> set hive.cli.print.current.db=true;
9 hive执行按严格格式进行的设置
hive> set hive.mapred.mode=strict;
10 创建外部分区表(注意:应先创建表,之后再向location指定的外部路径中放入数据文件;建表前该外部路径下不应已有文件,否则文件布局可能与分区目录结构冲突)
hive> create external table table32(id int, name string, ip string)
> comment 'table32'
> partitioned by (city string, country string)
> row format delimited fields terminated by '\t'
> location '/data/table32';
hive> select * from table321 where city='beijing' and country='China';
将table321中筛选结果插入到table32中,table321无partition结构,table32有partition结构
hive> insert overwrite table table32
> partition (city='beijing', country='China')
> select id,name,ip from table321 t where t.city='beijing' and t.country='China';
11 多个partition选择写入多个分区
hive> from table321 t
> insert overwrite table table32
> partition (city="shanxi", country="China")
> select id, name, ip where t.city="shanxi" and country="China"
> insert overwrite table table32
> partition (city="shanghai", country="China")
> select id, name, ip where t.city="shanghai" and country="China"
> insert overwrite table table32
> partition (city="beijing", country="China")
> select id, name, ip where t.city="beijing" and country="China";
12 在shell中使用-f运行hive多命令
建立test1.sql文件如下:
use test; 注意这是一个hive语句所以要加;
from table321 t
insert overwrite table table32
partition (city="shanxi", country="China")
select id, name, ip where t.city="shanxi" and country="China"
insert overwrite table table32
partition (city="shanghai", country="China")
select id, name, ip where t.city="shanghai" and country="China"
insert overwrite table table32
partition (city="beijing", country="China")
select id, name, ip where t.city="beijing" and country="China"; 注意要加;
建立test1.sh文件如下:
#!/bin/bash
#tablename="table1"
#limitcount="100"
#/usr/hive/hive-0.11.0/bin/hive -S -e "insert table re_table1 select * from ${tablename} cluster by id"
#echo $HIVE_HOME
#$HIVE_HOME/bin/hive -S -e "select * from table1 cluster by id" > /home/hadoop/hadoop/hadoop-1.2.1/test/re_s.txt
echo "hive start!"
$HIVE_HOME/bin/hive -S -f /home/shell/test1.sql
echo "hive ok!"
13 多表连接查询
导入数据到hdfs
[root@centos2 hadoop-1.2.1]# bin/hadoop fs -put /home/hadoop/hadoop/hadoop-1.2.1/test/table351.txt /data/table/table351
[root@centos2 hadoop-1.2.1]# bin/hadoop fs -put /home/hadoop/hadoop/hadoop-1.2.1/test/table352.txt /data/table/table352
[root@centos2 hadoop-1.2.1]# bin/hadoop fs -put /home/hadoop/hadoop/hadoop-1.2.1/test/table353.txt /data/table/table353
hive> select * from table351 a join table352 b on a.id=b.id;输出a和b表的数据,b的数据添加在a的后面
hive> select a.* from table351 a join table352 b on a.id=b.id;只显示a表的数据
hive> select a.* from table351 a join table352 b on a.id=b.id
> join table353 c on c.id=a.id;
14 hadoop/hive
(责任编辑:IT)
1 将排序结果插入到新文件中 hive> insert overwrite table re_table1 select * from table1 cluster by id; 2 在shell中使用hive $HIVE_HOME/bin/hive -S -e "select * from table1 cluster by id" > /home/hadoop/hadoop/hadoop-1.2.1/test/re_s.txt 3 hive中操作hadoop命令 hive> dfs -ls /data; 4 hive> select * from table1 cluster by school limit 10; 不能写成hive> select * from table1 limit 10 cluster by school; 5 hive中打印出字段名 a 可以直接在hive中添加命令:set hive.cli.print.header=true b 在$HOME/.hiverc文件中添加配置:hive.cli.print.header=true(这种形式使用hive会报错并退出) 6 hive去重 hive> select id, concat_ws(',', collect_set(school)) from table1 group by id; 在这里CONCAT_WS是UDF,COLLECT_SET是UDAF,它将group后的id去重,并转换为了array,方便udf使用。 7 查看数据库信息 hive> describe database default; 8 显示是哪个数据库 hive> set hive.cli.print.current.db=true; 9 hive执行按严格格式进行的设置 hive> set hive.mapred.mode=strict 10 创建外部分区表(创建表前,外部路径文件夹下不能有文件,或者不能创建外部文件夹,需要先创建表再创建外部文件夹) hive> create external table table32(id int, name string, ip string) > comment 'table32' > partitioned by (city string, country string) > row format delimited fields terminated by '\t' > location '/data/table32'; hive> select * from table321 where city='beijing' and country='China'; 将table321中筛选结果插入到table32中,table321无partition结构,table32有partition结构 hive> insert overwrite table table32 > partition (city='beijing', country='China') > select id,name,ip from table321 t where t.city='beijing' and t.country='China'; 11 多个partition选择写入多个分区 hive> from table321 t > insert overwrite table table32 > partition (city="shanxi", country="China") > select id, name, ip where t.city="shanxi" and country="China" > insert overwrite table table32 > partition (city="shanghai", country="China") > select id, name, ip where t.city="shanghai" and country="China" > insert overwrite table table32 > partition (city="beijing", country="China") > select id, name, ip where t.city="beijing" and country="China"; 12 在shell中使用-f运行hive多命令 建立test1.sql文件如下: use test; 注意这是一个hive语句所以要加; from table321 t insert overwrite table table32 partition 
(city="shanxi", country="China") select id, name, ip where t.city="shanxi" and country="China" insert overwrite table table32 partition (city="shanghai", country="China") select id, name, ip where t.city="shanghai" and country="China" insert overwrite table table32 partition (city="beijing", country="China") select id, name, ip where t.city="beijing" and country="China"; 注意要加; 建立test1.sh文件如下: #!/bin/bash #tablename="table1" #limitcount="100" #/usr/hive/hive-0.11.0/bin/hive -S -e "insert table re_table1 select * from ${tablename} cluster by id" #echo $HIVE_HOME #$HIVE_HOME/bin/hive -S -e "select * from table1 cluster by id" > /home/hadoop/hadoop/hadoop-1.2.1/test/re_s.txt echo "hive start!" $HIVE_HOME/bin/hive -S -f /home/shell/test1.sql echo "hive ok!" 13 多表连接查询 导入数据到hdfs [root@centos2 hadoop-1.2.1]# bin/hadoop fs -put /home/hadoop/hadoop/hadoop-1.2.1/test/table351.txt /data/table/table351 [root@centos2 hadoop-1.2.1]# bin/hadoop fs -put /home/hadoop/hadoop/hadoop-1.2.1/test/table352.txt /data/table/table352 [root@centos2 hadoop-1.2.1]# bin/hadoop fs -put /home/hadoop/hadoop/hadoop-1.2.1/test/table353.txt /data/table/table353 hive> select * from table351 a join table352 b on a.id=b.id;输出a和b表的数据,b的数据添加在a的后面 hive> select a.* from table351 a join table352 b on a.id=b.id;只显示a表的数据 hive> select a.* from table351 a join table352 b on a.id=b.id > join table353 c on c.id=a.id; 14 hadoop/hive (责任编辑:IT) |