#检查是否有lzop命令
[wangly@hadoop000 ~]$ which lzop
#若没有,执行如下安装命令
[wangly@hadoop000 ~]$ yum install -y svn ncurses-devel
[wangly@hadoop000 ~]$ yum install -y gcc gcc-c++ make cmake
[wangly@hadoop000 ~]$ yum install -y openssl openssl-devel svn ncurses-devel zlib-devel libtool
[wangly@hadoop000 ~]$ yum install -y lzo lzo-devel lzop autoconf automake cmake

2. 准备测试数据并且使用lzo压缩
[wangly@hadoop000 lzo]$ vi user_lzo.txt
#lzo压缩:lzop -v file    lzo解压:lzop -dv file
[wangly@hadoop000 lzo]$ lzop -v user_lzo.txt
#user_lzo.txt 的内容如下
henren,18,girl
haungtian,20,man
qingdi,22,man

3. hadoop-lzo编译
4. maven编译
4.2 改hadoop版本
hadoop-lzo的源码在GitHub上是开源的,源码地址:https://github.com/twitter/hadoop-lzo (GitHub - twitter/hadoop-lzo: Refactored version of code.google.com/hadoop-gpl-compression for hadoop 0.20)
我这里hadoop是3.2.2版本
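[root@hadoop000 hadoop-lzo-master]# vi pom.xml
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <hadoop.current.version>3.2.2</hadoop.current.version>
    <hadoop.old.version>1.0.4</hadoop.old.version>
</properties>

4.3 编译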
[root@hadoop000 hadoop-lzo-master]# mvn clean package -Dmaven.test.skip=true
#进入编译好的目录
[root@hadoop000 hadoop-lzo-master]# cd target/
[root@hadoop000 target]# ll
total 436
...
-rw-r--r-- 1 root root 199669 Jan 15 22:11 hadoop-lzo-0.4.21-SNAPSHOT.jar
...

4.4 拷贝编译好的lzo文件到hadoop
#注意:目标路径应为hadoop安装目录下的 share/hadoop/common/(例如 $HADOOP_HOME/share/hadoop/common/),请按实际安装位置调整
[root@hadoop000 target]# cp hadoop-lzo-0.4.21-SNAPSHOT.jar hadoop/share/hadoop/common/

5. 改hadoop配置文件
core-site.xml 添加
<!-- 配置压缩类 -->
<property>
    <name>io.compression.codecs</name>
    <value>
        org.apache.hadoop.io.compress.GzipCodec,
        org.apache.hadoop.io.compress.DefaultCodec,
        org.apache.hadoop.io.compress.BZip2Codec,
        org.apache.hadoop.io.compress.SnappyCodec,
        com.hadoop.compression.lzo.LzoCodec,
        com.hadoop.compression.lzo.LzopCodec
    </value>
</property>
<property>
    <name>io.compression.codec.lzo.class</name>
    <value>com.hadoop.compression.lzo.LzoCodec</value>
</property>

mapred-site.xml 添加
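<!-- map输出压缩(注:mapred.compress.map.output 与 mapred.map.output.compression.codec 是旧属性名,
     新版本中对应 mapreduce.map.output.compress 与 mapreduce.map.output.compress.codec) -->
<property>
    <name>mapred.compress.map.output</name>
    <value>true</value>
</property>
<property>
    <name>mapred.map.output.compression.codec</name>
    <value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<!-- reduce输出压缩 -->
<property>
    <name>mapreduce.output.fileoutputformat.compress</name>
    <value>true</value>
</property>
<property>
    <name>mapreduce.output.fileoutputformat.compress.codec</name>
    <value>org.apache.hadoop.io.compress.BZip2Codec</value>
</property>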
如果是集群模式,需要将上述jar包和修改后的配置文件同步到其他节点。
6. 测试hive表
#创建表
hive (wangly)> create table user_lzo(
> name string,
> age int,
> gender string
> ) row format delimited fields terminated by ','
> STORED AS INPUTFORMAT "com.hadoop.mapred.DeprecatedLzoTextInputFormat"
> OUTPUTFORMAT "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat";
#加载数据--数据是上边lzo压缩的数据
hive (wangly)> load data local inpath'/home/wangly/data/lzo/user_lzo.txt.lzo' into table user_lzo;
#简单查询测试
hive (wangly)> select * from user_lzo;
OK
user_lzo.name user_lzo.age user_lzo.gender
henren 18 girl
haungtian 20 man
qingdi 22 man



