1. Unpack and install
tar -zxvf sqoop-1.4.6-cdh5.14.2.tar.gz          // unpack the archive
mv sqoop-1.4.6-cdh5.14.2 /opt/soft/sqoop146     // move it into the install directory
cd /opt/soft/sqoop146/conf                      // enter the conf directory
cp sqoop-env-template.sh sqoop-env.sh           // copy the template to create the config file
2. Edit the file: vim sqoop-env.sh and insert the following configuration
export HADOOP_COMMON_HOME=/opt/soft/hadoop260
export HADOOP_MAPRED_HOME=/opt/soft/hadoop260
export HBASE_HOME=/opt/soft/hbase120
export HIVE_HOME=/opt/soft/hive110
export ZOOCFGDIR=/opt/soft/zk345/conf
3. Copy the JAR packages Sqoop needs to connect to the databases into the lib directory of the installation
Commands:
cp /opt/soft/hadoop260/share/hadoop/common/hadoop-common-2.6.0-cdh5.14.2.jar /opt/soft/sqoop146/lib/
cp /opt/soft/hadoop260/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.14.2.jar /opt/soft/sqoop146/lib/
cp /opt/soft/hadoop260/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0-cdh5.14.2.jar /opt/soft/sqoop146/lib/
cp /opt/soft/hive110/lib/hive-jdbc-1.1.0-cdh5.14.2.jar /opt/soft/sqoop146/lib/
cp /opt/soft/hive110/lib/hive-jdbc-1.1.0-cdh5.14.2-standalone.jar /opt/soft/sqoop146/lib/
cp mysql-connector-java-5.1.25.jar /opt/soft/sqoop146/lib/
cp ojdbc6-11.2.0.1.0.jar /opt/soft/sqoop146/lib/
cp java-json.jar /opt/soft/sqoop146/lib/
4. Configure the environment variables (append to /etc/profile)
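As an optional sanity check, you can list the lib directory to confirm the copies landed (the grep pattern here is only illustrative):
ls /opt/soft/sqoop146/lib | grep -E 'hadoop-|hive-jdbc|mysql-connector|ojdbc|java-json'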
# sqoop environment
export SQOOP_HOME=/opt/soft/sqoop146
export PATH=$PATH:$SQOOP_HOME/bin
source /etc/profile     // reload the profile so the new variables take effect
5. Start Sqoop
sqoop version
If the version information prints, the installation succeeded.
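Before running any imports you can also test the MySQL connection; this sketch reuses the host and credentials from the import examples below, which may differ in your environment:
sqoop list-databases --connect jdbc:mysql://192.168.221.140:3306 --username root --password kb10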
II. Using Sqoop
1. MySQL -> HDFS: import data from MySQL (an RDBMS) into HDFS
sqoop import --connect jdbc:mysql://192.168.221.140:3306/company --username root --password kb10 --table staff --target-dir '/kb13/test' --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
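To check the result you can inspect the target directory on HDFS (a minimal check; the part file name assumes the single-mapper import above):
hdfs dfs -ls /kb13/test
hdfs dfs -cat /kb13/test/part-m-00000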
MySQL -> HDFS (conditional import):
sqoop import --connect jdbc:mysql://192.168.1.101:3306/mydemo --username root --password root --target-dir '/kb13/students' --delete-target-dir --num-mappers 1 --fields-terminated-by ',' --query 'select * from students where age<70 and $CONDITIONS'
2. HDFS -> MySQL: use Sqoop to export files in HDFS into MySQL (an RDBMS)
You first need to create a table in MySQL; here a database named company was created, and the new table is called hive_staff.
a) Create the required table structure in MySQL:
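The post does not include the DDL itself, so here is a minimal sketch; the columns id, name, and sex are assumptions and must match the staff data exported to /kb13/test:
mysql -h 192.168.221.140 -u root -pkb10 company -e "create table hive_staff(id int primary key, name varchar(255), sex varchar(255));"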
b) Run the export:
sqoop export --connect jdbc:mysql://192.168.221.140:3306/company --username root --password kb10 --table hive_staff --num-mappers 1 --export-dir '/kb13/test' --input-fields-terminated-by '\t'
3. MySQL -> Hive: import from MySQL (an RDBMS) into Hive
This process has two steps: the data is first imported into a temporary directory on HDFS, then moved from there into the Hive warehouse. By default the temporary directory is under the current user's HDFS home directory and is named after the table, e.g. /user/root/students for the import below.
sqoop import --connect jdbc:mysql://192.168.1.101:3306/mydemo --username root --password root --table students --num-mappers 1 --hive-import --fields-terminated-by ',' --hive-overwrite --delete-target-dir --hive-table mydemo.students
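To confirm the Hive import, you can query the table from the shell (a quick check; the row limit is arbitrary):
hive -e 'select * from mydemo.students limit 5;'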
Hive -> MySQL:
sqoop export --connect jdbc:mysql://192.168.1.101:3306/mydemo --username root --password root --table stu1 --export-dir '/hive110/warehouse/mydemo.db/students' --num-mappers 1 --input-fields-terminated-by ','
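You can then verify the export on the MySQL side (an illustrative check using the same connection details as the export above):
mysql -h 192.168.1.101 -u root -proot mydemo -e 'select count(*) from stu1;'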
HBase -> Hive: map data from HBase into Hive
This step involves no MySQL connection, but the Hive and HBase JAR packages must first be copied into each other's lib directories. The relevant JARs are listed below (commands run from Hive's lib directory):
# copy the JAR under Hive's lib into HBase's lib
[root@chust01 lib]# cp hive-hbase-handler-1.1.0-cdh5.14.2.jar /opt/software/hadoop/hbase120/lib/
# copy the JARs under HBase's lib into Hive's lib
[root@chust01 lib]# cp /opt/software/hadoop/hbase120/lib/hbase-client-1.2.0-cdh5.14.2.jar ./
[root@chust01 lib]# cp /opt/software/hadoop/hbase120/lib/hbase-hadoop2-compat-1.2.0-cdh5.14.2.jar ./
[root@chust01 lib]# cp /opt/software/hadoop/hbase120/lib/hbase-hadoop-compat-1.2.0-cdh5.14.2.jar ./
[root@chust01 lib]# cp /opt/software/hadoop/hbase120/lib/hbase-it-1.2.0-cdh5.14.2.jar ./
[root@chust01 lib]# cp /opt/software/hadoop/hbase120/lib/hbase-server-1.2.0-cdh5.14.2.jar ./
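Besides the JARs, Hive must be able to reach HBase's ZooKeeper quorum. If hbase-site.xml is not already on Hive's classpath, one way is to pass the quorum when starting the CLI (the address here is an assumption; use your own ZooKeeper host):
hive --hiveconf hbase.zookeeper.quorum=192.168.221.140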
This operation requires no MySQL connection: create the table structure directly in Hive, then map the HBase data onto it.
create external table hbase_student(sid int,student_id int,course_id int,score int)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping" = ":key,scores:student_id,scores:course_id,scores:score")
tblproperties("hbase.table.name" = "kb10:mysql_stu");
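With the mapping in place, the HBase table kb10:mysql_stu can be queried like any Hive table (a quick check; the row limit is arbitrary):
hive -e 'select * from hbase_student limit 10;'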