tar -zxvf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
mv sqoop-1.4.7.bin__hadoop-2.6.0 sqoop
2. 复制 Sqoop 的配置文件模板，生成并编辑 sqoop-env.sh。
[root@master ~]# cd /opt/sqoop/conf/
[root@master conf]# cp sqoop-env-template.sh sqoop-env.sh
[root@master conf]# vi sqoop-env.sh
# 在 sqoop-env.sh 中设置以下环境变量
export HADOOP_COMMON_HOME=/opt/hadoop
export HADOOP_MAPRED_HOME=/opt/hadoop
export HBASE_HOME=/opt/hbase
export HIVE_HOME=/opt/hive
3. 配置系统环境变量 vi /etc/profile
[root@master conf]# vi /etc/profile
# 在文件末尾添加
# set sqoop environment
export SQOOP_HOME=/opt/sqoop
export PATH=$PATH:$SQOOP_HOME/bin
export CLASSPATH=$CLASSPATH:$SQOOP_HOME/lib
4. 连接数据库：将 MySQL 的 JDBC 驱动复制到 Sqoop 的 lib 目录。
[root@master ~]# cp /opt/mysql-connector-java-5.1.46.jar /opt/sqoop/lib/
5. 执行 Sqoop 前需要先启动 Hadoop 集群。
在 master 节点切换到 hadoop 用户执行 start-all.sh 命令启动 Hadoop 集群。
[root@master ~]# su - hadoop
[hadoop@master ~]$ source /etc/profile
[hadoop@master ~]$ start-all.sh
步骤二:检查 Hadoop 集群的运行状态。
[hadoop@master ~]$ jps
1457 NameNode
1795 ResourceManager
2060 Jps
1646 SecondaryNameNode
6. 步骤三:测试 Sqoop 是否能够正常连接 MySQL 数据库。
Sqoop 连接 MySQL 数据库 P 大写 密码 Password123$ [hadoop@master ~]$ sqoop list-databases --connect jdbc:mysql://127.0.0.1:3306/ --username root -P Warning: /home/hadoop/sqoop/../hcatalog does not exist! HCatalog jobs will fail. Please set $HCAT_HOME to the root of your HCatalog installation. Warning: /home/hadoop/sqoop/../accumulo does not exist! Accumulo imports will fail. Please set $ACCUMULO_HOME to the root of your Accumulo installation. Warning: /home/hadoop/sqoop/../zookeeper does not exist! Accumulo imports will fail. Please set $ZOOKEEPER_HOME to the root of your Zookeeper installation. 19/04/22 18:54:10 INFO sqoop.Sqoop: Running Sqoop version: 1.4.7 Enter password: # 此处需要输入 mysql 数据库的密码 19/04/22 18:54:14 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset. information_schema hive mysql performance_schema sys
7.连接hive
为了使 Sqoop 能够连接 Hive,需要将 hive 组件/usr/local/src/hive/lib 目录下hive-common-2.0.0.jar 也放入 Sqoop 安装路径的 lib 目录中。
[hadoop@master ~]$ cp /opt/hive-common-2.0.0.jar /opt/sqoop/lib/
cp hive-common-2.0.0.jar /opt/sqoop/lib/
实验任务四:Sqoop 模板命令
步骤一:创建 MySQL 数据库和数据表。
# 登录 MySQL 数据库
[hadoop@master ~]$ mysql -uroot -p
Enter password:
# 创建 sample 库
mysql> create database sample;
Query OK, 1 row affected (0.00 sec)
# 使用 sample 库
mysql> use sample;
Database changed
mysql> create table student(number char(9) primary key, name varchar(10));
Query OK, 0 rows affected (0.01 sec) # 创建 student 表,该数据表有
number 学号和 name 姓名两个字段
# 向 student 表插入几条数据
mysql> insert into student values('01','zhangsan');
Query OK, 1 row affected (0.05 sec)
mysql> insert into student values('02','lisi');
Query OK, 1 row affected (0.01 sec)
mysql> insert into student values('03','wangwu');
Query OK, 1 row affected (0.00 sec)
mysql>
mysql>
# 查询 student 表的数据
mysql> select * from student;
+--------+----------+
| number | name     |
+--------+----------+
| 01 | zhangsan |
| 02 | lisi |
| 03 | wangwu |
+--------+----------+
3 rows in set (0.00 sec)
mysql>
mysql> exit
在 Hive 中创建 sample 数据库和 student 数据表。
[hadoop@master ~]$ hive # 启动 hive 命令行
Logging initialized using configuration in jar:file:/usr/local/src/hive/lib/hive-common-1.1.0.jar!/hive-log4j.properties
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/src/hive/lib/hive-jdbc-1.1.0-standalone.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
hive> create database sample; # 创建 sample 库
OK
Time taken: 0.679 seconds
hive> show databases; # 查询所有数据库
OK
default
sample
Time taken: 0.178 seconds, Fetched: 2 row(s)
hive> use sample; # 使用 sample 库
OK
hive> create table student(number STRING, name STRING) row format delimited fields terminated by "|" stored as textfile; # 创建 student 表（建表语句与分隔符子句必须是同一条语句，中间不能有分号）
OK
hive> exit; # 退出 hive 命令行
步骤三:从 MySQL 导出数据,导入 Hive。
[hadoop@master ~]$ sqoop import --connect jdbc:mysql://master:3306/sample --username root --password Password123$ --table student --fields-terminated-by '|' --delete-target-dir --num-mappers 1 --hive-import --hive-database sample --hive-table student
删除 MySQL 数据：student 表中 number 为主键，之后再导入同样的数据会导致主键重复报错，所以先删除表数据。
[hadoop@master ~]$ mysql -u root -p # 密码 Password123$
mysql> use sample;
mysql> delete from student;
mysql> exit;
步骤四:从 Hive 导出数据,导入到 MySQL。
sqoop export --connect "jdbc:mysql://master:3306/sample?useUnicode=true&characterEncoding=utf-8" --username root --password Password123$ --table student --input-fields-terminated-by '|' --export-dir /user/hive/warehouse/sample.db/student/*
实验任务五:Sqoop 组件应用
sqoop list-databases -connect jdbc:mysql://localhost:3306/ -username root -password Password123$
sqoop list-tables -connect jdbc:mysql://localhost:3306/sample -username root -password Password123$
sqoop create-hive-table -connect jdbc:mysql://localhost:3306/sample -table student -username root -password Password123$ -hive-table test
sqoop import --connect jdbc:mysql://master:3306/sample --username root --password Password123$ --table student --delete-target-dir --num-mappers 1 --hive-import --hive-database default --hive-table test
(5)将 Hive 中的表数据导入到 MySQL 中,在进行导入之前,MySQL 中的表
hive_test 表必须已经提前创建好。
删除 MySQL 数据, student 表中 number 为主键,添加信息导致主键重复,报错,所以删除表数据
[hadoop@master ~]$mysql -u root -p #密码 Password123$
mysql>use sample;
mysql>delete from student;
mysql>exit;
sqoop export -connect jdbc:mysql://master:3306/sample -username root -password Password123$ -table student --input-fields-terminated-by '\001' -export-dir /user/hive/warehouse/test
sqoop import -connect jdbc:mysql://master:3306/sample -username root -password Password123$ -table student --num-mappers 1 -target-dir /user/test
(7)从数据库增量导入表数据到 HDFS 中。
#密码 Password123$
[hadoop@master ~]$mysql -u root -p
mysql>use sample;
mysql> insert into student values('04','sss');
mysql> insert into student values('05','ss2');
mysql> insert into student values('06','ss3');
#非数值型的值不能当做增量
mysql> alter table student modify column number int;
mysql> exit;
sqoop import -connect jdbc:mysql://master:3306/sample -username root -password Password123$ -table student --num-mappers 1 -target-dir /user/test -check-column number -incremental append -last-value 0
查看导入数据
[hadoop@master ~]$hdfs dfs -cat /user/test/part-m-00000
[hadoop@master ~]$hdfs dfs -cat /user/test/part-m-00001



