栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

02Hadoop学习笔记-Hive入门

02Hadoop学习笔记-Hive入门

Hive

hive是基于Hadoop的一个数据仓库软件,主要用于HDFS结构化文件按照规则解析映射为数据表。使用结构化语言Sql来操作数据仓库。

1 安装部署

卸载Centos7自带的mariadb

[root@hadoop-node01 ~]# rpm -qa|grep mariadb
mariadb-libs-5.5.65-1.el7.x86_64
[root@hadoop-node01 ~]# rpm -e mariadb-libs-5.5.64-1.el7.x86_64 --nodeps
错误:未安装软件包 mariadb-libs-5.5.64-1.el7.x86_64
[root@hadoop-node01 ~]# rpm -e mariadb-libs-5.5.65-1.el7.x86_64 --nodeps
[root@hadoop-node01 ~]# rpm -qa|grep mariadb

安装mysql:如果缺少依赖,使用yum安装缺失的依赖即可

mkdir /export/software/mysql

#上传mysql-5.7.29-1.el7.x86_64.rpm-bundle.tar 到上述文件夹下  解压
tar -zxvf mysql-5.7.29-1.el7.x86_64.rpm-bundle.tar

#执行安装
yum -y install libaio

[root@node3 mysql]# rpm -ivh mysql-community-common-5.7.29-1.el7.x86_64.rpm mysql-community-libs-5.7.29-1.el7.x86_64.rpm mysql-community-client-5.7.29-1.el7.x86_64.rpm mysql-community-server-5.7.29-1.el7.x86_64.rpm 

[root@hadoop-node01 mysql]# rpm -ivh mysql-community-common-5.7.29-1.el7.x86_64.rpm mysql-community-libs-5.7.29-1.el7.x86_64.rpm mysql-community-client-5.7.29-1.el7.x86_64.rpm mysql-community-server-5.7.29-1.el7.x86_64.rpm
警告:mysql-community-common-5.7.29-1.el7.x86_64.rpm: 头V3 DSA/SHA1 Signature, 密钥 ID 5072e1f5: NOKEY
准备中...                          ################################# [100%]
正在升级/安装...
   1:mysql-community-common-5.7.29-1.e################################# [ 25%]
   2:mysql-community-libs-5.7.29-1.el7################################# [ 50%]
   3:mysql-community-client-5.7.29-1.e################################# [ 75%]
   4:mysql-community-server-5.7.29-1.e################################# [100%]

mysql初始化

#初始化
mysqld --initialize

#更改所属组
chown mysql:mysql /var/lib/mysql -R

#启动mysql
systemctl start mysqld.service

#查看生成的临时root密码
cat  /var/log/mysqld.log

[Note] A temporary password is generated for root@localhost: o+TU+KDOm004

修改root密码 授权远程访问 设置开机自启动

[root@hadoop-node01 mysql]# mysql -u root -p
Enter password:
Welcome to the MySQL monitor.  Commands end with ; or g.
Your MySQL connection id is 2
Server version: 5.7.29

Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or 'h' for help. Type 'c' to clear the current input statement.

mysql>  alter user user() identified by "root";
Query OK, 0 rows affected (0.03 sec)

mysql> use mysql;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root' WITH GRANT OPTION;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql> FLUSH PRIVILEGES;
Query OK, 0 rows affected (0.01 sec)

mysql> exit
Bye

查看mysql服务指令

[root@hadoop-node01 mysql]# systemctl status mysqld
● mysqld.service - MySQL Server
   Loaded: loaded (/usr/lib/systemd/system/mysqld.service; enabled; vendor preset: disabled)
   Active: active (running) since 一 2022-03-07 10:56:25 CST; 3min 39s ago
     Docs: man:mysqld(8)
           http://dev.mysql.com/doc/refman/en/using-systemd.html
  Process: 13989 ExecStart=/usr/sbin/mysqld --daemonize --pid-file=/var/run/mysqld/mysqld.pid $MYSQLD_OPTS (code=exited, status=0/SUCCESS)
  Process: 13971 ExecStartPre=/usr/bin/mysqld_pre_systemd (code=exited, status=0/SUCCESS)
 Main PID: 13992 (mysqld)
   CGroup: /system.slice/mysqld.service
           └─13992 /usr/sbin/mysqld --daemonize --pid-file=/var/run/mysqld/mysqld.pid

3月 07 10:56:23 hadoop-node01 systemd[1]: Starting MySQL Server...
3月 07 10:56:25 hadoop-node01 systemd[1]: Started MySQL Server.

开机自启

#建议设置为开机自启动服务
[root@node2 ~]# systemctl enable  mysqld                             
Created symlink from /etc/systemd/system/multi-user.target.wants/mysqld.service to /usr/lib/systemd/system/mysqld.service.

#查看是否已经设置自启动成功
[root@node2 ~]# systemctl list-unit-files | grep mysqld
mysqld.service                                enabled 

卸载服务!!按需要执行

#关闭mysql服务
systemctl stop mysqld.service

#查找安装mysql的rpm包
[root@node3 ~]# rpm -qa | grep -i mysql      
mysql-community-libs-5.7.29-1.el7.x86_64
mysql-community-common-5.7.29-1.el7.x86_64
mysql-community-client-5.7.29-1.el7.x86_64
mysql-community-server-5.7.29-1.el7.x86_64

#卸载
[root@node3 ~]# yum remove mysql-community-libs-5.7.29-1.el7.x86_64 mysql-community-common-5.7.29-1.el7.x86_64 mysql-community-client-5.7.29-1.el7.x86_64 mysql-community-server-5.7.29-1.el7.x86_64

#查看是否卸载干净
rpm -qa | grep -i mysql

#查找mysql相关目录 删除
[root@node1 ~]# find / -name mysql
/var/lib/mysql
/var/lib/mysql/mysql
/usr/share/mysql

[root@node1 ~]# rm -rf /var/lib/mysql
[root@node1 ~]# rm -rf /var/lib/mysql/mysql
[root@node1 ~]# rm -rf /usr/share/mysql

#删除默认配置 日志
rm -rf /etc/my.cnf 
rm -rf /var/log/mysqld.log

Hive的安装

上传安装包 解压

tar -xvf apache-hive-3.1.2-bin.tar.gz

解决Hive与Hadoop之间guava版本差异

cd /export/server/apache-hive-3.1.2-bin/
rm -rf lib/guava-19.0.jar
cp /export/server/hadoop-3.3.0/share/hadoop/common/lib/guava-27.0-jre.jar ./lib/

修改配置文件

hive-env.sh

cd /export/server/apache-hive-3.1.2-bin/conf
mv hive-env.sh.template hive-env.sh

vim hive-env.sh
export HADOOP_HOME=/export/server/hadoop-3.3.0
export HIVE_CONF_DIR=/export/server/apache-hive-3.1.2-bin/conf
export HIVE_AUX_JARS_PATH=/export/server/apache-hive-3.1.2-bin/lib

hive-site.xml

vi hive-site.xml



	javax.jdo.option.ConnectionURL
	jdbc:mysql://hadoop-node01:3306/hive3?createDatabaseIfNotExist=true&useSSL=false&useUnicode=true&characterEncoding=UTF-8



	javax.jdo.option.ConnectionDriverName
	com.mysql.jdbc.Driver



	javax.jdo.option.ConnectionUserName
	root



	javax.jdo.option.ConnectionPassword
	hadoop




    hive.server2.thrift.bind.host
    hadoop-node01




    hive.metastore.uris
    thrift://hadoop-node01:9083


 

    hive.metastore.event.db.notification.api.auth
    false


上传mysql jdbc驱动到hive安装包lib下

mysql-connector-java-5.1.32.jar

初始化元数据

cd /export/server/apache-hive-3.1.2-bin/

bin/schematool -initSchema -dbType mysql -verbos
#初始化成功会在mysql中创建74张表


在hdfs创建hive存储目录(如存在则不用操作)

hadoop fs -mkdir /tmp
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -chmod g+w /tmp
hadoop fs -chmod g+w /user/hive/warehouse
2 启动hive

1、启动metastore服务

#前台启动  关闭ctrl+c
/export/server/apache-hive-3.1.2-bin/bin/hive --service metastore

#前台启动开启debug日志
/export/server/apache-hive-3.1.2-bin/bin/hive --service metastore --hiveconf hive.root.logger=DEBUG,console  

#后台启动 进程挂起  关闭使用jps+ kill -9
nohup /export/server/apache-hive-3.1.2-bin/bin/hive --service metastore &

启动二代客户端

首先启动hiveserver2服务然后启动beeline客户端

  nohup /export/server/apache-hive-3.1.2-bin/bin/hive --service hiveserver2 &

  #注意 启动hiveserver2需要一定的时间  不要启动之后立即beeline连接 可能连接不上

为了模拟远程,我们把安装包复制到第三台机器上

scp -r /export/server/apache-hive-3.1.2-bin/ root@hadoop-node03:/export/server/

链接

[root@hadoop-node03 ~]# /export/server/apache-hive-3.1.2-bin/bin/beeline
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/export/server/apache-hive-3.1.2-bin/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/export/server/hadoop-3.3.0/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/export/server/apache-hive-3.1.2-bin/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/export/server/hadoop-3.3.0/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Beeline version 3.1.2 by Apache Hive
beeline>  ! connect jdbc:hive2://hadoop-node01:10000
Connecting to jdbc:hive2://hadoop-node01:10000
Enter username for jdbc:hive2://hadoop-node01:10000: root
Enter password for jdbc:hive2://hadoop-node01:10000:没有密码直接回车
Connected to: Apache Hive (version 3.1.2)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://hadoop-node01:10000>
3 使用DG连接


编辑数据源信息

4 创建数据库
-- 1.查看数据库
show databases ;
-- 2.创建数据库
create database itmark1;
-- 3.user切换数据库
use itmark1;
-- 4.drop database db名称
drop database itmark1;

发现创建的表在HDFS中是有映射的

建表并上传数据集

create table t_archer(
id int comment "ID",
name string comment "英雄名称",
hp_max int comment "最大生命",
mp_max int comment "最大法力",
attack_max int comment "最高物攻",
defense_max int comment "最大物防",
attack_range string comment "攻击范围",
role_main string comment "主要定位",
role_assist string comment "次要定位"
) comment "王者荣耀射手信息"
row format delimited
fields terminated by "t";
1	后羿	5986	1784	396	336	remotely	archer
2	马可波罗	5584	200	362	344	remotely	archer
3	鲁班七号	5989	1756	400	323	remotely	archer
4	李元芳	5725	1770	396	340	remotely	archer
5	孙尚香	6014	1756	411	346	remotely	archer
6	黄忠	5898	1784	403	319	remotely	archer
7	狄仁杰	5710	1770	376	338	remotely	archer
8	虞姬	5669	1770	407	329	remotely	archer
9	成吉思汗	5799	1742	394	329	remotely	archer
10	百里守约	5611	1784	410	329	remotely	archer	assassin


查询

select * from t_archer;

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/758558.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号