镜像文件下载地址:https://dlcdn.apache.org/hadoop/common/hadoop-3.2.2/
hadoop3.2.2文档:https://hadoop.apache.org/docs/r3.2.2/
hadoop3.2.2集群部署
1.环境
redhat7.5 + hadoop3.2.2 + jdk1.8(8u181)
2.iso镜像挂载 0_master_local_yum.sh
#!/bin/sh
# Mount the RHEL 7.5 install ISO and register it as a local yum repository.
# Expects the ISO at <script dir>/iso/rhel-server-7.5-x86_64-dvd.iso.
base_path=$(cd "$(dirname "$0")" && pwd)
iso_path=${base_path}/iso/rhel-server-7.5-x86_64-dvd.iso

# BUG FIX: the original chained mkdir and mount with '|' (a pipe), which
# discards mkdir's result; use '&&' so mount only runs if mkdir succeeded.
# BUG FIX: '-o,loop' is invalid mount syntax — the option list is '-o loop'.
mkdir -p /mnt/cdrom && mount -t iso9660 -o loop "${iso_path}" /mnt/cdrom/ || exit 1

# Overwrite (not append) the repo file so re-running the script stays
# idempotent; the quoted here-doc delimiter keeps $releasever literal,
# exactly as the original single-quoted echo did.
cat > /etc/yum.repos.d/local_os.repo <<'EOF'
[local_iso]
name=CentOS-$releasever - Media
baseurl=file:///mnt/cdrom
gpgcheck=0
enabled=1
EOF

echo "镜像挂载文件是5099则正确"
yum repolist
echo "Complete!"
3.解压hadoop3.2.2
# BUG FIX: the original put the tar command on the same line as echo, so
# 'tar -zxvf hadoop-3.2.2.tar.gz' was printed as echo arguments instead of
# being executed. Run them as two separate commands.
echo "解压hadoop文件"
tar -zxvf hadoop-3.2.2.tar.gz
4.防火墙相关命令(此处需要关闭防火墙)
添加端口 firewall-cmd --zone=public --add-port=9091/tcp --permanent 移除端口 firewall-cmd --remove-port=9091/tcp --permanent 查看开放端口 firewall-cmd --zone=public --list-ports 重载端口 firewall-cmd --reload 查看防火墙状态 firewall-cmd --state systemctl status firewalld 关闭防火墙 systemctl stop firewalld service firewalld stop 开启防火墙 systemctl start firewalld service firewalld start 配置文件 vim /etc/selinux/config 设置 SELINUX=disabled
5.jdk安装 运行脚本 ./jdkinstall jdk-8u181-linux-x64.tar.gz 脚本内容如下
#!/bin/sh
# Install JDK 8u181: unpack the tarball given as $1 into /usr/local/java and
# append JAVA_HOME/JRE_HOME/CLASSPATH/PATH exports to /etc/profile.
# Usage: ./jdkinstall jdk-8u181-linux-x64.tar.gz

# Guard: the original used $1 blindly; fail fast when the tarball is missing.
if [ -z "$1" ] || [ ! -f "$1" ]; then
    echo "用法: $0 jdk-8u181-linux-x64.tar.gz" >&2
    exit 1
fi

echo '解压jdk-8u181-linux-x64.tar.gz包...'
tar -xvf "$1" || exit 1
echo '将jdk1.8.0_181目录移到/usr/local/java/目录下...'
# -p: do not fail if the directory already exists (original used bare mkdir,
# which errors on a second run).
mkdir -p /usr/local/java
cp -r jdk1.8.0_181 /usr/local/java/
echo '删除jdk1.8.0_181'
rm -rf jdk1.8.0_181
echo '设置环境变量'
echo '#JAVA_HOME' >> /etc/profile
echo 'export JAVA_HOME=/usr/local/java/jdk1.8.0_181' >> /etc/profile
echo 'export JRE_HOME=${JAVA_HOME}/jre' >> /etc/profile
echo 'export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib' >> /etc/profile
echo 'export PATH=${JAVA_HOME}/bin:$PATH' >> /etc/profile
echo '刷新环境变量'
# 'source' is a bashism; '.' is the POSIX equivalent required by #!/bin/sh.
. /etc/profile
echo 'java安装成功...'
java -version
6.hadoop环境变量
# Append the Hadoop environment variables to /etc/profile. The here-doc
# delimiter is quoted, so $PATH and ${HADOOP_HOME} are written literally —
# the exact same bytes the original echo chain appended.
cat >> /etc/profile <<'EOF'
#HADOOP_HOME
export HADOOP_HOME=/apps/bigdata/hadoop-3.2.2
export PATH=$PATH:${HADOOP_HOME}/bin
export PATH=$PATH:${HADOOP_HOME}/sbin
EOF
echo "刷新环境变量"
source /etc/profile
7.分发文件
同步jdk文件到Hadoop2,3上 scp -r /usr/local/java root@hadoop2:/usr/local/java scp -r /etc/profile root@hadoop2:/etc/profile scp -r /usr/local/java root@hadoop3:/usr/local/java scp -r /etc/profile root@hadoop3:/etc/profile 同步hadoop文件到Hadoop2,3上 mkdir -p /apps/bigdata scp -r /apps/bigdata/hadoop-3.2.2 root@hadoop2:/apps/bigdata/hadoop-3.2.2 scp -r /apps/bigdata/hadoop-3.2.2 root@hadoop3:/apps/bigdata/hadoop-3.2.2 或者同步语法 rsync -rvl 其他的一致
8.xsync同步脚本
#!/bin/bash
# xsync — rsync a file/directory to every worker node (hadoop2..hadoop3) at
# the same absolute path, as the current user.
# Install: place in /usr/local/bin and `chmod 755 xsync`. Usage: xsync <file>

#1 Require at least one argument.
# BUG FIX: the original exited with bare 'exit' (status 0) on this error
# path; exit non-zero so callers can detect the failure.
pcount=$#
if ((pcount == 0)); then
    echo "no args" >&2
    exit 1
fi

#2 File name (basename of the argument).
p1=$1
fname=$(basename "$p1")
echo "fname=$fname"

#3 Absolute path of the parent directory (-P resolves symlinks).
pdir=$(cd -P "$(dirname "$p1")" && pwd)
echo "pdir=$pdir"

#4 Current user name.
user=$(whoami)

#5 Loop over nodes; adjust the host range to your cluster size/names.
for ((host = 2; host < 4; host++)); do
    echo "--------------- hadoop$host ----------------"
    rsync -rvl "$pdir/$fname" "$user@hadoop$host:$pdir"
done
9.集群配置
核心配置文件:core-site.xml
参考配置文件地址:https://hadoop.apache.org/docs/r3.2.2/hadoop-project-dist/hadoop-common/core-default.xml
fs.defaultFS hdfs://hadoop1:9000 hadoop.tmp.dir file:/apps/bigdata/hadoop-3.2.2/data/tmp
配置文件:hadoop-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8.0_181
配置文件:hdfs-site.xml
参考配置文件地址:https://hadoop.apache.org/docs/r3.2.2/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml
dfs.namenode.http-address hadoop1:9870 dfs namenode web ui 将侦听的地址和基本端口。 dfs.namenode.name.dir file:/apps/bigdata/hadoop-3.2.2/data/dfs/name 在本地文件系统上,namenode持久化存储命名空间和事务日志的路径。 dfs.datanode.data.dir file:/apps/bigdata/hadoop-3.2.2/data/dfs/data DataNode的本地文件系统上应该存储块的逗号分隔的路径列表。 dfs.namenode.secondary.http-address hadoop3:9868 辅助名称节点 http 服务器地址和端口。
配置文件:yarn-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8.0_181
配置文件:yarn-site.xml
参考配置文件地址:https://hadoop.apache.org/docs/r3.2.2/hadoop-yarn/hadoop-yarn-common/yarn-default.xml
yarn.resourcemanager.hostname hadoop2 yarn.nodemanager.aux-services mapreduce_shuffle yarn.log-aggregation-enable true yarn.log-aggregation.retain-seconds 604800
配置文件:mapred-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8.0_181
配置文件:mapred-site.xml
参考配置文件地址:https://hadoop.apache.org/docs/r3.2.2/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml
mapreduce.framework.name yarn mapreduce.jobhistory.address hadoop2:10020 MapReduce JobHistory 服务器主机:端口 mapreduce.jobhistory.webapp.address hadoop2:19888 MapReduce JobHistory 服务器 Web UI主机:端口
hadoop 配置文件同步
xsync /apps/bigdata/hadoop-3.2.2/etc/hadoop/
sbin目录修改配置文件
start-dfs.sh和stop-dfs.sh
HDFS_DATANODE_USER=root HDFS_DATANODE_SECURE_USER=hdfs HDFS_NAMENODE_USER=root HDFS_SECONDARYNAMENODE_USER=root
start-yarn.sh和stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root HADOOP_SECURE_DN_USER=yarn YARN_NODEMANAGER_USER=root
10.ssh配置
#hadoop1 找到.ssh目录 cd ~/.ssh 文件known_hosts包含相应的主机访问记录 不提示直接生成秘钥文件(官方文档的三步),公钥添加入authorized_keys文件,赋权 ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys chmod 0600 ~/.ssh/authorized_keys 或者提示生成秘钥文件 (enter回车三次) ssh-keygen -t rsa 公钥 id_rsa.pub 私钥 id_rsa 将公钥放入主机访问记录,拷贝到hadoop2,hadoop3 ssh-copy-id hadoop2 ssh-copy-id hadoop3 自己登陆自己免密 ssh-copy-id hadoop1 #hadoop2 ssh-keygen -t rsa ssh-copy-id hadoop2 ssh-copy-id hadoop3 ssh-copy-id hadoop1
11.单机启动
#hadoop1 /apps/bigdata/hadoop-3.2.2/sbin echo "格式化namenode" hdfs namenode -format (此处一般不用) bin目录下的启动方式 cd /apps/bigdata/hadoop-3.2.2/sbin echo "启动hdfs" hdfs --daemon start namenode hdfs --daemon start datanode #Hadoop2 hdfs --daemon start datanode #hadoop3 hdfs --daemon start datanode
12.时间同步
hadoop1 查看ntp是否安装 rpm -qa|grep ntp 出现如下 ntpdate-4.2.6p5-28.el7.x86_64 fontpackages-filesystem-1.44-8.el7.noarch 安装ntp rpm -ivh autogen-libopts-5.18-5.el7.x86_64.rpm rpm -ivh ntp-4.2.6p5-29.el7.centos.2.x86_64.rpm --nodeps 修改配置文件 vi /etc/ntp.conf 授权网段192.168.1.0-255.255.255.0网段上的所有机器可以从这台机器上查询和时间同步 #restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap 集群在局域网中,不使用其他互联网上的时间 server 0.centos.pool.ntp.org iburst server 1.centos.pool.ntp.org iburst server 2.centos.pool.ntp.org iburst server 3.centos.pool.ntp.org iburst 注释 #server 0.centos.pool.ntp.org iburst #server 1.centos.pool.ntp.org iburst #server 2.centos.pool.ntp.org iburst #server 3.centos.pool.ntp.org iburst 当节点丢失网络连接,依然可以采用本地时间作为时间服务器为集群中的其他节点提供时间同步 server 127.127.1.0 fudge 127.127.1.0 stratum 10 修改/etc/sysconfig/ntpd 硬件时间与系统时间一起同步 SYNC_HWCLOCK=yes ntpd启动 systemctl start ntpd 查询状态 systemctl status ntpd 设置开机自启 systemctl enable ntpd 其他机器配置5分钟与时间服务器同步一次 hadoop2,hadoop3 定时器编辑 crontab -e */5 * * * * /usr/sbin/ntpdate hadoop1 定时器查看 crontab -l
13.集群启动
/apps/bigdata/hadoop-3.2.2/etc/hadoop
编辑集群配置文件:workers
hadoop1 hadoop2 hadoop3
分发脚本
xsync workers
hadoop1
启动集群namenode,datanode
cd /apps/bigdata/hadoop-3.2.2/sbin ./start-dfs.sh jps查看服务进程
hadoop2
启动resourcemanager ./start-yarn.sh 启动JobHistoryServer ./mr-jobhistory-daemon.sh start historyserver 启动日志job查看
查看页面
NameNode:http://192.168.189.10:9870/ 默认端口:9870
ResourceManager: http://192.168.189.11:8088/ 默认端口8088
MapReduce JobHistory Server: http://192.168.189.11:19888 默认端口:19888
!



