hadoop@ubuntu:sudo apt-get install open-vm-tools-desktop -y2、安装vim编辑器
hadoop@ubuntu:sudo apt install vim2、安装jdk 1、解压安装包
hadoop@ubuntu:~$ sudo tar -zxvf jdk-8u121-linux-x64.tar.gz -C /usr/local
2、修改环境变量
hadoop@ubuntu:~$ sudo vim ~/.bashrc #JAVA export JAVA_HOME=/usr/local/jdk1.8.0_121 PATH=$PATH:$JAVA_HOME/bin3、环境变量生效
hadoop@ubuntu:~$ source ~/.bashrc4、查看jdk是否安装成功
hadoop@ubuntu:~$ java -version java version "1.8.0_121" Java(TM) SE Runtime Environment (build 1.8.0_121-b13) Java HotSpot(TM) 64-Bit Server VM (build 25.121-b13, mixed mode)3、安装hadoop 1、解压安装包
hadoop@ubuntu:~$ sudo tar -zxvf hadoop-2.7.7.tar.gz -C /usr/local2、修改环境变量
hadoop@ubuntu:~$ sudo vim ~/.bashrc #Hadoop export HADOOP_HOME=/usr/local/hadoop-2.7.7 PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin3、环境变量生效
hadoop@ubuntu:~$ source ~/.bashrc4、克隆两个子节点
jdk和hadoop无需安装配置
5、配置主机名、静态IP地址、主机名和ip映射 1、以root用户登录,分别修改3台虚拟机的主机名为master、slave1、slave2 1、//第1台虚机主机名改为master
root@ubuntu:~# vi /etc/hostname master root@ubuntu:~# reboot
2、//第2台虚机主机名改为slave1
root@ubuntu:~# vi /etc/hostname slave1 root@ubuntu:~# reboot
3、//第3台虚机主机名改为slave2
root@ubuntu:~# vi /etc/hostname slave2 root@ubuntu:~# reboot2、以root用户登录,设置静态ip地址(3个节点都要配置)
1、//以master为例,修改网络配置文件,方法如下:
root@master:~# vim /etc/netplan/01-network-manager-all.yaml
# Let NetworkManager manage all devices on this system
#network:
# version: 2
# renderer: NetworkManager
network:
ethernets:
ens33: # 配置的网卡名称
dhcp4: no # 关闭dhcp4
dhcp6: no # 关闭dhcp6
addresses: [192.168.126.143/24] # 设置本机IP地址及掩码
gateway4: 192.168.126.2 # 设置网关
nameservers:
addresses: [192.168.126.2, 114.114.114.114, 8.8.8.8] # 设置DNS
version: 2
注意:网关需与电脑主机vmnet8中网关相同
2、配置生效
root@master:~# netplan apply3、配置ip与主机名的映射关系
//以root用户的登录,分别在3台虚机上配置ip与主机名的映射关系
root@master:~# vi /etc/hosts 127.0.0.1 localhost 192.168.126.143 master 192.168.126.146 slave1 192.168.126.147 slave2 root@slave1:~# vi /etc/hosts 127.0.0.1 localhost 192.168.126.143 master 192.168.126.146 slave1 192.168.126.147 slave2 root@slave2:~# vi /etc/hosts 127.0.0.1 localhost 192.168.126.143 master 192.168.126.146 slave1 192.168.126.147 slave26、配置免密ssh 1、以root登录,每台linux主机上均打开22端口
hadoop@master:~$ su - root Password: root@master:~# vim /etc/ssh/sshd_config Port 22 2、每台linux主机安装openssh-server,并生成密钥对
//hadoop用户登录master root@master:~# su - hadoop hadoop@master:~$ sudo apt install openssh-server hadoop@master:~$ ssh-keygen -t rsa hadoop@master:~$ ls ~/.ssh id_rsa id_rsa.pub // hadoop用户登录slave1 root@slave1:~# su - hadoop hadoop@slave1:~$ sudo apt install openssh-server hadoop@slave1:~$ ssh-keygen -t rsa hadoop@slave1:~$ ls ~/.ssh id_rsa id_rsa.pub //hadoop用户登录slave2 root@slave2:~# su - hadoop hadoop@slave2:~$ sudo apt install openssh-server hadoop@slave2:~$ ssh-keygen -t rsa hadoop@slave2:~$ ls ~/.ssh id_rsa id_rsa.pub3、设置各节点免密登录
//hadoop用户登录master,在master节点上将公钥拷到一个特定文件authorized_keys中 hadoop@master:~/.ssh$ cp id_rsa.pub authorized_keys //将master节点的authorized_keys传至slave1节点 hadoop@master:~/.ssh$ scp /home/hadoop/.ssh/authorized_keys hadoop@slave1:/home/hadoop/.ssh/authorized_keys authorized_keys // hadoop用户登录slave1,将slave1节点的公钥追加到authorized_keys中 hadoop@slave1:~/.ssh$ cat id_rsa.pub >> authorized_keys //将slave1节点的authorized_keys传至slave2节点 hadoop@slave1:~/.ssh$ scp /home/hadoop/.ssh/authorized_keys hadoop@slave2:/home/hadoop/.ssh/authorized_keys // hadoop用户登录slave2,将slave2节点的公钥追加到authorized_keys中 hadoop@slave2:~/.ssh$ cat id_rsa.pub >> authorized_keys //将slave2节点中的authorized_keys分别传到slave1中 hadoop@slave2:~/.ssh$ scp /home/hadoop/.ssh/authorized_keys hadoop@slave1:/home/hadoop/.ssh/authorized_keys hadoop@slave1's password: authorized_keys 100% 1196 804.0KB/s 00:00 //将slave2节点中的authorized_keys分别传到master中 hadoop@slave2:~/.ssh$ scp /home/hadoop/.ssh/authorized_keys hadoop@master:/home/hadoop/.ssh/authorized_keys hadoop@master's password: authorized_keys4、验证免密登录
使用ssh登录各节点测试是否免密
7、集群节点配置 1、配置master的xml文件slaves、core-site.xml、hdfs-site.xml、mapred-site.xml、yarn-site.xml
1、hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/slaves
slave1 slave2
2、hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/core-site.xml
fs.defaultFS hdfs://master:9000 hadoop.tmp.dir /usr/local/hadoop-2.7.7/data/ A base for other temporary directories.
3、hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/hdfs-site.xml
dfs.replication 1 dfs.namenode.secondary.http-address master:50090 dfs.datanode.directoryscan.throttle.limit.ms.per.sec 1000
4、hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/mapred-site.xml
mapreduce.framework.name yarn mapreduce.jobhistory.address master:10020 mapreduce.jobhistory.webapp.address master:19888
5、hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/yarn-site.xml
2、检查yarn-env.sh 、mapred-env.sh、 hadoop-env.sh中均配置了JAVA_HOME的环境变量yarn.nodemanager.aux-services mapreduce_shuffle yarn.resourcemanager.hostname master
# The java implementation to use. export JAVA_HOME=/usr/local/jdk1.8.0_1218、同步各节点配置 1、在slave1建立临时文件,并修改权限
hadoop@slave1:~$ cd /usr/local hadoop@slave1:/usr/local$ sudo mkdir tmp hadoop@slave1:/usr/local$ sudo chown -R hadoop tmp hadoop@slave1:/usr/local$ chgrp hadoop tmp hadoop@slave1:/usr/local$ chmod -R 777 tmp2、scp传输文件
hadoop@master:/usr/local$ scp -r /usr/local/jdk1.8.0_121/ hadoop@slave1:/usr/local/tmp hadoop@master:/usr/local$ scp -r /usr/local/hadoop-2.7.7/ hadoop@slave1:/usr/local/tmp3、将临时文件移动到/usr/local
hadoop@slave1:/usr/local$ su - root Password: root@slave1:~# mv -f /usr/local/tmp/jdk1.8.0_121/ /usr/local/jdk1.8.0_121/ root@slave1:~# mv -f /usr/local/tmp/hadoop-2.7.7/ /usr/local/hadoop-2.7.7/4、操作(1-3)在slave2上再执行一遍 9、启动集群 1、首次启动集群需格式化,再次启动不要再格式化 2、如果需要重新进行格式化,则需要3个步骤进行操作
hadoop@slave2:/usr/local/hadoop-2.7.7$ stop-all.sh hadoop@slave2:/usr/local/hadoop-2.7.7/data$ rm -rf * hadoop@master:/usr/local/hadoop-2.7.7/logs$ rm -rf * hadoop@master:/usr/local/hadoop-2.7.7/logs$ hdfs namenode -format 21/08/19 19:47:22 INFO util.ExitUtil: Exiting with status 0 3、start-all.sh启动所有进程
hadoop@master:/usr/local/hadoop-2.7.7/logs$ start-all.sh4、检查各节点是否正常
//检查master节点是否都正常 hadoop@master:/usr/local/hadoop-2.7.7/logs$ jps 7286 Jps 6631 NameNode 6874 SecondaryNameNode 7036 ResourceManager //检查slave1节点是否都正常 hadoop@master:/usr/local/hadoop-2.7.7/logs$ ssh slave1 hadoop@slave1:~$ jps 2791 DataNode 3213 Jps 3039 NodeManager hadoop@slave1:~$ exit //检查slave2节点是否都正常 hadoop@master:/usr/local/hadoop-2.7.7/logs$ ssh slave2 hadoop@slave2:~$ jps 2801 NodeManager 2977 Jps 2553 DataNode hadoop@slave2:~$ exit10、测试HDFS和YARN 1、虚拟机中访问
http://localhost:50070
http://master:8088
二、Zookeeper集群搭建 1、解压安装文件hadoop@master:~$ sudo rm -rf /usr/local/zookeeper/ hadoop@master:~$sudo mkdir /usr/local/zookeeper/ hadoop@master:~$sudo chown -R hadoop:hadoop /usr/local/zookeeper/ hadoop@master:~$sudo tar -zxvf ~/Downloads/apache-zookeeper-3.5.9-bin.tar.gz* -C /usr/local/zookeeper/ hadoop@master:~$cd /usr/local/zookeeper hadoop@master:/usr/local/zookeeper$sudo chown -R hadoop:hadoop apache-zookeeper-3.5.9-bin/ hadoop@master:/usr/local/zookeeper$sudo mv apache-zookeeper-3.5.9-bin/ zookeeper hadoop@master:/usr/local/zookeeper$ cd zookeeper/ hadoop@master:/usr/local/zookeeper/zookeeper$ ll total 48 drwxr-xr-x 6 hadoop hadoop 4096 Nov 22 18:33 ./ drwxr-xr-x 3 hadoop hadoop 4096 Nov 22 18:34 ../ drwxr-xr-x 2 hadoop hadoop 4096 Jan 6 2021 bin/ drwxr-xr-x 2 hadoop hadoop 4096 Jan 6 2021 conf/ drwxr-xr-x 5 hadoop hadoop 4096 Jan 6 2021 docs/ drwxr-xr-x 2 hadoop hadoop 4096 Nov 22 18:34 lib/ -rw-r--r-- 1 hadoop hadoop 11358 Oct 5 2020 LICENSE.txt -rw-r--r-- 1 hadoop hadoop 432 Jan 6 2021 NOTICE.txt -rw-r--r-- 1 hadoop hadoop 1560 Jan 6 2021 README.md -rw-r--r-- 1 hadoop hadoop 1347 Jan 6 2021 README_packaging.txt2、配置环境变量
hadoop@master:/usr/local$sudo vim ~/.bashrc #Zookeeper export ZK_HOME=/usr/local/zookeeper/zookeeper export PATH=$PATH:$ZK_HOME/bin hadoop@master:/usr/local$source ~/.bashrc3、指定目录下创建data和datalog文件夹
hadoop@master:/usr/local/zookeeper/zookeeper$mkdir data hadoop@master:/usr/local/zookeeper/zookeeper$mkdir datalog4、配置zoo.cfg文件
hadoop@master:/usr/local/zookeeper/zookeeper$cd conf hadoop@master:/usr/local/zookeeper/zookeeper/conf$cp zoo_sample.cfg zoo.cfg hadoop@master:/usr/local/zookeeper/zookeeper/conf$vim zoo.cfg #Thenumberofmillisecondsofeachtick tickTime=2000 #Thenumberofticksthattheinitial #synchronizationphasecantake initLimit=10 #Thenumberofticksthatcanpassbetween #sendingarequestandgettinganacknowledgement syncLimit=5 #thedirectorywherethesnapshotisstored. #donotuse/tmpforstorage,/tmphereisjust #examplesakes. dataDir=/usr/local/zookeeper/zookeeper/data dataLogDir=/usr/local/zookeeper/zookeeper/datalog #theportatwhichtheclientswillconnect clientPort=2181 #themaximumnumberofclientconnections. #increasethisifyouneedtohandlemoreclients server.1=192.168.126.143:2888:3888 ——此处3个ip地址需配置为集群环境的实际ip地址(与/etc/hosts中master、slave1、slave2的ip一致) server.2=192.168.126.146:2889:3889 server.3=192.168.126.147:2890:3890 maxClientCnxns=60 # #Besuretoreadthemaintenancesectionofthe #administratorguidebeforeturningonautopurge. # #http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance # #ThenumberofsnapshotstoretainindataDir #autopurge.snapRetainCount=3 #Purgetaskintervalinhours #Setto"0"todisableautopurgefeature #autopurge.purgeInterval=1 5、配置同步至其他节点 1、scp传输文件
hadoop@master:/usr/local$ scp -r /usr/local/zookeeper/ hadoop@slave1:/usr/local/tmp2、将临时目录下的文件移动到/usr/local
hadoop@slave1:~$ sudo mv -f /usr/local/tmp/zookeeper/ /usr/local/zookeeper/3、配置环境变量
root@slave1:~#su - hadoop hadoop@slave1:~$ sudo vim ~/.bashrc #Zookeeper export ZK_HOME=/usr/local/zookeeper/zookeeper export PATH=$PATH:$ZK_HOME/bin4、环境变量生效
hadoop@slave1:~$ source ~/.bashrc5、将操作(1-4)在slave2上再执行一遍 6、创建myid文件
hadoop@master:~$vim /usr/local/zookeeper/zookeeper/data/myid 1 hadoop@slave1:~$vim /usr/local/zookeeper/zookeeper/data/myid 2 hadoop@slave2:~$vim /usr/local/zookeeper/zookeeper/data/myid 37、启动zookeeper
启动master的zookeeper
hadoop@master:~$ zkServer.sh start
启动slave1的zookeeper
hadoop@slave1:~$ zkServer.sh start
启动slave2的zookeeper
hadoop@slave2:~$ zkServer.sh start
8、测试连接zookeeper
hadoop@slave2:/usr/local/zookeeper/zookeeper/bin$zkCli.sh



