一 基本环境配置
1. vi /etc/hosts(所有节点) 10.8.20.11 azurepana01 10.8.20.12 azurepana02 10.8.20.13 azurepana03 10.8.20.14 azurepana04 设置ssh免密登陆(所有节点) ssh-keygen -t rsa 拷贝公钥(所有节点): ssh-copy-id GSPAgent@azurepana01 ssh-copy-id GSPAgent@azurepana02 ssh-copy-id GSPAgent@azurepana03 ssh-copy-id GSPAgent@azurepana04 关闭selinux(所有节点) vi /etc/selinux/config SELINUX=disabled 关闭防火墙(所有节点) systemctl stop firewalld systemctl disable firewalld systemctl status firewalld 修改时区(所有节点) sudo cp -f /usr/share/zoneinfo/Asia/Shanghai /etc/localtime sudo clock -w
二 .安装jdk
卸载openJDK(所有节点) rpm -qa | grep jdk ... yum -y remove ... 上传jdk-8u202-linux-x64.rpm到/opt/cdh/rpm/目录 安装jdk yum localinstall jdk*.rpm -y 配置JAVA_HOME 修改环境变量 vi /etc/profile # 末尾追加以下内容 export JAVA_HOME=/usr/java/default export PATH=$JAVA_HOME/bin:$PATH export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar # 使修改生效 source /etc/profile
三.在hive主节点安装mysql(元数据库)
1. 上传包 mysql-5.7.26-1.el7.x86_64.rpm-bundle.tar mysql-connector-java.jar到/opt/cdh/rpm/目录 2.在所有节点安装mysql驱动程序,执行以下命令: sudo mkdir -p /usr/share/java sudo cp mysql-connector-java.jar /usr/share/java/mysql-connector-java.jar 3.卸载干净mariadb(主节点) # 查看mariadb服务 rpm -qa | grep mariadb # 卸载mariadb服务 rpm -e --nodeps mariadb-libs-* 4. 安装mysql服务(只在元数据库节点) # 创建mysql目录,并上传tar包到该目录 mkdir /opt/cdh/mysql # 解压 tar -xvf mysql-5.7.26-1.el7.x86_64.rpm-bundle.tar -C /opt/cdh/mysql 按以下顺序安装相关rpm: 安装cdh时需要 [root@cdhmaster01 mysql]# rpm -ivh mysql-community-common-5.7.26-1.el7.x86_64.rpm 警告:mysql-community-common-5.7.26-1.el7.x86_64.rpm: 头V3 DSA/SHA1 Signature, 密钥 ID 5072e1f5: NOKEY 准备中... ################################# [100%] 正在升级/安装... 1:mysql-community-common-5.7.26-1.e################################# [100%] [root@cdhmaster01 mysql]# rpm -ivh mysql-community-libs-5.7.26-1.el7.x86_64.rpm 警告:mysql-community-libs-5.7.26-1.el7.x86_64.rpm: 头V3 DSA/SHA1 Signature, 密钥 ID 5072e1f5: NOKEY 准备中... ################################# [100%] 正在升级/安装... 1:mysql-community-libs-5.7.26-1.el7################################# [100%] [root@cdhmaster01 mysql]# rpm -ivh mysql-community-libs-compat-5.7.26-1.el7.x86_64.rpm 警告:mysql-community-libs-compat-5.7.26-1.el7.x86_64.rpm: 头V3 DSA/SHA1 Signature, 密钥 ID 5072e1f5: NOKEY 准备中... ################################# [100%] 正在升级/安装... 1:mysql-community-libs-compat-5.7.2################################# [100%] [root@cdhmaster01 mysql]# rpm -ivh mysql-community-client-5.7.26-1.el7.x86_64.rpm 警告:mysql-community-client-5.7.26-1.el7.x86_64.rpm: 头V3 DSA/SHA1 Signature, 密钥 ID 5072e1f5: NOKEY 准备中... ################################# [100%] 正在升级/安装... 1:mysql-community-client-5.7.26-1.e################################# [100%] [root@cdhmaster01 mysql]# rpm -ivh mysql-community-server-5.7.26-1.el7.x86_64.rpm 警告:mysql-community-server-5.7.26-1.el7.x86_64.rpm: 头V3 DSA/SHA1 Signature, 密钥 ID 5072e1f5: NOKEY 准备中... ################################# [100%] 正在升级/安装... 
1:mysql-community-server-5.7.26-1.e################################# [100%] [root@cdhmaster01 mysql]# #启动mysql sudo service mysqld start #查看默认生成的密码 sudo grep 'temporary password' /var/log/mysqld.log #登陆mysql mysql -uroot -p #第一次重设密码 ALTER USER 'root'@'localhost' IDENTIFIED BY 'MyNewPasswd4!'; #修改host值 mysql>use mysql; mysql> update user set host = '%' where user ='root'; mysql>flush privileges; mysql> select host,user from user where user='root'; #修改为简单密码 #只验证密码长度 set global validate_password_policy=0; mysql>flush privileges; #查看密码长度 select @@validate_password_length; 修改默认密码的长度(这里修改为6) set global validate_password_length=6; mysql>flush privileges; #重设密码为passwd ALTER USER 'root'@'%' IDENTIFIED BY 'passwd'; mysql>flush privileges; exit; #设置开机自启动 chkconfig mysqld on #重新用新密码登陆 mysql -uroot -ppasswd mysql> Grant all privileges on *.* to 'root'@'%' identified by 'passwd' with grant option; Query OK, 0 rows affected, 1 warning (0.00 sec) mysql> flush privileges; Query OK, 0 rows affected (0.00 sec)
四.安装zookeeper
1.安装zookeeper(所有节点) mkdir /opt/cluster sudo chown GSPAgent:GSPAgent /opt/cluster ..上传包 sudo mkdir /opt/cluster/hadoop mkdir /opt/cluster/zookeeper mkdir /opt/cluster/zookeeper/data vi /opt/cluster/zookeeper/data/myid 1 (1 or 2 or 3) tar -xzvf apache-zookeeper-* -C ./zookeeper/ vi ./zoo.cfg # The number of milliseconds of each tick tickTime=2000 # The number of ticks that the initial # synchronization phase can take initLimit=5 # The number of ticks that can pass between # sending a request and getting an acknowledgement syncLimit=2 # the directory where the snapshot is stored. # do not use /tmp for storage, /tmp here is just # example sakes. #dataDir=/tmp/zookeeper dataDir=/opt/cluster/zookeeper/data # the port at which the clients will connect clientPort=2181 # the maximum number of client connections. # increase this if you need to handle more clients #maxClientCnxns=60 # # Be sure to read the maintenance section of the # administrator guide before turning on autopurge. # # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance # # The number of snapshots to retain in dataDir #autopurge.snapRetainCount=3 # Purge task interval in hours # Set to "0" to disable auto purge feature #autopurge.purgeInterval=1 ## Metrics Providers # # https://prometheus.io Metrics Exporter #metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider #metricsProvider.httpPort=7000 #metricsProvider.exportJvmInfo=true server.1=azurepana02:2888:3888 server.2=azurepana03:2888:3888 server.3=azurepana04:2888:3888 cp ./zoo.cfg ./zookeeper/apache-zookeeper-3.6.3-bin/conf/zoo.cfg # 所有节点 zkServer.sh start zkServer.sh status zkServer.sh stop
五. 安装hadoop
1.上传安装包到其中一个节点 upload hadoop-3.2.2.tar.gz to /opt/cluster tar -zxvf spark-3.1.2-bin-hadoop3.2.tgz -C /opt/cluster/hadoop 2.配置 core-site.xml,hdfs-site.xml,yarn-site.xml,mapred-site.xml,workers 文件 vi /opt/cluster/hadoop/hadoop-3.2.2/etc/hadoop/core-site.xmlvi /opt/cluster/hadoop/hadoop-3.2.2/etc/hadoop/hdfs-site.xml hadoop.tmp.dir /opt/cluster/hadoop/tmp fs.defaultFS hdfs://azurepana01:9820 ha.zookeeper.quorum azurepana02:2181,azurepana03:2181,azurepana04:2181 hadoop.proxyuser.GSPAgent.hosts * hadoop.proxyuser.GSPAgent.groups * vi /opt/cluster/hadoop/hadoop-3.2.2/etc/hadoop/yarn-site.xml dfs.namenode.name.dir /opt/cluster/hadoop/dfs/name dfs.datanode.data.dir /opt/cluster/hadoop/dfs/data dfs.replication 2 dfs.webhdfs.enabled true dfs.nameservices mycluster dfs.permissions.enabled false dfs.ha.namenodes.mycluster nn1,nn2 dfs.namenode.rpc-address.mycluster.nn1 azurepana01:9820 dfs.namenode.rpc-address.mycluster.nn2 azurepana02:9820 dfs.namenode.http-address.mycluster.nn1 azurepana01:9870 dfs.namenode.http-address.mycluster.nn2 azurepana02:9870 dfs.namenode.shared.edits.dir qjournal://azurepana02:8485;azurepana03:8485;azurepana04:8485/mycluster dfs.client.failover.proxy.provider.hbzx org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider dfs.ha.fencing.methods sshfence dfs.ha.fencing.ssh.private-key-files /home/GSPAgent/.ssh/id_rsa dfs.journalnode.edits.dir /opt/cluster/hadoop/journal/data dfs.ha.automatic-failover.enabled true dfs.journalnode.edits.dir /home/GSPAgent/data/journaldata/jn dfs.ha.fencing.methods shell(/bin/true) dfs.ha.fencing.ssh.connect-timeout 10000 dfs.namenode.handler.count 100 vi /opt/cluster/hadoop/hadoop-3.2.2/etc/hadoop/mapred-site.xml yarn.nodemanager.aux-services mapreduce_shuffle yarn.nodemanager.env-whitelist JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME yarn.resourcemanager.ha.enabled true yarn.resourcemanager.cluster-id mycluster 
yarn.resourcemanager.ha.rm-ids rm1,rm2 yarn.resourcemanager.hostname.rm1 azurepana01 yarn.resourcemanager.hostname.rm2 azurepana02 yarn.resourcemanager.webapp.address.rm1 azurepana01:8088 yarn.resourcemanager.webapp.address.rm2 azurepana02:8088 hadoop.zk.address azurepana02:2181,azurepana03:2181,azurepana04:2181 yarn.nodemanager.resource.detect-hardware-capabilities true yarn.scheduler.capacity.maximum-am-resource-percent 0.5 vi workers azurepana02 azurepana03 azurepana04 3.sudo vi /etc/profile export JAVA_HOME=/usr/java/default export PATH=$JAVA_HOME/bin:$PATH export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar export HADOOP_HOME=/opt/cluster/hadoop/hadoop-3.2.2 export PATH=$PATH:$HADOOP_HOME/bin export PATH=$PATH:$HADOOP_HOME/sbin export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop export SPARK_HOME=/opt/cluster/spark-3.1.2-bin-hadoop3.2 export PATH=$PATH:$SPARK_HOME/bin # By default, we want umask to get set. This sets it for login shell export SCALA_HOME=/opt/cluster/scala-2.11.8 export PATH=$PATH:$SCALA_HOME/bin:$SCALA_HOME/sbin export HIVE_HOME=/opt/cluster/apache-hive-3.1.2-bin export PATH=$PATH:$HIVE_HOME/bin export FLUME_HOME=/opt/cluster/flume export PATH=$PATH:$FLUME_HOME/bin export KAFKA_HOME=/opt/cluster/kafka/kafka_2.11-1.1.0 export PATH=$PATH:$KAFKA_HOME/bin export DATAX_HOME=/opt/cluster/datax export PATH=$PATH:$DATAX_HOME/bin 4.source /etc/profile 5.从该节点分发已配置的文件到其他节点 tar -zcvf /opt/cluster/hadoop/hadoop-3.2.2.tar.gz /opt/cluster/hadoop/hadoop-3.2.2 scp hadoop-3.2.2.tar.gz azurepana02:/opt/cluster/hadoop tar -xzvf /opt/cluster/hadoop/hadoop-3.2.2.tar.gz -C /opt/cluster/hadoop/ 6.启动zookeeeper格式化hdfs zkServer.sh start(所有节点) hdfs zkfc -formatZK hdfs --daemon start journalnode #hdfs --daemon stop journalnode hdfs namenode -format hdfs --daemon start namenode # 另一个节点同步 hdfs namenode -bootstrapStandby # 检查nn1状态(因为mysql安装在nn1所在节点,所以必须保证nn1 active) hdfs haadmin -getServiceState nn1 # 强行切换name node 主节点(因为mysql安装在nn1所在节点,所以必须保证nn1 
active,如果nn1不是active,需要在nn2所在节点执行以下) /opt/cluster/hadoop/hadoop-3.2.2/sbin/hadoop-daemon.sh stop zkfc /opt/cluster/hadoop/hadoop-3.2.2/sbin/hadoop-daemon.sh start zkfc mapreduce.framework.name yarn
六.hive安装
upload apache-hive-3.1.2-bin.tar.gz to /opt/cluster
tar -zxvf apache-hive-3.1.2-bin.tar.gz -C ./
cd /opt/cluster/apache-hive-3.1.2-bin/conf
cp hive-env.sh.template hive-env.sh
vi hive-env.sh
export HADOOP_HOME=/opt/cluster/hadoop/hadoop-3.2.2
export HIVE_CONF_DIR=/opt/cluster/apache-hive-3.1.2-bin/conf
#export SPARK_HOME=/opt/cluster/spark-3.1.2-bin-hadoop3.2
#export HIVE_AUX_JARS_PATH=$(find ${SPARK_HOME}/jars/ -name '*.jar' -and -not -name '*hadoop*' -printf '%p:' | head -c-1)
#export HIVE_AUX_JARS_PATH=${SPARK_HOME}/jars
export HIVE_AUX_JARS_PATH=/opt/cluster/apache-hive-3.1.2-bin/lib
vi /opt/cluster/apache-hive-3.1.2-bin/conf/hive-site.xml
datanucleus.schema.autoCreateAll
true
hive.metastore.schema.verification
false
javax.jdo.option.ConnectionURL
jdbc:mysql://azurepana02:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;serverTimezone=CST&amp;characterEncoding=utf8
javax.jdo.option.ConnectionDriverName
com.mysql.jdbc.Driver
javax.jdo.option.ConnectionUserName
root
javax.jdo.option.ConnectionPassword
passwd
hive.metastore.warehouse.dir
/user/hive/warehouse
hive.exec.scratchdir
/opt/cluster/apache-hive-3.1.2-bin/tmp
hive.querylog.location
/opt/cluster/apache-hive-3.1.2-bin/log
hive.metastore.uris
thrift://azurepana02:9083
hive.server2.thrift.port
10000
hive.server2.thrift.bind.host
0.0.0.0
hive.server2.webui.host
0.0.0.0
hive.server2.authentication
NONE
hive.server2.active.passive.ha.enable
true
hive.metastore.event.db.notification.api.auth
false
hive.metastore.local
false
hive.server2.thrift.client.user
GSPAgent
Username to use against thrift client
hive.server2.thrift.client.password
GSPAgent
Password to use against thrift client
hive.server2.webui.port
10002
hive.server2.long.polling.timeout
5000
hive.server2.enable.doAs
true
datanucleus.autoCreateSchema
false
datanucleus.fixedDatastore
true
hive.execution.engine
mr
hive.cli.print.header
true
hive.resultset.use.unique.column.names
false
hive.metastore.client.socket.timeout
1800
hive.server.read.socket.timeout
1800
hive.server.write.socket.timeout
1800
hive.server.thrift.socket.timeout
1800
hive.client.thrift.socket.timeout
1800
hive.merge.mapfiles
true
hive.merge.mapredfiles
true
hive.stats.column.autogather
false
hive.exec.dynamic.partition
true
hive.exec.dynamic.partition.mode
nonstrict
cd /opt/cluster/apache-hive-3.1.2-bin/lib
mv guava-19.0.jar guava-19.0.jar.bak
cp /opt/cluster/hadoop/hadoop-3.2.2/share/hadoop/common/lib/guava-27.0-jre.jar /opt/cluster/apache-hive-3.1.2-bin/lib/
cp mysql-connector-java-5.1.49.jar /opt/cluster/apache-hive-3.1.2-bin/lib/
sudo vi /etc/profile
source /etc/profile
schematool -dbType mysql -initSchema
nohup /opt/cluster/apache-hive-3.1.2-bin/bin/hive --service metastore & (非必须)
# 发送到其他节点
tar -zcvf /opt/cluster/apache-hive-3.1.2-bin.tar.gz /opt/cluster/apache-hive-3.1.2-bin
scp ./apache-hive-3.1.2-bin.tar.gz azurepana04:/opt/cluster/
tar -zxvf apache-hive-3.1.2-bin.tar.gz -C ./
sudo vi /etc/profile
source /etc/profile
# 测试
hive
hive问题解决
1.Operation category READ is not supported in state standby
# 强行切换name node 主节点
hdfs haadmin -getServiceState nn1
/opt/cluster/hadoop/hadoop-3.2.2/sbin/hadoop-daemon.sh stop zkfc
/opt/cluster/hadoop/hadoop-3.2.2/sbin/hadoop-daemon.sh start zkfc
yarn rmadmin -getServiceState rm1
yarn rmadmin -transitionToActive --forcemanual rm1
2.The specified datastore driver ("com.mysql.jdbc.Driver") was not found in the CLASSPATH
cp mysql-connector-java.jar /opt/cluster/apache-hive-3.1.2-bin/lib/
3.metaException(message:Version information not found in metastore.)
hive.metastore.schema.verification
false
4.hive> show databases;
FAILED: HiveException java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHivemetaStoreClient
# 后台启动hive metastore
nohup /opt/cluster/apache-hive-3.1.2-bin/bin/hive --service metastore >> /opt/cluster/apache-hive-3.1.2-bin/log.log 2>&1 &
nohup hiveserver2 >> /opt/cluster/apache-hive-3.1.2-bin/hiveserver2.log &
hive --service beeline
hive --service beeline -u jdbc:hive2://azurepana02:10000/default -n GSPAgent
hive --service beeline -u jdbc:hive2://azurepana02:10000/mytest -n GSPAgent
!connect jdbc:hive2://azurepana02:10000/mytest
beeline -u jdbc:hive2://azurepana02:10000/default -n GSPAgent
set hive.execution.engine=spark;
5.required table missing : "`DBS`" in Catalog "" Schema "". DataNucleus requires this table
在hive-site.xml中配置
datanucleus.schema.autoCreateAll
true
6.Error: Table 'CTLGS' already exists (state=42S01,code=1050)
去mysql数据库的hive下删掉该表
7.hive insert 在非overwrite插入数据,如果使用非overwrite模式并且表已经存在数据的时候可能遇到如下错误:
Loading data to table test.test1
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.StatsTask
set hive.stats.column.autogather=false;
8.提交spark submit 时Exception in thread "main" org.apache.spark.sql.AnalysisException: Table or view not found: test.test1; line 1 pos 13;
'Project [*]
+- 'UnresolvedRelation [test, test1], [], false
cp /opt/cluster/apache-hive-3.1.2-bin/conf/hive-site.xml /opt/cluster/spark-3.1.2-bin-hadoop3.2/conf/hive-site.xml
七. spark安装
upload spark-3.1.2-bin-hadoop3.2.tgz to /opt/cluster
tar -zxvf spark-3.1.2-bin-hadoop3.2.tgz -C ./
sudo vi /etc/profile
source /etc/profile
cd $SPARK_HOME
cp conf/spark-env.sh.template conf/spark-env.sh
vi conf/spark-env.sh
export JAVA_HOME=/usr/java/default
export HADOOP_HOME=/opt/cluster/hadoop/hadoop-3.2.2
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
# 显式配置主机号和端口号
export SPARK_MASTER_HOST=azurepana02
export SPARK_MASTER_PORT=7077
# 主机内存和核
export SPARK_WORKER_MEMORY=4g
export SPARK_WORKER_CORES=2
export SPARK_DIST_CLASSPATH=$(/opt/cluster/hadoop/hadoop-3.2.2/bin/hadoop classpath)
cp conf/workers.template conf/workers
vi conf/workers
azurepana01
azurepana02
azurepana04
cp $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf/hive-site.xml
# 发送到其他节点
tar -zcvf /opt/cluster/spark-3.1.2-bin-hadoop3.2.tar.gz /opt/cluster/spark-3.1.2-bin-hadoop3.2
scp ./spark-3.1.2-bin-hadoop3.2.tar.gz azurepana04:/opt/cluster/
# 其他节点解压
tar -zxvf spark-3.1.2-bin-hadoop3.2.tgz -C ./
# 其他节点修改/etc/profile
sudo vi /etc/profile
source /etc/profile
# 测试
spark-shell
val file=sc.textFile("hdfs:/test/test.txt")
file.collect
自行编译spark(可选)
./dev/make-distribution.sh --name build --tgz -Phive-3.1.2 -Phive-thriftserver -Phadoop-3.2 -Phadoop-provided -Pyarn -Pscala-2.12 -Dhadoop.version=3.2.2 -DskipTests ./dev/make-distribution.sh --name build --tgz -Phive-3.1.2 -Phive-thriftserver -Phadoop-3.2 -Phadoop-provided -Pparquet-provided,-Porc-provided,-Pyarn -Pscala-2.12 -Dhadoop.version=3.2.2 -DskipTests /opt/cluster/spark-3.1.2-bin-hadoop3.2 /opt/cluster/apache-hive-3.1.2-bin/log /opt/cluster/apache-hive-3.1.2-bin/tmp /opt/cluster/spark-3.1.2-bin-hadoop3.2/sbin/stop-all.sh /opt/cluster/spark-3.1.2-bin-hadoop3.2/sbin/start-all.sh tail -100f /opt/cluster/spark-3.1.2-bin-hadoop3.2/logs/spark-GSPAgent-org.apache.spark.deploy.master.Master-1-azurepana04.out tail -100f /opt/cluster/spark-3.1.2-bin-hadoop3.2/logs/spark-GSPAgent-org.apache.spark.deploy.worker.Worker-1-azurepana04.out cp $HADOOP_HOME/etc/hadoop/core-site.xml $SPARK_HOME/conf/ cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml $SPARK_HOME/conf/ cp $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf/ ls jars |grep hive* /opt/cluster/spark-3.1.2-bin-without-hadoop/jars
hive on spark hive3.1.2集成spark2.4.7安装(可选)
mv spark-2.4.7-bin-without-hadoop spark-3.1.2-bin-hadoop3.2
cd $SPARK_HOME/conf/
cp spark-env.sh.template spark-env.sh
vi spark-env.sh
export JAVA_HOME=/usr/java/default
export HADOOP_HOME=/opt/cluster/hadoop/hadoop-3.2.2
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export SPARK_HOME=/opt/cluster/spark-3.1.2-bin-hadoop3.2
export SPARK_DIST_CLASSPATH=$(hdfs classpath)
#export MASTER_WEBUI_PORT=8079
export SPARK_LOG_DIR=/home/GSPAgent/tmp/data/spark/logs
export SPARK_LIBRARY_PATH=${SPARK_HOME}/jars
cp spark-defaults.conf.template spark-defaults.conf
vi spark-defaults.conf
spark.executor.memory 1g
spark.driver.cores 1
spark.driver.maxResultSize 0
vi $HIVE_HOME/conf/hive-site.xml
hive.execution.engine
spark
spark.master
yarn-cluster
spark.serializer
org.apache.spark.serializer.KryoSerializer
spark.driver.cores
1
spark.executor.extraJavaOptions
-XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
cd $SPARK_HOME/jars
cp spark-core_*.jar spark-network-common_*.jar spark-unsafe_*.jar spark-yarn_*.jar $HIVE_HOME/lib/
ls -al $HIVE_HOME/lib/ | grep 'spark'
cd $SPARK_HOME/jars
ls -al | grep 'orc-core'
mv orc-core-*.jar orc-core-1.5.5-nohive.jar.bak
hive
select count(1) from test.test1;
八.flume安装
mv apache-flume-1.8.0-bin flume sudo vi /etc/profile cd ./flume/conf cp flume-env.sh.template flume-env.sh vi flume-env.sh export JAVA_HOME=/usr/java/default # 新增flume任务配置文件(一个同步任务一个配置文件) /opt/cluster/flume/conf/table_1.conf # 启动flume agent a1 flume-ng agent -c . -f /opt/cluster/flume/conf/table_1.conf -n a1 -Dflume.root.logger=INFO,console flume-ng agent -c . -f /opt/cluster/flume/conf/spool.conf -n a1 -Dflume.root.logger=INFO,console echo "spool test1" > /opt/cluster/flume/conf/logs/spool_text.log
九.kafka 安装测试
cd /opt/cluster tar -zxvf kafka_2.11-1.1.0.tgz mv kafka_2.11-1.1.0 kafka sudo vi /etc/profile source /etc/profile vi server.properties broker.id=1 delete.topic.enable=true auto.create.topics.enable=true default.replication.factor=2 listeners=PLAINTEXT://azurepana01:9092 log.dirs=/opt/cluster/kafka/logs zookeeper.connect=azurepana02:2181,azurepana03:2181,azurepana04:2181 # Timeout in ms for connecting to zookeeper zookeeper.connection.timeout.ms=6000 mkdir logs vi kafka-start-all.sh vi kafka-stop-all.sh sh kafka-start-all.sh kafka-topics.sh --create --zookeeper azurepana02:2181 --replication-factor 1 --partitions 1 --topic test_topic kafka-topics.sh --describe --zookeeper azurepana02:2181 --topic test_topic kafka-topics.sh --list --zookeeper azurepana02:2181 kafka-console-producer.sh --broker-list azurepana02:9092 --topic test_topic kafka-console-consumer.sh --zookeeper azurepana02:2181 --topic test_topic --from-beginning
10 各个组件一键启动脚本
[GSPAgent@azurepana01 start]$ cat kafka-start-all.sh
ssh azurepana01 << eeooff
kafka-server-start.sh -daemon /opt/cluster/kafka/kafka_2.11-1.1.0/config/server.properties
exit
eeooff
ssh azurepana02 << eeooff
kafka-server-start.sh -daemon /opt/cluster/kafka/kafka_2.11-1.1.0/config/server.properties
exit
eeooff
ssh azurepana03 << eeooff
kafka-server-start.sh -daemon /opt/cluster/kafka/kafka_2.11-1.1.0/config/server.properties
exit
eeooff
[GSPAgent@azurepana01 start]$ cat kafka-stop-all.sh
ssh azurepana01 << eeooff
kafka-server-stop.sh -daemon /opt/cluster/kafka/kafka_2.11-1.1.0/config/server.properties
exit
eeooff
ssh azurepana02 << eeooff
kafka-server-stop.sh -daemon /opt/cluster/kafka/kafka_2.11-1.1.0/config/server.properties
exit
eeooff
ssh azurepana03 << eeooff
kafka-server-stop.sh -daemon /opt/cluster/kafka/kafka_2.11-1.1.0/config/server.properties
exit
eeooff
[GSPAgent@azurepana01 start]$ cat start-hadoop.sh
#/opt/cluster/spark-3.1.2-bin-hadoop3.2/sbin/start-all.sh
/opt/cluster/hadoop/hadoop-3.2.2/sbin/start-all.sh
state=`hdfs haadmin -getServiceState nn1`
echo $state
if [ "${state}" = "standby" ];then
/opt/cluster/hadoop/hadoop-3.2.2/sbin/hadoop-daemon.sh stop zkfc
sleep 15s
/opt/cluster/hadoop/hadoop-3.2.2/sbin/hadoop-daemon.sh start zkfc
fi
state=`hdfs haadmin -getServiceState nn1`
if [ "${state}" = "active" ];then
echo "启动成功..."
exit 0
else
echo "切换nn1失败..."
exit 1
fi
[GSPAgent@azurepana01 start]$ cat stop-hadoop.sh
#/opt/cluster/spark-3.1.2-bin-hadoop3.2/sbin/stop-all.sh
/opt/cluster/hadoop/hadoop-3.2.2/sbin/stop-all.sh
[GSPAgent@azurepana01 start]$ cat zookeeper-start-all.sh
ssh azurepana02 << eeooff
zkServer.sh start
exit
eeooff
ssh azurepana03 << eeooff
zkServer.sh start
exit
eeooff
ssh azurepana04 << eeooff
zkServer.sh start
exit
eeooff
[GSPAgent@azurepana01 start]$ cat zookeeper-start-all.sh
ssh azurepana02 << eeooff
zkServer.sh start
exit
eeooff
ssh azurepana03 << eeooff
zkServer.sh start
exit
eeooff
ssh azurepana04 << eeooff
zkServer.sh start
exit
eeooff
#ssh azurepana02
[GSPAgent@azurepana01 start]$ cat zookeeper-stop-all.sh
ssh azurepana02 << eeooff
zkServer.sh stop
exit
eeooff
ssh azurepana03 << eeooff
zkServer.sh stop
exit
eeooff
ssh azurepana04 << eeooff
zkServer.sh stop
exit
eeooff
[GSPAgent@azurepana01 start]$ cat start-spark.sh
/opt/cluster/spark-3.1.2-bin-hadoop3.2/sbin/start-all.sh
[GSPAgent@azurepana01 start]$ cat stop-spark.sh
/opt/cluster/spark-3.1.2-bin-hadoop3.2/sbin/stop-all.sh



