hostnamectl set-hostname master
vi /etc/hosts
tzselect
vi /etc/profile
echo "TZ='Asia/Shanghai'; export TZ" >> /etc/profile && source /etc/profile
vi /etc/ntp.conf
server 127.127.1.0
fudge 127.127.1.0 stratum 10
service ntpd start
systemctl status ntpd
crontab -e
*/30 10-17 * * * /usr/sbin/ntpdate master
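Before relying on the cron job, you can test the sync manually on a slave node (a quick check, assuming ntpd is already running on master):
/usr/sbin/ntpdate master
date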
ssh-keygen -t rsa
ssh-copy-id master
ssh localhost
ssh-copy-id slave1
ssh-copy-id slave2
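To confirm passwordless login took effect, each of these should print the remote hostname without asking for a password:
ssh slave1 hostname
ssh slave2 hostname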
mkdir -p /usr/java
tar -zxvf jdk1.8.0_221.tar.gz -C /usr/java
vi /etc/profile
export JAVA_HOME=/usr/java/jdk1.8.0_221
export PATH=$PATH:$JAVA_HOME/bin
source /etc/profile
java -version
mkdir -p /usr/zookeeper
tar xvf zookeeper-3.4.10.tar.gz -C /usr/zookeeper
vi /etc/profile
export ZOOKEEPER_HOME=/usr/zookeeper/zookeeper-3.4.10
export PATH=$PATH:$ZOOKEEPER_HOME/bin
cd /usr/zookeeper/zookeeper-3.4.10/conf
mv zoo_sample.cfg zoo.cfg
vi zoo.cfg
dataDir=/usr/zookeeper/zookeeper-3.4.10/zkdata
dataLogDir=/usr/zookeeper/zookeeper-3.4.10/zkdatalog
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
cd /usr/zookeeper/zookeeper-3.4.10 && mkdir zkdata zkdatalog
cd /usr/zookeeper/zookeeper-3.4.10/zkdata && touch myid
echo 1 >> myid   # write 2 on slave1 and 3 on slave2, matching server.N in zoo.cfg
bin/zkServer.sh start
bin/zkServer.sh status
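After starting ZooKeeper on all three nodes, one should report leader and the others follower. A loop like the following (run from master, assuming the same install path on every node) checks them all at once:
for h in master slave1 slave2; do ssh $h '/usr/zookeeper/zookeeper-3.4.10/bin/zkServer.sh status'; done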
mkdir -p /usr/hadoop
tar -zxvf hadoop-2.7.3.tar.gz -C /usr/hadoop
vi /etc/profile
export HADOOP_HOME=/usr/hadoop/hadoop-2.7.3
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
source /etc/profile
cd /usr/hadoop/hadoop-2.7.3/etc/hadoop && vi hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_221
---------vi core-site.xml----------------
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://master:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/hadoop/hadoop-2.7.3/hdfs/tmp</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <property>
    <name>fs.checkpoint.period</name>
    <value>60</value>
  </property>
  <property>
    <name>fs.checkpoint.size</name>
    <value>67108864</value>
  </property>
</configuration>
---------hdfs-site.xml----------------
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/hadoop/hadoop-2.7.3/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/hadoop/hadoop-2.7.3/hdfs/data</value>
  </property>
</configuration>
vi yarn-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_221
-----------vi yarn-site.xml--------------
<configuration>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master:18040</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master:18030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>master:18088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master:18025</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>master:18141</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
</configuration>
-----------vi mapred-site.xml------------
cp mapred-site.xml.template mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
cd /usr/hadoop/hadoop-2.7.3/etc/hadoop/
echo master > master && echo slave1 > slaves && echo slave2 >> slaves
hadoop namenode -format   # format once, on master only
start-all.sh
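jps on each node is a quick sanity check. On master you would typically expect NameNode, SecondaryNameNode, and ResourceManager (plus QuorumPeerMain if ZooKeeper is running); on the slaves, DataNode and NodeManager:
jps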
systemctl start mysqld
systemctl status mysqld
grep "temporary password" /var/log/mysqld.log
mysql> set global validate_password_policy=0;
mysql> set global validate_password_length=4;
mysql> alter user 'root'@'localhost' identified by '123456';
mysql> grant all privileges on *.* to 'root'@'%' identified by '123456' with grant option;
mysql> flush privileges;
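To verify that remote root access works before pointing Hive at it (assuming MySQL runs on slave2, as the JDBC URL in hive-site.xml below implies):
mysql -h slave2 -uroot -p123456 -e 'show databases;'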
mkdir -p /usr/hive
cd /usr/package
tar -zxvf apache-hive-2.1.1-bin.tar.gz -C /usr/hive
vi /etc/profile
#hive
export HIVE_HOME=/usr/hive/apache-hive-2.1.1-bin
export PATH=$PATH:$HIVE_HOME/bin
cd /usr/hive/apache-hive-2.1.1-bin/conf && mv hive-env.sh.template hive-env.sh
cd $HIVE_HOME/conf && vim hive-env.sh
# Hadoop installation path
export HADOOP_HOME=/usr/hadoop/hadoop-2.7.3
# Hive configuration directory
export HIVE_CONF_DIR=/usr/hive/apache-hive-2.1.1-bin/conf
# Hive auxiliary JARs path
export HIVE_AUX_JARS_PATH=/usr/hive/apache-hive-2.1.1-bin/lib
cd /usr/package && cp mysql-connector-java-5.1.47-bin.jar /usr/hive/apache-hive-2.1.1-bin/lib
cd /usr/hive/apache-hive-2.1.1-bin/lib
cp jline-2.12.jar /usr/hadoop/hadoop-2.7.3/share/hadoop/yarn/lib
----------hive-site.xml (on slave1)------------
cd /usr/hive/apache-hive-2.1.1-bin/conf && vim hive-site.xml
<configuration>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive_remote/warehouse</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://slave2:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
  </property>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>
  <property>
    <name>datanucleus.schema.autoCreateAll</name>
    <value>true</value>
  </property>
</configuration>
--------hive-site.xml------------
Run on master:
cd /usr/hive/apache-hive-2.1.1-bin/conf && vim hive-site.xml
<configuration>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive_remote/warehouse</value>
  </property>
  <property>
    <name>hive.metastore.local</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://slave1:9083</value>
  </property>
</configuration>
cd /usr/hadoop/hadoop-2.7.3
start-dfs.sh
start-yarn.sh
Run on slave1 to initialize the metastore database:
schematool -dbType mysql -initSchema
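If initialization succeeded, the metastore tables should now exist in MySQL; a quick check (run from anywhere the mysql client can reach slave2):
mysql -h slave2 -uroot -p123456 -e 'use hive; show tables;'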
cd /usr/hive/apache-hive-2.1.1-bin
bin/hive --service metastore
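Note that this runs the metastore in the foreground and occupies the shell. A common alternative is to background it and capture the log:
nohup bin/hive --service metastore > metastore.log 2>&1 &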
Run on master:
cd /usr/hive/apache-hive-2.1.1-bin
bin/hive
create database student;
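In the same Hive session, show databases; should now list student, confirming that the master-side client is talking to the remote metastore on slave1:
show databases;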
Dynamically adding/removing nodes
hostnamectl set-hostname slave3
systemctl stop firewalld
systemctl disable firewalld
vi /etc/hosts
172.18.39.85 master
172.18.39.89 slave1
172.18.39.86 slave2
172.18.39.87 slave3
tzselect
5,9,1,1   # menu choices entered in tzselect to select Asia/Shanghai
echo "TZ='Asia/Shanghai'; export TZ" >> /etc/profile && source /etc/profile
crontab -e
*/10 * * * * /usr/sbin/ntpdate master
On master:
ssh-copy-id slave3
mkdir -p /usr/java
cd /usr/package
tar -zxvf jdk1.8.0_221.tar.gz -C /usr/java
vi /etc/profile
#JAVA_HOME
export JAVA_HOME=/usr/java/jdk1.8.0_221
export PATH=$PATH:$JAVA_HOME/bin
mkdir -p /usr/hadoop
cd /usr/package
tar -zxvf hadoop-2.7.3.tar.gz -C /usr/hadoop
vi /etc/profile
#HADOOP_HOME
export HADOOP_HOME=/usr/hadoop/hadoop-2.7.3
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
cd /usr/hadoop/hadoop-2.7.3/etc/hadoop && vi hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_221
ssh-keygen -t rsa
ssh-copy-id slave3
ssh-copy-id master
cd /usr/hadoop/hadoop-2.7.3/etc/hadoop/
echo master >> master && echo slave1 >> slaves && echo slave2 >> slaves && echo slave3 >> slaves
On the other nodes (master, slave1, slave2):
cd /usr/hadoop/hadoop-2.7.3/etc/hadoop/
echo slave3 >> slaves
cd /usr/hadoop/hadoop-2.7.3/
start-dfs.sh
start-yarn.sh
------hdfs-site.xml(在master上)---------
cd /usr/hadoop/hadoop-2.7.3/etc/hadoop
vi hdfs-site.xml
<property>
  <name>dfs.hosts</name>
  <value>/usr/hadoop/hadoop-2.7.3/etc/hadoop/datanode-allow.list</value>
</property>
<property>
  <name>dfs.hosts.exclude</name>
  <value>/usr/hadoop/hadoop-2.7.3/etc/hadoop/datanode-deny.list</value>
</property>
Edit the datanode-allow.list file and add the following:
master
slave1
slave2
slave3
On slave3:
hadoop-daemon.sh start datanode
yarn-daemon.sh start nodemanager
On master:
hdfs dfsadmin -refreshNodes
yarn rmadmin -refreshNodes
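hdfs dfsadmin -report should now list slave3 among the live DataNodes; the YARN web UI (master:18088, per yarn-site.xml) should likewise show the new NodeManager:
hdfs dfsadmin -report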
Removing a node
Run on master, slave1, slave2, and slave3:
cd /usr/hadoop/hadoop-2.7.3/etc/hadoop
vi datanode-deny.list
slave3
On master:
cd /usr/hadoop/hadoop-2.7.3/
hdfs dfsadmin -refreshNodes
yarn rmadmin -refreshNodes
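Decommissioning is not instant: hdfs dfsadmin -report will show slave3 as "Decommission in progress" while its blocks are re-replicated, then "Decommissioned". Wait for that state before stopping the daemons:
hdfs dfsadmin -report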
On slave3, stop the DataNode (DN) and NodeManager (NM):
cd /usr/hadoop/hadoop-2.7.3
hadoop-daemon.sh stop datanode
yarn-daemon.sh stop nodemanager
Remove the decommissioned node slave3 from the slaves, datanode-allow.list, datanode-deny.list, and /etc/hosts files (on master, slave1, slave2):
cd /usr/hadoop/hadoop-2.7.3/etc/hadoop
vi slaves
vi datanode-allow.list
vi datanode-deny.list
vi /etc/hosts
On master, refresh the DataNodes and ResourceManager:
hdfs dfsadmin -refreshNodes
yarn rmadmin -refreshNodes
At this point, slave3 has been successfully decommissioned.
yum install lrzsz -y
Upload a file: rz
Download a file: sz
hadoop fs -mkdir -p /college
hadoop fs -put /root/college/loan.csv /college/
hadoop fs -ls /college/
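A quick peek at the uploaded file confirms the upload and the comma delimiter:
hadoop fs -cat /college/loan.csv | head -5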
Create the database:
create database hive;
use hive;
Field descriptions:
LoanStatus: loan status
BorrowerRate: borrower interest rate
ProsperScore: credit score
Occupation: occupation
EmploymentStatus: employment status
IsBorrowerHomeowner: whether the borrower owns a home
CreditScoreRangeLower: credit score lower bound
CreditScoreRangeUpper: credit score upper bound
IncomeRange: income range
Create the table:
create table loan ( LoanStatus string, BorrowerRate decimal(10,5), ProsperScore int, Occupation string, EmploymentStatus string , IsBorrowerHomeowner string, CreditScoreRangeLower int , CreditScoreRangeUpper int , IncomeRange string) row format delimited fields terminated by ',';
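If loan.csv carries a header row, it will be loaded as an ordinary data row by the table above. A possible workaround, if that applies here, is to tell Hive to skip the first line:
alter table loan set tblproperties ('skip.header.line.count'='1');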
Load local data:
load data local inpath '/root/college/loan.csv' into table loan;
Count the rows in the table and write the result to local /root/college000/.
insert overwrite local directory '/root/college000/' row format delimited fields terminated by '\t' select count(*) from loan;
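The result lands in the local directory as a plain file (typically named 000000_0), so it can be inspected with:
cat /root/college000/000000_0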
Taking the credit score ProsperScore as the variable, count the loans (descending) and write the result to local /root/college001/.
insert overwrite local directory '/root/college001/' row format delimited fields terminated by '\t' select count(*) from (select ProsperScore from loan where ProsperScore > 1 order by ProsperScore desc) a;
List the top 5 occupations by number of loans, writing the result to local /root/college002/.
insert overwrite local directory '/root/college002/' row format delimited fields terminated by '\t' select Occupation, count(1) as num from loan group by Occupation order by num desc limit 5;
For loans whose status is Defaulted, analyze the borrowers' employment information: count each employment status and write the top 3 to /root/college003/.
insert overwrite local directory '/root/college003/' row format delimited fields terminated by '\t' select EmploymentStatus, count(EmploymentStatus) as num from loan where LoanStatus='Defaulted' group by EmploymentStatus order by num desc limit 3;
Group and count by income range (descending) to examine borrowers' incomes, writing the result to /root/college004/.
insert overwrite local directory '/root/college004/' row format delimited fields terminated by '\t' select IncomeRange, count(IncomeRange) as num from loan group by IncomeRange order by num desc;
Take the midpoint of the credit score lower and upper bounds as the credit score and find the highest score per occupation; order by score descending, then occupation ascending, and write the top 5 to /root/college005/.
insert overwrite local directory '/root/college005/' row format delimited fields terminated by '\t' select Occupation, max((CreditScoreRangeLower+CreditScoreRangeUpper)/2) as maxscore from loan group by Occupation order by maxscore desc, Occupation asc limit 5;
Using the Apriori association-rule principle, find the occupation (antecedent) most strongly associated with the defaulted loan status (consequent), and write the support to local /root/college006/ (to five decimal places).
insert overwrite local directory '/root/college006/' row format delimited fields terminated by '\t'
select Occupation, round(sum(1)/113937, 5) as support
from loan where LoanStatus='Defaulted' group by Occupation;



