Turn off the firewall
- systemctl stop firewalld
Check the firewall status
- systemctl status firewalld
Hostname and host mapping
- hostnamectl set-hostname <hostname>   # run on each node with its own name: master, slave1, slave2
- bash   # open a new shell so the new hostname takes effect
- vim /etc/hosts   # add IP-to-hostname mappings for all three nodes
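A minimal /etc/hosts sketch; the IP addresses below are placeholders, substitute the real addresses of the three nodes:
192.168.1.10 master
192.168.1.11 slave1
192.168.1.12 slave2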
Time zone
tzselect
- Asia
- China
- Beijing Time
- yes
- Write the command that tzselect prints into /etc/profile
- source /etc/profile
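For Beijing Time, tzselect ends by printing the line to append to /etc/profile; it is typically:
TZ='Asia/Shanghai'; export TZ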
Time sync (NTP)
yum install -y ntp
Edit /etc/ntp.conf on master and add:
server 127.127.1.0
fudge 127.127.1.0 stratum 1
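After saving ntp.conf, start the NTP daemon on master so the slaves have something to sync from (the service installed by the ntp package is ntpd):
- systemctl restart ntpd
- systemctl enable ntpd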
On the slave nodes: ntpdate master
Scheduled sync (cron)
- crontab -e
- i   # enter insert mode
- */10 * * * * /usr/sbin/ntpdate master
- List the scheduled jobs: crontab -l
Passwordless SSH login
cd ~
/usr/sbin/sshd
netstat -tnulp
ssh-keygen
ll
cd /root
ls -a
cd .ssh/
ls
cp id_rsa.pub authorized_keys
ll
scp ./authorized_keys root@slave1:~/.ssh/
ssh slave1
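slave2 needs the same key, so repeat the copy and the login test for it (same pattern as slave1):
scp ./authorized_keys root@slave2:~/.ssh/
ssh slave2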
JDK
Check for the bundled OpenJDK and remove it
- rpm -qa | grep java
- rpm -e java-1.6.0-openjdk-1.6.0.41-1.13.13.1.el6_8.x86_64 tzdata-java2016j-1.el6.noarch java-1.7.0-openjdk-1.7.0.131-2.6.9.0.el6_8.x86_64 --nodeps
Create installation directories
- mkdir -p /export/softwares   # where software packages are stored
- mkdir -p /export/servers     # installation directory
Extract (from /export/softwares)
- tar -zxvf jdk-8u141-linux-x64.tar.gz -C ../servers/
Configure environment variables
- vim /etc/profile
export JAVA_HOME=/export/servers/jdk1.8.0_141
export PATH=$JAVA_HOME/bin:$PATH
- source /etc/profile
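Quick check that the new JDK is the one on the PATH:
- java -version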
ZooKeeper
- cd /export/softwares
- tar -zxvf zookeeper-3.4.9.tar.gz -C ../servers/
Modify the configuration file on the first machine
- cd /export/servers/zookeeper-3.4.9/conf/
- cp zoo_sample.cfg zoo.cfg
- mkdir -p /export/servers/zookeeper-3.4.9/zkdatas/
vim zoo.cfg
dataDir=/export/servers/zookeeper-3.4.9/zkdatas
# number of snapshots to retain
autopurge.snapRetainCount=3
# purge interval, in hours
autopurge.purgeInterval=1
# servers in the cluster
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
Add the myid file
On the first machine, create a file named myid under
/export/servers/zookeeper-3.4.9/zkdatas/ with 1 as its content
- echo 1 > /export/servers/zookeeper-3.4.9/zkdatas/myid
Distribute the installation and adjust myid
Copy the installation to the other machines
Run these two commands on the first machine
- scp -r /export/servers/zookeeper-3.4.9/ slave1:/export/servers/
- scp -r /export/servers/zookeeper-3.4.9/ slave2:/export/servers/
Set myid to 2 on the second machine
- echo 2 > /export/servers/zookeeper-3.4.9/zkdatas/myid
Set myid to 3 on the third machine
- echo 3 > /export/servers/zookeeper-3.4.9/zkdatas/myid
Start the ZooKeeper service on all three machines
Run this command on every machine
- /export/servers/zookeeper-3.4.9/bin/zkServer.sh start
Check the startup status
- /export/servers/zookeeper-3.4.9/bin/zkServer.sh status
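If the cluster formed correctly, zkServer.sh status should report Mode: leader on one node and Mode: follower on the other two (exact wording can vary slightly between ZooKeeper versions).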
Hadoop installation
Extract
- cd /export/softwares
- tar -zxvf hadoop-2.7.5.tar.gz -C ../servers/
Run the following commands on the first machine
Edit core-site.xml
- cd /export/servers/hadoop-2.7.5/etc/hadoop
- vim core-site.xml
<configuration>
<!-- default file system for the cluster (HDFS, pointing at the namenode) -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<!-- temporary file storage directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/export/servers/hadoop-2.7.5/hadoopDatas/tempDatas</value>
</property>
</configuration>
Edit hdfs-site.xml
- cd /export/servers/hadoop-2.7.5/etc/hadoop
- vim hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:50090</value>
</property>
<!-- namenode web UI address and port -->
<property>
<name>dfs.namenode.http-address</name>
<value>master:50070</value>
</property>
<!-- where the namenode stores its metadata -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas,file:///export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas2</value>
</property>
<!-- datanode data directories; in practice, decide the disk mount points first, then separate multiple directories with commas -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas,file:///export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas2</value>
</property>
<!-- directory for the namenode edit logs -->
<property>
<name>dfs.namenode.edits.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/nn/edits</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/snn/name</value>
</property>
<property>
<name>dfs.namenode.checkpoint.edits.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/dfs/snn/edits</value>
</property>
<!-- number of replicas per block -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- HDFS file permission checking (disabled here) -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<!-- block size: 128 MB -->
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
</configuration>
Edit hadoop-env.sh
- cd /export/servers/hadoop-2.7.5/etc/hadoop
- vim hadoop-env.sh
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The java implementation to use.
export JAVA_HOME=/export/servers/jdk1.8.0_141   # point at the installed JDK
Edit mapred-site.xml
- cd /export/servers/hadoop-2.7.5/etc/hadoop
- vim mapred-site.xml
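Note: Hadoop 2.x ships only mapred-site.xml.template in this directory, so if mapred-site.xml does not exist yet, create it from the template before editing:
- cp mapred-site.xml.template mapred-site.xml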
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- host and port of the job history server -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<!-- host and port of the job history web UI -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
Edit yarn-site.xml
- cd /export/servers/hadoop-2.7.5/etc/hadoop
- vim yarn-site.xml
<configuration>
<!-- host of the YARN master node (ResourceManager) -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- how long aggregated logs are kept on HDFS, in seconds -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<!-- YARN memory allocation settings -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>20480</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.1</value>
</property>
</configuration>
Edit mapred-env.sh
- cd /export/servers/hadoop-2.7.5/etc/hadoop
- vim mapred-env.sh
export JAVA_HOME=/export/servers/jdk1.8.0_141   # point at the installed JDK
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
#export HADOOP_JOB_HISTORYSERVER_OPTS=
#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default.
#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger.
#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default.
#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default
#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0.
Edit slaves
- cd /export/servers/hadoop-2.7.5/etc/hadoop
- vim slaves
slave1
slave2
Create the data directories
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/tempDatas
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas2
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas2
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/nn/edits
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/snn/name
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/dfs/snn/edits
Distribute the installation
cd /export/servers/
scp -r hadoop-2.7.5 slave1:$PWD
scp -r hadoop-2.7.5 slave2:$PWD
Configure the Hadoop environment variables
vim /etc/profile
export HADOOP_HOME=/export/servers/hadoop-2.7.5
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source /etc/profile
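Optional sanity check that the variables took effect:
hadoop version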
Start (on master)
cd /export/servers/hadoop-2.7.5/
bin/hdfs namenode -format   # format only the first time
sbin/start-dfs.sh
sbin/start-yarn.sh
sbin/mr-jobhistory-daemon.sh start historyserver
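Check the running daemons with jps; given the config above, master should show NameNode, SecondaryNameNode, ResourceManager and JobHistoryServer, while slave1 and slave2 show DataNode and NodeManager:
jps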
Web UIs on three ports
http://master:50070/explorer.html#/   HDFS
http://master:8088/cluster   YARN cluster
http://master:19888/jobhistory   completed job history
WordCount example
hadoop fs -mkdir -p /wordcount/input
hadoop fs -put /root/data/word.txt /wordcount/input
hadoop jar hadoop-map...2.7.5.jar wordcount /wordcount/input /wordcount/output
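Once the job finishes, view the result directly from HDFS (the file name assumes the default single reducer):
hadoop fs -cat /wordcount/output/part-r-00000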
MySQL installation
yum install mysql mysql-server mysql-devel
/etc/init.d/mysqld start
/usr/bin/mysql_secure_installation
Enter   (press Enter for the current root password, which is empty)
y   set a root password
123456
123456
y   remove anonymous users
n   do not disallow remote root login (remote access stays enabled)
n   do not remove the test database
y   reload the privilege tables
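Log in with the password just set and grant remote access (needed later for Hive's JDBC connection to the metastore database):
mysql -u root -p123456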
grant all privileges on *.* to 'root'@'%' identified by '123456' with grant option;
flush privileges;
Hive installation
cd /export/softwares/
tar -zxvf apache-hive-2.1.1-bin.tar.gz -C ../servers/
Environment variables
- vim /etc/profile
export HIVE_HOME=/export/servers/apache-hive-2.1.1-bin
export PATH=$PATH:$HIVE_HOME/bin
- source /etc/profile
- hive --version
Edit the Hive configuration files
Edit hive-env.sh
cd /export/servers/apache-hive-2.1.1-bin/conf
cp hive-env.sh.template hive-env.sh
vim hive-env.sh
# Hadoop installation path
HADOOP_HOME=/export/servers/hadoop-2.7.5
# Hive configuration directory
export HIVE_CONF_DIR=/export/servers/apache-hive-2.1.1-bin/conf
# Hive auxiliary jars directory
export HIVE_AUX_JARS_PATH=/export/servers/apache-hive-2.1.1-bin/lib
Initialize the metastore schema (after hive-site.xml below is in place; MySQL is the metastore here, so use -dbType mysql):
- schematool -dbType mysql -initSchema
Edit hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>master</value>
</property>
</configuration>
Upload mysql-connector-java-5.1.38.jar to /export/servers/apache-hive-2.1.1-bin/lib
Interactive use
cd /export/servers/apache-hive-2.1.1-bin/
bin/hive
create database hive;
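A quick check, inside the Hive CLI, that the MySQL-backed metastore is reachable:
show databases;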