High-availability setup for Hadoop 3, ZooKeeper 3, Hive 2, and HBase 2

1: Cluster plan
gpm    namenode,ZKFC,Resourcemanager,HMaster
gps    namenode,ZKFC,Resourcemanager,HMaster
gp1    datanode,nodemanager,zookeeper,Journalnode,hiveserver2,HRegionServer
gp2    datanode,nodemanager,zookeeper,Journalnode,hiveserver2,HRegionServer
gp3    datanode,nodemanager,zookeeper,Journalnode,hiveserver2,HRegionServer

2: Basic system setup: static IPs, passwordless SSH, Java, firewall, NTP, etc.
export JAVA_HOME=/usr/java/jdk1.8.0_181-cloudera
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH:$HOME/bin
export JAVA_TOOLS=$JAVA_HOME/lib/tools.jar
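
A quick way to confirm the prerequisites on every node before going further (a sketch; hostnames taken from the plan above, service names may differ on your distribution):
java -version
ssh gp1 hostname            # must not prompt for a password
systemctl status ntpd
systemctl status firewalld  # expected to be inactive/disabled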

3: Install zookeeper-3.5.7
mkdir -p /root/zookeeper-3.5.7/data
vi /root/zookeeper-3.5.7/conf/zoo.cfg

tickTime=2000
initLimit=10
syncLimit=5
dataDir=/root/zookeeper-3.5.7/data
clientPort=2181
server.1=gp1:2888:3888
server.2=gp2:2888:3888
server.3=gp3:2888:3888

scp -r  /root/zookeeper-3.5.7 root@gp1:/root/
scp -r  /root/zookeeper-3.5.7 root@gp2:/root/
scp -r  /root/zookeeper-3.5.7 root@gp3:/root/

gp1: echo 1 > /root/zookeeper-3.5.7/data/myid
gp2: echo 2 > /root/zookeeper-3.5.7/data/myid
gp3: echo 3 > /root/zookeeper-3.5.7/data/myid

gp1:/root/zookeeper-3.5.7/bin/zkServer.sh start
gp2:/root/zookeeper-3.5.7/bin/zkServer.sh start
gp3:/root/zookeeper-3.5.7/bin/zkServer.sh start

jps:
23283 Jps
23046 QuorumPeerMain

gp1:
/root/zookeeper-3.5.7/bin/zkCli.sh  -server 127.0.0.1:2181
ls /
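
To confirm the quorum actually formed, the status subcommand should report one leader and two followers (run on each of gp1/gp2/gp3):
/root/zookeeper-3.5.7/bin/zkServer.sh status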

4: Install hadoop-3.0.0
mkdir -p /root/hadoop-3.0.0/data/tmp
mkdir -p /root/hadoop-3.0.0/data/journal

vi /root/hadoop-3.0.0/etc/hadoop/hadoop-env.sh 
export JAVA_HOME=/usr/java/jdk1.8.0_181-cloudera
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_ZKFC_USER=root
export HDFS_JOURNALNODE_USER=root

vi /root/hadoop-3.0.0/etc/hadoop/yarn-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_181-cloudera
export YARN_RESOURCEMANAGER_USER=root
export HADOOP_SECURE_DN_USER=yarn
export YARN_NODEMANAGER_USER=root

vi /root/hadoop-3.0.0/etc/hadoop/mapred-env.sh 
export JAVA_HOME=/usr/java/jdk1.8.0_181-cloudera

vi /root/hadoop-3.0.0/etc/hadoop/hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
 <name>dfs.blocksize</name>
 <value>134217728</value>
</property>
<!-- Logical name of the HA nameservice; referenced by the ns1.* keys below and by fs.defaultFS -->
<property>
  <name>dfs.nameservices</name>
  <value>ns1</value>
</property>
<property>
  <name>dfs.ha.namenodes.ns1</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.ns1.nn1</name>
  <value>gpm:8020</value>
</property>
<!-- HTTP (web UI) address of nn1 -->
<property>
  <name>dfs.namenode.http-address.ns1.nn1</name>
  <value>gpm:50070</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.ns1.nn2</name>
  <value>gps:8020</value>
</property>
<!-- HTTP (web UI) address of nn2 -->
<property>
  <name>dfs.namenode.http-address.ns1.nn2</name>
  <value>gps:50070</value>
</property>
<!-- Shared edits directory: the JournalNode quorum where the NameNodes write their edit log (usually co-located with ZooKeeper) -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://gp1:8485;gp2:8485;gp3:8485/ns1</value>
</property>
<!-- Local directory where each JournalNode stores its data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/root/hadoop-3.0.0/data/journal</value>
</property>
<!-- Proxy provider class the HDFS client uses to determine which NameNode is currently active -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing method used on failover; several methods exist (see the official docs), here the old active NameNode is killed over a remote SSH login -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- Private key for passwordless SSH, required only by the sshfence method -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Connect timeout for sshfence; like the key above, not needed if a custom fencing script is used instead -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- Enable automatic failover; can be left out if only manual failover is wanted -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>

vi /root/hadoop-3.0.0/etc/hadoop/core-site.xml 
<property>
  <name>fs.defaultFS</name>
    <value>hdfs://ns1</value>
</property>
<property>
        <name>hadoop.tmp.dir</name>
        <value>/root/hadoop-3.0.0/data/tmp</value>
</property>
<property>
        <name>hadoop.http.staticuser.user</name>
        <value>root</value>
</property>
<property>
        <name>ha.zookeeper.quorum</name>
        <value>gp1:2181,gp2:2181,gp3:2181</value>
</property>

<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>

vi /root/hadoop-3.0.0/etc/hadoop/yarn-site.xml
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Site specific YARN configuration properties -->
<!-- Enable ResourceManager HA (off by default) -->
<property>
       <name>yarn.resourcemanager.ha.enabled</name>
      <value>true</value>
</property>
<!-- Declare the two ResourceManagers -->
<property>
      <name>yarn.resourcemanager.cluster-id</name>
     <value>rmcluster</value>
</property>
<property>
      <name>yarn.resourcemanager.ha.rm-ids</name>
      <value>rm1,rm2</value>
</property>
<property>
       <name>yarn.resourcemanager.hostname.rm1</name>
     <value>gpm</value>
</property>
<property>
     <name>yarn.resourcemanager.hostname.rm2</name>
     <value>gps</value>
</property>

<!-- ZooKeeper quorum used by the ResourceManagers -->
<property>
     <name>yarn.resourcemanager.zk-address</name>
        <value>gp1:2181,gp2:2181,gp3:2181</value>
</property>
<!-- Enable RM recovery so that running applications survive a ResourceManager failover (off by default) -->
<property>
       <name>yarn.resourcemanager.recovery.enabled</name>
       <value>true</value>
</property>

<!-- Store ResourceManager state in the ZooKeeper cluster (the default store is FileSystem-based) -->
<property>
      <name>yarn.resourcemanager.store.class</name>
     <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>

<property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>gpm:8088</value>
</property>

<property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>gps:8088</value>
</property>

Note: on gpm and gps additionally set yarn.resourcemanager.ha.id, rm1 on gpm and rm2 on gps:
<property>
   <name>yarn.resourcemanager.ha.id</name>
   <value>rm1</value>
</property>

<property>
   <name>yarn.resourcemanager.ha.id</name>
   <value>rm2</value>
</property>

vi /root/hadoop-3.0.0/etc/hadoop/mapred-site.xml
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>gpm:10020</value>
</property>
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>gpm:19888</value>
</property>
<property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
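
The ${HADOOP_HOME} references above are only useful if HADOOP_HOME is actually set on the cluster nodes; one way (a sketch, assuming the install path used in this guide) is to export it in /etc/profile on every node, or simply hardcode /root/hadoop-3.0.0 in the values:
export HADOOP_HOME=/root/hadoop-3.0.0
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH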

vi /root/hadoop-3.0.0/etc/hadoop/workers
gp1
gp2
gp3

scp -r /root/hadoop-3.0.0 root@gpm:/root/
scp -r /root/hadoop-3.0.0 root@gps:/root/
scp -r /root/hadoop-3.0.0 root@gp1:/root/
scp -r /root/hadoop-3.0.0 root@gp2:/root/
scp -r /root/hadoop-3.0.0 root@gp3:/root/

Start the JournalNodes:
gp1:/root/hadoop-3.0.0/sbin/hadoop-daemon.sh  start journalnode
gp2:/root/hadoop-3.0.0/sbin/hadoop-daemon.sh  start journalnode
gp3:/root/hadoop-3.0.0/sbin/hadoop-daemon.sh  start journalnode
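
A quick check that the JournalNodes are up (run on each of gp1/gp2/gp3; a JournalNode process should appear next to QuorumPeerMain):
jps | grep JournalNode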

Format the ZKFC znode from one of the NameNodes:
gpm:/root/hadoop-3.0.0/bin/hdfs zkfc -formatZK  

Format the primary NameNode and start it:
gpm:/root/hadoop-3.0.0/bin/hdfs namenode -format
gpm:/root/hadoop-3.0.0/bin/hdfs --daemon start namenode

Sync the standby NameNode with the formatted primary:
gps:/root/hadoop-3.0.0/bin/hdfs namenode -bootstrapStandby

Start the cluster:
/root/hadoop-3.0.0/sbin/start-all.sh

Check that one node is active and the other standby:
http://192.168.142.135:50070/dfshealth.html
http://192.168.142.136:50070/dfshealth.html
http://192.168.142.135:8088/cluster
http://192.168.142.136:8088/cluster
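
The same check from the command line, plus a small example job to confirm MapReduce on YARN works end to end (a sketch; the examples jar path assumes the standard hadoop-3.0.0 layout):
/root/hadoop-3.0.0/bin/hdfs haadmin -getServiceState nn1
/root/hadoop-3.0.0/bin/hdfs haadmin -getServiceState nn2
/root/hadoop-3.0.0/bin/yarn rmadmin -getServiceState rm1
/root/hadoop-3.0.0/bin/yarn rmadmin -getServiceState rm2
/root/hadoop-3.0.0/bin/hadoop jar /root/hadoop-3.0.0/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0.jar pi 2 10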

5: Install hive-2.3.6 and MySQL 5.7
wget -c http://dev.mysql.com/get/mysql57-community-release-el7-10.noarch.rpm
yum  install mysql57-community-release-el7-10.noarch.rpm
yum  install mysql-community-server
systemctl start  mysqld.service
systemctl enable  mysqld.service
grep "password" /var/log/mysqld.log
mysql -uroot -p 
mysql> set global validate_password_policy=0;
mysql> set global validate_password_length=1;
mysql> ALTER USER 'root'@'localhost' IDENTIFIED BY '123456';
mysql> flush privileges;
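
Since HiveServer2 on gp1/gp2/gp3 will connect to this MySQL instance over the network (the hive-site.xml below points at gps:3306), root, or better a dedicated hive user, also needs remote access; a minimal sketch under those assumptions:
mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456';
mysql> flush privileges;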

yum install mysql-connector-java
scp /usr/share/java/mysql-connector-java.jar root@gpm:/usr/share/java

mkdir -p /root/hive-2.3.6/log

vi /root/hive-2.3.6/conf/hive-env.sh 
export HADOOP_HOME=/root/hadoop-3.0.0
export HIVE_CONF_DIR=/root/hive-2.3.6/conf

vi /root/hive-2.3.6/conf/hive-site.xml
<!-- HDFS scratch directory for Hive jobs -->
<property>
<name>hive.exec.scratchdir</name>
<value>/user/hive/tmp</value>
</property>
<!-- Permissions used when Hive creates the scratch directory -->
<property>
<name>hive.scratch.dir.permission</name>
<value>755</value>
</property>
<!-- HDFS location of the Hive warehouse (table data) -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<!-- JDBC URL of the metastore database -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://gps:3306/hive_metastore?createDatabaseIfNotExist=true</value>
</property>
<!-- JDBC driver class -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<!-- Database user -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<!-- Database password -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<!-- Show column headers in the CLI -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- Show the current database name in the CLI prompt -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<!-- HiveServer2 HA via ZooKeeper service discovery -->
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>true</value>
</property>

<property>
<name>hive.server2.zookeeper.namespace</name>
<value>hiveserver2_zk</value>
</property>

<property>
<name>hive.zookeeper.quorum</name>
<value>gp1:2181,gp2:2181,gp3:2181</value>
</property>

<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
</property>

<property>
<name>hive.server2.thrift.port</name>
<value>10001</value>
</property>

<!-- HA: set this to the local hostname on each node (gp1, gp2, gp3 respectively) -->
<property>
<name>hive.server2.thrift.bind.host</name>
<value>gp1</value>
</property>

Initialize the metastore schema in MySQL:
cp /usr/share/java/mysql-connector-java.jar /root/hive-2.3.6/lib/
/root/hive-2.3.6/bin/schematool -dbType mysql -initSchema 
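
If the initialization succeeded, the metastore tables now exist in MySQL (a quick check on the database host; database name taken from the JDBC URL above):
mysql -uroot -p123456 -e "use hive_metastore; show tables;"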

scp -r /root/hive-2.3.6 root@gp1:/root/
scp -r /root/hive-2.3.6 root@gp2:/root/
scp -r /root/hive-2.3.6 root@gp3:/root/

Start HiveServer2 (on each of gp1/gp2/gp3):
gp1:/root/hive-2.3.6/bin/hiveserver2
gp2:/root/hive-2.3.6/bin/hiveserver2
gp3:/root/hive-2.3.6/bin/hiveserver2
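
Each instance registers itself under the hiveserver2_zk namespace in ZooKeeper; a connection through service discovery can be tested with beeline (the URL mirrors the JDBC URL used in the Java example below):
/root/hive-2.3.6/bin/beeline -u "jdbc:hive2://gp1:2181,gp2:2181,gp3:2181/default;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2_zk" -n root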

Connect to HiveServer2 from Java
JDBC driver jar: /root/hive-2.3.6/jdbc/hive-jdbc-2.3.6-standalone.jar

package hive;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HA {

    private static String driverName = "org.apache.hive.jdbc.HiveDriver";
    // ZooKeeper-based service discovery: the driver picks one of the registered HiveServer2 instances
    private static String connectUrl = "jdbc:hive2://gp1:2181,gp2:2181,gp3:2181/default;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2_zk";
    private static String userName = "root";
    private static String password = "123456";

    public static void main(String[] args) throws Exception {
        Class.forName(driverName);

        Connection con = DriverManager.getConnection(connectUrl, userName, password);
        Statement stmt = con.createStatement();

        ResultSet res = stmt.executeQuery("select * from dual");
        while (res.next()) {
            System.out.println(res.getInt(1) + "," + res.getString(2));
        }
        stmt.close();
        con.close();
    }
}
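
To compile and run the example against the standalone JDBC driver listed above (a sketch; depending on the Hive build, the Hadoop common jars under /root/hadoop-3.0.0/share/hadoop/common may also need to be on the classpath):
javac hive/HA.java
java -cp .:/root/hive-2.3.6/jdbc/hive-jdbc-2.3.6-standalone.jar hive.HA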



6: Install hbase-2.1.0
mkdir -p /root/hbase-2.1.0/data/pids
mkdir -p /root/hbase-2.1.0/logs

vi /root/hbase-2.1.0/conf/hbase-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_181-cloudera
export HADOOP_HOME=/root/hadoop-3.0.0
export HBASE_HOME=/root/hbase-2.1.0
export HBASE_CLASSPATH=/root/hadoop-3.0.0/etc/hadoop
export HBASE_LOG_DIR=/root/hbase-2.1.0/logs
export HBASE_PID_DIR=/root/hbase-2.1.0/data/pids
export HBASE_MANAGES_ZK=false

vi /root/hbase-2.1.0/conf/regionservers
gp1
gp2
gp3

vi /root/hbase-2.1.0/conf/hbase-site.xml
<property>
        <name>hbase.rootdir</name>
        <value>hdfs://ns1/hbase</value>
 </property>

<!-- Run in fully distributed mode -->
<property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
</property>

<!-- ZooKeeper quorum -->
<property>
      <name>hbase.zookeeper.quorum</name>
      <value>gp1,gp2,gp3</value>
</property>

<property>
   <name>hbase.zookeeper.property.clientPort</name>
   <value>2181</value>
</property>

<property>
        <name>hbase.unsafe.stream.capability.enforce</name>
        <value>false</value>
</property>

Remove the duplicate SLF4J binding shipped with HBase (it conflicts with the binding already on the Hadoop classpath):
rm -rf /root/hbase-2.1.0/lib/client-facing-thirdparty/slf4j-log4j12-1.7.25.jar
scp -r /root/hbase-2.1.0 root@gps:/root/
scp -r /root/hbase-2.1.0 root@gp1:/root/
scp -r /root/hbase-2.1.0 root@gp2:/root/

gpm:
/root/hbase-2.1.0/bin/start-hbase.sh

gps:
/root/hbase-2.1.0/bin/hbase-daemon.sh start master

http://gpm:16010/master-status
http://gps:16010/master-status
http://gp1:16030/rs-status
http://gp2:16030/rs-status
http://gp3:16030/rs-status
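
The Hive mapping and the Java client below both read an HBase table named t_test with column family cf1; a minimal way to create it and load two sample rows matching the examples (the values are illustrative):
/root/hbase-2.1.0/bin/hbase shell
create 't_test','cf1'
put 't_test','1001','cf1:tid','1'
put 't_test','1001','cf1:tname','name1'
put 't_test','1002','cf1:tid','2'
put 't_test','1002','cf1:tname','name2'
scan 't_test'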


Integrate Hive with HBase
vi /root/hive-2.3.6/conf/hive-site.xml
<!-- hbase -->
<property>
      <name>hbase.zookeeper.quorum</name>
      <value>gp1,gp2,gp3</value>
</property>

<property>
   <name>hbase.zookeeper.property.clientPort</name>
   <value>2181</value>
</property>


vi /root/hive-2.3.6/conf/hive-env.sh
export HBASE_HOME=/root/hbase-2.1.0

Create a Hive external table mapped onto the HBase table (run in the Hive CLI or via beeline):
create EXTERNAL table hbase_test(id int,tid string,tname string) 
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' 
with serdeproperties("hbase.columns.mapping"=":key,cf1:tid,cf1:tname") tblproperties("hbase.table.name"="t_test");

select * from hbase_test;

Read an HBase table from Java
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>2.1.0</version>
</dependency>

package com.lijiahong.hbase;

import java.util.Date;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;

public class Test {

    public static void main(String[] args) throws Exception {
        System.out.println(new Date());

        // Start from the HBase defaults, then point the client at the ZooKeeper quorum
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "gp1,gp2,gp3");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.ipc.client.socket.timeout", "5");

        Connection conn = ConnectionFactory.createConnection(conf);
        System.out.println("conn:" + new Date());

        Table table = conn.getTable(TableName.valueOf("t_test"));

        // Fetch row 1001 and print family, qualifier and value for each cell
        Get get = new Get("1001".getBytes());
        Result r = table.get(get);
        List<Cell> cells = r.listCells();
        for (Cell c : cells) {
            System.out.println(new String(c.getFamilyArray(), c.getFamilyOffset(), c.getFamilyLength()));
            System.out.println(new String(c.getQualifierArray(), c.getQualifierOffset(), c.getQualifierLength()));
            System.out.println(new String(c.getValueArray(), c.getValueOffset(), c.getValueLength()));
        }
        System.out.println(new Date());

        // Fetch row 1002 the same way
        Get get1 = new Get("1002".getBytes());
        Result r1 = table.get(get1);
        List<Cell> cells1 = r1.listCells();
        for (Cell c : cells1) {
            System.out.println(new String(c.getFamilyArray(), c.getFamilyOffset(), c.getFamilyLength()));
            System.out.println(new String(c.getQualifierArray(), c.getQualifierOffset(), c.getQualifierLength()));
            System.out.println(new String(c.getValueArray(), c.getValueOffset(), c.getValueLength()));
        }

        System.out.println(new Date());
        table.close();
        conn.close();
    }
}