创建3个虚拟机,分别为hsm01, hss01, hss02
hostname | ip |
---|---|
hsm01 | 192.168.99.145 |
hss01 | 192.168.99.151 |
hss02 | 192.168.99.152 |
# 执行命令
service iptables stop
# 验证
service iptables status
# 关闭防火墙的自动运行
chkconfig iptables off
# 验证
chkconfig --list | grep iptables
hostname hss01 vim /etc/sysconfig/network # ip 与 hostname 绑定 vim /etc/hosts
# 设置 ssh 免密码登录(在三个节点分别执行以下命令)
ssh-keygen -t rsa
# ~/.ssh/id_rsa.pub就是生成的公钥,把三个id_rsa.pub的内容合并,写入以下文件
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# 复制到其他节点
scp ~/.ssh/authorized_keys zkpk@hss01:~/.ssh/
scp ~/.ssh/authorized_keys zkpk@hss02:~/.ssh/
# root用户(也可以其他用户安装)
vim /etc/profile

export JAVA_HOME=/opt/jdk1.8.0_45
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

source /etc/profile
程序 | 版本 |
---|---|
JDK | 1.8.0_45 |
Hadoop | 2.6.4 |
zookeeper | 3.4.6 |
hbase | 1.2.2 |
hive | 1.2.1 |
mysql | 5.7.14 |
sqoop | 1.99.7 |
spark | 1.6.2 |
节点 | 安装软件 | 进程 |
---|---|---|
hsm01 | jdk, hadoop, zookeeper, hbase, hive, sqoop, spark | NameNode, ResourceManager, JournalNode, QuorumPeerMain, DFSZKFailoverController, HMaster, Worker, Master |
hss01 | jdk, hadoop, zookeeper, hbase, spark | NameNode, ResourceManager(需单独启动), JournalNode, QuorumPeerMain, DataNode, NodeManager, DFSZKFailoverController, Worker |
hss02 | jdk, hadoop, zookeeper, hbase, mysql, spark | DataNode, NodeManager, JournalNode, QuorumPeerMain, Worker |
hadoop相关程序都是用zkpk用户进行操作,并安装在/home/zkpk目录下
tar -xf zookeeper-3.4.6.tar.gz
cd ~/zookeeper-3.4.6/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg

# 修改
dataDir=/home/zkpk/zookeeper-3.4.6/data
# 添加
dataLogDir=/home/zkpk/zookeeper-3.4.6/logs
# 在最后添加
server.1=hsm01:2888:3888
server.2=hss01:2888:3888
server.3=hss02:2888:3888
# zookeeper根目录执行 mkdir data mkdir logs # 在dataDir目录下创建myid文件写入1 vim data/myid
scp -r ~/zookeeper-3.4.6/ zkpk@hss01:~/ scp -r ~/zookeeper-3.4.6/ zkpk@hss02:~/ # 将hss01中的myid改为2,hss02中的myid改为3 vim ~/zookeeper-3.4.6/data/myid
vim ~/.bash_profile export ZOOKEEPER_HOME=/home/zkpk/zookeeper-3.4.6 export PATH=$PATH:$ZOOKEEPER_HOME/bin source ~/.bash_profile
zkServer.sh start zkServer.sh status
zookeeper环境搭建中的几个坑[Error contacting service. It is probably not running]的分析及解决
http://www.paymoon.com/index.php/2015/06/04/zookeeper-building/
安装zookeeper时候,可以查看进程启动,但是状态显示报错:Error contacting service. It is probably not running
http://www.cnblogs.com/xiaohua92/p/5460515.html
所有节点的系统时间要同步
# root用户
date -s "yyyyMMdd HH:mm:ss"
clock -w
Zookeeper 日志输出到指定文件夹
http://www.tuicool.com/articles/MbUb63n
tar -xf hadoop-2.6.4.tar.gz
cd hadoop-2.6.4 # namenode信息存放目录 mkdir name # datanode信息存放目录 mkdir data
cd etc/hadoop
# 分别在 yarn-env.sh、hadoop-env.sh、mapred-env.sh 中设置 JAVA_HOME
vim yarn-env.sh
vim hadoop-env.sh
vim mapred-env.sh

export JAVA_HOME=/opt/jdk1.8.0_45
vim core-site.xml

```xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://ns1</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/zkpk/hadoop-2.6.4/tmp</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hsm01:2181,hss01:2181,hss02:2181</value>
  </property>
</configuration>
```
注:不要忘了创建tmp目录
vim hdfs-site.xml

```xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/home/zkpk/hadoop-2.6.4/name</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/home/zkpk/hadoop-2.6.4/data</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.nameservices</name>
    <value>ns1</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.ns1</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn1</name>
    <value>hsm01:9000</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns1.nn1</name>
    <value>hsm01:50070</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn2</name>
    <value>hss01:9000</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns1.nn2</name>
    <value>hss01:50070</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://hsm01:8485;hss01:8485;hss02:8485/ns1</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/home/zkpk/hadoop-2.6.4/journal</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.ns1</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence
shell(/bin/true)</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/zkpk/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
</configuration>
```
cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml

```xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
```
vim yarn-site.xml

```xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- 注意:此值在每个 ResourceManager 节点上应不同(hsm01 为 rm1,hss01 为 rm2)-->
  <property>
    <name>yarn.resourcemanager.ha.id</name>
    <value>rm1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yrc</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>hsm01</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>hss01</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>hsm01:2181,hss01:2181,hss02:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
</configuration>
```
vim slaves hss01 hss02
scp -r ~/hadoop-2.6.4 hss01:~/ scp -r ~/hadoop-2.6.4 hss02:~/
打开: vim ~/.bash_profile 添加: export HADOOP_HOME=/home/zkpk/hadoop-2.6.4 export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin 刷新: source ~/.bash_profile 验证:(输入以下命令,如果出现hadoop对应的版本,则hadoop配置成功。) hadoop version
zkServer.sh start # 查看状态:一个leader,两个follower zkServer.sh status
hadoop-daemon.sh start journalnode # 运行jps命令检验,hsm01、hss01、hss02上多了JournalNode进程
# hsm01上执行 hdfs namenode -format
scp -r ~/hadoop-2.6.4/name hss01:~/hadoop-2.6.4/ scp -r ~/hadoop-2.6.4/name hss02:~/hadoop-2.6.4/
# hsm01上执行 hdfs zkfc -formatZK
# 启动 zkServer.sh start start-dfs.sh start-yarn.sh # 关闭 stop-dfs.sh stop-yarn.sh zkServer.sh stop
待续
http://blog.csdn.net/u013980127/article/details/52261400
# 创建hadoop用户 grant all on *.* to hadoop@'%' identified by 'hadoop'; grant all on *.* to hadoop@'localhost' identified by 'hadoop'; grant all on *.* to hadoop@'hsm01' identified by 'hadoop'; flush privileges; # 创建数据库 create database hive_121;
tar -xf apache-hive-1.2.1-bin.tar.gz # 文件名修改为hive-1.2.1 mv apache-hive-1.2.1-bin/ hive-1.2.1
# 在hive-1.2.1/conf下,修改文件名 mv hive-default.xml.template hive-site.xml mv hive-log4j.properties.template hive-log4j.properties mv hive-exec-log4j.properties.template hive-exec-log4j.properties mv hive-env.sh.template hive-env.sh
export HADOOP_HOME=/home/zkpk/hadoop-2.6.4 export HIVE_CONF_DIR=/home/zkpk/hive-1.2.1/conf
hive.log.dir=/home/zkpk/hive-1.2.1/logs # 创建日志目录 mkdir /home/zkpk/hive-1.2.1/logs
删除所有内容,添加如下内容:
```xml
<configuration>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs://ns1/hive/warehouse</value>
  </property>
  <property>
    <name>hive.exec.scratchdir</name>
    <value>hdfs://ns1/hive/scratchdir</value>
  </property>
  <property>
    <name>hive.querylog.location</name>
    <value>/home/zkpk/hive-1.2.1/logs</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://hss02:3306/hive_121?characterEncoding=UTF-8</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hadoop</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hadoop</value>
  </property>
</configuration>
```
vim ~/.bash_profile export HIVE_HOME=/home/zkpk/hive-1.2.1 export PATH=$PATH:$HIVE_HOME/bin source ~/.bash_profile
在hive/lib下有个jline的jar,将hadoop内的这个jar包换成一致的,否则会启动hive会报错。
将mysql-connector-java-5.1.29.jar连接jar拷贝到hive-1.2.1/lib目录下
# 运行下面命令 hive # http://hsm01:50070,查看是否多了hive目录。
tar -xf sqoop-1.99.7-bin-hadoop200.tar.gz # 修改目录名 mv sqoop-1.99.7-bin-hadoop200/ sqoop-1.99.7
# 配置代理
vim $HADOOP_HOME/etc/hadoop/core-site.xml

```xml
<!-- zkpk是运行server的用户 -->
<property>
  <name>hadoop.proxyuser.zkpk.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.zkpk.groups</name>
  <value>*</value>
</property>
```

# 由于用户id小于1000(可用id命令查看),设置此项
vim $HADOOP_HOME/etc/hadoop/container-executor.cfg

allowed.system.users=zkpk
# @LOGDIR@修改为/home/zkpk/sqoop-1.99.7/logs # @BASEDIR@修改为/home/zkpk/sqoop-1.99.7 # hadoop配置文件路径 org.apache.sqoop.submission.engine.mapreduce.configuration.directory=/home/zkpk/hadoop-2.6.4/etc/hadoop/ # 设置验证机制(去掉注释) org.apache.sqoop.security.authentication.type=SIMPLE org.apache.sqoop.security.authentication.handler=org.apache.sqoop.security.authentication.SimpleAuthenticationHandler org.apache.sqoop.security.authentication.anonymous=true
复制mysql驱动jar文件到$SQOOP_HOME/extra(创建extra目录)
export SQOOP_SERVER_EXTRA_LIB=$SQOOP_HOME/extra
vim ~/.bash_profile export SQOOP_HOME=/home/zkpk/sqoop-1.99.7 export SQOOP_SERVER_EXTRA_LIB=$SQOOP_HOME/extra export PATH=$PATH:$SQOOP_HOME/bin source ~/.bash_profile
# 验证配置是否有效 sqoop2-tool verify # 开启服务器 sqoop2-server start # 客户端验证 sqoop2-shell show connector
tar -xf hbase-1.2.2-bin.tar.gz
cd hbase-1.2.2/lib cp ~/hadoop-2.6.4/share/hadoop/mapreduce/lib/hadoop-annotations-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/tools/lib/hadoop-auth-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/common/hadoop-common-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/hdfs/hadoop-hdfs-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/mapreduce/hadoop-mapreduce-client-app-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/yarn/hadoop-yarn-api-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/yarn/hadoop-yarn-client-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/yarn/hadoop-yarn-common-2.6.4.jar . cp ~/hadoop-2.6.4/share/hadoop/yarn/hadoop-yarn-server-common-2.6.4.jar . # 解决java.lang.NoClassDefFoundError: org/htrace/Trace cp ~/hadoop-2.6.4/share/hadoop/common/lib/htrace-core-3.0.4.jar . # 删除老版的jar rm *-2.5.1.jar
export JAVA_HOME=/opt/jdk1.8.0_45 export HBASE_MANAGES_ZK=false export HBASE_CLASSPATH=/home/zkpk/hadoop-2.6.4/etc/hadoop # 注释掉下面的配置(因为1.8JDK没有这个选项) #export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m" #export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
```xml
<configuration>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.tmp.dir</name>
    <value>/home/zkpk/hbase-1.2.2/tmp</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://ns1/hbase</value>
  </property>
  <property>
    <name>zookeeper.session.timeout</name>
    <value>120000</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.tickTime</name>
    <value>6000</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>hsm01,hss01,hss02</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/zkpk/zookeeper-3.4.6/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>hbase.master.maxclockskew</name>
    <value>180000</value>
  </property>
</configuration>
```
hss01 hss02
把hadoop的hdfs-site.xml和core-site.xml 放到hbase/conf下
cp hadoop-2.6.4/etc/hadoop/hdfs-site.xml hbase-1.2.2/conf/ cp hadoop-2.6.4/etc/hadoop/core-site.xml hbase-1.2.2/conf/ scp -r /home/zkpk/hbase-1.2.2 hss01:~/ scp -r /home/zkpk/hbase-1.2.2 hss02:~/
# 各节点分别配置 vim ~/.bash_profile export HBASE_HOME=/home/zkpk/hbase-1.2.2 export PATH=$PATH:$HBASE_HOME/bin source ~/.bash_profile
# 启动 start-hbase.sh # 通过浏览器访问hbase HMaster Web页面 http://hsm01:16010 # HRegionServer Web页面 http://hss01:16030 http://hss02:16030 # shell验证 hbase shell # list验证 list # 建表验证 create 'user','name','sex'
集群时间记得要同步,同步方式界面操作调整时区和格式。
date -s "yyyyMMdd HH:mm:ss"
clock -w
hbase启动时报错:java.lang.NoClassDefFoundError: org/htrace/Trace
或者用ntp设置
Linux NTP配置详解 (Network Time Protocol)
# root安装(其他用户也可以) tar -xf scala-2.11.7.tgz mv scala-2.11.7/ /opt/ # 环境变量 vim /etc/profile export SCALA_HOME=/opt/scala-2.11.7 export PATH=$PATH:$SCALA_HOME/bin source /etc/profile # 验证 scala -version # 将scala复制到其他节点,并配置环境变量 scp -r scala-2.11.7 root@hss01:/opt scp -r scala-2.11.7 root@hss02:/opt
tar -xf spark-1.6.2-bin-hadoop2.6.tgz mv spark-1.6.2-bin-hadoop2.6/ spark-1.6.2
# conf目录 cp spark-env.sh.template spark-env.sh vim spark-env.sh export JAVA_HOME=/opt/jdk1.8.0_45 export SCALA_HOME=/opt/scala-2.11.7 export SPARK_MASTER_IP=hsm01 export SPARK_WORKER_MEMORY=1g export HADOOP_CONF_DIR=/home/zkpk/hadoop-2.6.4/etc/hadoop export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_HOME/lib/native"
cp slaves.template slaves hsm01 hss01 hss02
scp -r spark-1.6.2/ hss01:~/ scp -r spark-1.6.2/ hss02:~/
vim ~/.bash_profile export SPARK_HOME=/home/zkpk/spark-1.6.2 export PATH=$PATH:$SPARK_HOME/bin source ~/.bash_profile
# 启动(由于和hadoop的启动shell名字一样,需要注意) $SPARK_HOME/sbin/start-all.sh # 查看集群状态 http://hsm01:8080/ # 命令行交互验证 ./bin/spark-shell scala> val textFile = sc.textFile("file:///home/zkpk/spark-1.6.2/README.md") textFile: org.apache.spark.rdd.RDD[String] = file:///home/zkpk/spark-1.6.2/README.md MapPartitionsRDD[1] at textFile at:27 scala> textFile.count() res0: Long = 95 scala> textFile.first() res1: String = # Apache Spark