1. Download
Address: //www.trieuvan.com/apache/hadoop/common/
2. Extract the archive
3. Configure the Hadoop environment variables:
gedit ~/.bashrc
export JAVA_HOME=/home/yanghaidong123123/桌面/jdk1.8.0_77
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
export SCALA_HOME=/home/yanghaidong123123/桌面/scala-2.11.8
export PATH=${SCALA_HOME}/bin:$PATH
export HADOOP_INSTALL=/home/yanghaidong123123/桌面/hadoop
export PATH=$PATH:$HADOOP_INSTALL/bin
export PATH=$PATH:$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL
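After saving, reload the file and sanity-check that each tool resolves, assuming the paths above match the actual install locations:
source ~/.bashrc
java -version        # expect 1.8.0_77
scala -version       # expect 2.11.8
hadoop version       # expect the unpacked Hadoop release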
Configure core-site.xml
Add:
fs.defaultFS = hdfs://master:9000
io.file.buffer.size = 131072
hadoop.tmp.dir = /home/yanghaidong123123/桌面/tmp   (a base for other temporary directories)
hadoop.proxyuser.hduser.hosts = *
hadoop.proxyuser.hduser.groups = *
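Each name/value pair goes into the <configuration> element of core-site.xml as a <property> entry; the yarn-site.xml, mapred-site.xml and hdfs-site.xml settings below follow the same pattern. A minimal sketch for the first two properties:
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <!-- the remaining properties follow the same pattern -->
</configuration>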
Configure yarn-site.xml and add:
yarn.nodemanager.aux-services = mapreduce_shuffle
yarn.nodemanager.aux-services.mapreduce.shuffle.class = org.apache.hadoop.mapred.ShuffleHandler
yarn.resourcemanager.address = master:8032
yarn.resourcemanager.scheduler.address = master:8030
yarn.resourcemanager.resource-tracker.address = master:8031
yarn.resourcemanager.admin.address = master:8033
yarn.resourcemanager.webapp.address = master:8088
Configure mapred-site.xml and add:
mapreduce.framework.name = yarn
mapreduce.jobhistory.address = master:10020
mapreduce.jobhistory.webapp.address = master:19888
Configure hdfs-site.xml and add:
dfs.namenode.secondary.http-address = master:9001
dfs.namenode.name.dir = file:/hdfs/namenode
dfs.datanode.data.dir = file:/hdfs/datanode
dfs.replication = 3
dfs.webhdfs.enabled = true
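The namenode and datanode directories (and the tmp directory from core-site.xml) should exist before formatting. A sketch, assuming the paths above are kept and the cluster runs as the current user:
sudo mkdir -p /hdfs/namenode /hdfs/datanode
sudo chown -R $USER:$USER /hdfs
mkdir -p /home/yanghaidong123123/桌面/tmp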
Format the namenode (from the Hadoop root directory):
hadoop namenode -format
Start: start-all.sh
Check the status with jps and hadoop dfsadmin -report
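As a rough guide (not exact output), a healthy cluster shows these daemons:
jps                        # master: NameNode, SecondaryNameNode, ResourceManager
                           # slaves: DataNode, NodeManager
hadoop dfsadmin -report    # lists capacity and the live datanodes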
Stop: stop-all.sh
1. Download spark-1.0.2-bin-hadoop2
Official site: www.spark.apache.org/downloads.html
2. Extract: tar -xzvf spark-1.0.2-bin-hadoop2.tgz -C /usr/local
3. Change ownership: chown -R hadoop:hadoop spark-1.0.2/
4. Rename: mv spark-env.sh.template spark-env.sh
mv spark-1.0.2-bin-hadoop2/ spark-1.0.2
5. The configuration files are under /usr/local/spark-1.0.2/conf
6. Configure conf/spark-env.sh:
export JAVA_HOME=/home/yanghaidong123123/桌面/jdk1.8.0_77
export SCALA_HOME=/home/yanghaidong123123/桌面/scala-2.11.8
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
export SPARK_WORKER_MEMORY=512M
export SPARK_MASTER_IP=master
export MASTER=spark://master:7077
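With spark-env.sh filled in, the standalone cluster can be brought up from the Spark directory and checked with a shell. A sketch; note that sbin/start-all.sh here is Spark's own script, not Hadoop's:
cd /usr/local/spark-1.0.2
sbin/start-all.sh                              # starts the Spark Master and Workers
jps                                            # should now also show Master / Worker
bin/spark-shell --master spark://master:7077   # connects to the standalone master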
7. Parameter settings
#set java
export JAVA_HOME=/home/spark/opt/jdk1.7.0_79
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
#set scala
export SCALA_HOME=/home/spark/opt/scala-2.10.4
export PATH=$SCALA_HOME/bin:$PATH
#set hadoop
export HADOOP_HOME=/home/spark/opt/hadoop-2.6.0
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export SPARK_HOME=/home/spark/opt/spark-1.5.0
export PATH=$SPARK_HOME/bin:$PATH
Hadoop 2.6.0 cluster setup - stark_summer's column - CSDN.NET blog
//www.open-open.com/lib/view/open1435217148153.html
//blog.csdn.net/stark_summer/article/details/42424279
Spark:
//blog.csdn.net/stark_summer/article/details/43495623
Notes
Change the owner of the Hadoop directory and files
Add HADOOP_HOME and PATH in /etc/profile
Edit /etc/hostname and /etc/hosts
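For example, /etc/hosts on every node might map the hostnames like this (the 192.168.1.x addresses are placeholders for the real ones):
192.168.1.100  master
192.168.1.101  slave1
192.168.1.102  slave2
192.168.1.103  slave3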
Configure mapred-env.sh: set JAVA_HOME to the local Java installation directory
Configure hadoop-env.sh: set JAVA_HOME to the local Java installation directory
(in hadoop-env.sh also add export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native")
Configure yarn-env.sh: set JAVA_HOME to the local Java installation directory
Create the directories dfs and tmp under the Hadoop installation directory
Create the directories name and data under dfs
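For example, assuming the /home/spark/opt/hadoop-2.6.0 installation used below:
cd /home/spark/opt/hadoop-2.6.0
mkdir -p dfs/name dfs/data tmp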
Add to hadoop-env.sh:
export JAVA_HOME=/home/spark/opt/jdk1.7.0_79
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
The masters file can be created by copying slaves
masters contains only master
slaves contains slave1 slave2 slave3 ...
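For example, assuming both files sit in etc/hadoop under the Hadoop installation and the hostnames match /etc/hosts:
$ cat etc/hadoop/masters
master
$ cat etc/hadoop/slaves
slave1
slave2
slave3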
mapred-site.xml can be created by copying mapred-site.xml.template
Configure core-site.xml
Add the core Hadoop settings (HDFS port 9000, temp dir file:/home/spark/opt/hadoop-2.6.0/tmp):
fs.defaultFS = hdfs://master:9000
io.file.buffer.size = 131072
hadoop.tmp.dir = file:/home/spark/opt/hadoop-2.6.0/tmp   (a base for other temporary directories)
hadoop.proxyuser.spark.hosts = *
hadoop.proxyuser.spark.groups = *
Configure hdfs-site.xml: add the HDFS settings (namenode and datanode addresses and directory locations):
dfs.namenode.secondary.http-address = master:9001
dfs.namenode.name.dir = file:/home/spark/opt/hadoop-2.6.0/dfs/name
dfs.datanode.data.dir = file:/home/spark/opt/hadoop-2.6.0/dfs/data
dfs.replication = 3
dfs.webhdfs.enabled = true
Configure mapred-site.xml: add the MapReduce settings (use the YARN framework; jobhistory address and web address):
mapreduce.framework.name = yarn
mapreduce.jobhistory.address = master:10020
mapreduce.jobhistory.webapp.address = master:19888
Configure yarn-site.xml: add the YARN settings:
yarn.nodemanager.aux-services = mapreduce_shuffle
yarn.nodemanager.aux-services.mapreduce.shuffle.class = org.apache.hadoop.mapred.ShuffleHandler
yarn.resourcemanager.address = master:8032
yarn.resourcemanager.scheduler.address = master:8030
yarn.resourcemanager.resource-tracker.address = master:8031
yarn.resourcemanager.admin.address = master:8033
yarn.resourcemanager.webapp.address = master:8088
Copy Hadoop to the other nodes:
sudo scp -r hadoop slave1@IP:/tmp
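A sketch for pushing the directory to several slaves in one loop, assuming the spark user and the slave hostnames from /etc/hosts; files landed in /tmp still have to be moved into place on each node:
for host in slave1 slave2 slave3; do
  sudo scp -r hadoop spark@${host}:/tmp
done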