1. Prerequisites
Make sure your local Windows environment has already been set up as described in the earlier tutorials:
Hadoop Getting Started (3): Fixing "connection refused" on port 9000 of the virtual machine
Hadoop Getting Started (4): Fixing the missing winutils.exe and hadoop.dll on Windows
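If HADOOP_HOME is not set as a Windows environment variable, the Hadoop client classes will still look for winutils.exe at startup. A minimal workaround, sketched below, is to point the hadoop.home.dir system property at your local Hadoop directory before any Hadoop class is used; the path D:\\hadoop-2.8.1 is an assumed install location, not something from this tutorial.

public class WinutilsSetup {
    public static void main(String[] args) {
        // Assumed local install location; its bin folder must contain winutils.exe
        System.setProperty("hadoop.home.dir", "D:\\hadoop-2.8.1");
        // ... then create the FileSystem or Job exactly as in the examples below
    }
}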
2. Create a Maven project
Add the following to the pom (if you do not need to debug MapReduce locally, you can omit the hadoop-mapreduce-client-common dependency):
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.8.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.8.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-common</artifactId>
        <version>2.8.1</version>
    </dependency>
</dependencies>

<!-- Optional: these Hadoop artifacts are all available from Maven Central, which Maven uses by default -->
<repositories>
    <repository>
        <id>apache</id>
        <url>https://repo.maven.apache.org/maven2</url>
    </repository>
</repositories>
3. Create a test class that accesses HDFS
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;

public class CopyToLocalDemo {
    public static void main(String[] args) throws IOException, URISyntaxException {
        // Get a FileSystem handle for the HDFS NameNode
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.1.101:9000"), new Configuration());
        // Open the file to download; the argument is the HDFS path
        InputStream in = fileSystem.open(new Path("/hadoop/LICENSE.txt"));
        // Create the output stream; the argument is the local target path (the directory must already exist)
        OutputStream out = new FileOutputStream("E://data/hadoop/LICENSE.txt");
        // Use Hadoop's IOUtils to copy in to out with a 4096-byte buffer; the final true closes both streams
        IOUtils.copyBytes(in, out, 4096, true);
    }
}
4. Run the HDFS test
After running the class, LICENSE.txt shows up in E://data/hadoop/.
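The FileSystem API works the same way in the upload direction. The following sketch (not part of the original tutorial) copies a local text file into the /hadoop directory on HDFS, which is also the input directory the WordCount job in step 5 reads from; the local path E://data/hadoop/words.txt is just an assumed example file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

public class CopyFromLocalDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.1.101:9000"), new Configuration());
        // Local source file (assumed example path) and HDFS target path
        InputStream in = new FileInputStream("E://data/hadoop/words.txt");
        OutputStream out = fileSystem.create(new Path("/hadoop/words.txt"));
        // Copy and close both streams
        IOUtils.copyBytes(in, out, 4096, true);
    }
}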
5. Create a WordCount test class to test MapReduce
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        // Emit (word, 1) for every token in the input line
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        // Sum the counts for each word
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Point the job at the packaged jar in the project root so the cluster can load the Mapper/Reducer classes
        conf.set("mapred.jar", "hadoop-study-1.0-SNAPSHOT.jar");
        Job job = Job.getInstance(conf, "word count");
        String input = "hdfs://192.168.1.101:9000/hadoop";
        String output = "hdfs://192.168.1.101:9000/hadoop/output";
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // The reducer can double as a combiner because summing is associative
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        // The output directory must not already exist, or the job will fail
        FileOutputFormat.setOutputPath(job, new Path(output));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
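One gotcha with this class: FileOutputFormat requires that the output directory not exist yet, so re-running the job against the same /hadoop/output path fails with a FileAlreadyExistsException. A common workaround (not part of the original code) is to delete the stale directory in main() before the job is submitted; the fragment below assumes it is placed right after the output string is defined and that FileSystem and URI are imported as in step 3.

// Remove a stale output directory so the job can be re-run (destructive; use with care)
FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.101:9000"), conf);
Path outputPath = new Path(output);
if (fs.exists(outputPath)) {
    fs.delete(outputPath, true); // true = delete recursively
}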
6. Run the MapReduce test
Note: before running, package the project and place the resulting jar in the project root directory (this is the hadoop-study-1.0-SNAPSHOT.jar that main() points mapred.jar at).
After the job finishes successfully, you can check the output on the server:
hadoop fs -ls /hadoop/output
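To see the actual word counts rather than just the directory listing, you can run hadoop fs -cat /hadoop/output/part-r-00000 on the server, or read the file from Windows with the same FileSystem API used in step 3. The sketch below assumes the job ran with the default single reducer, so the result lands in part-r-00000.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.net.URI;

public class ReadWordCountOutput {
    public static void main(String[] args) throws Exception {
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.1.101:9000"), new Configuration());
        // With the default single reducer the result is written to part-r-00000
        IOUtils.copyBytes(fileSystem.open(new Path("/hadoop/output/part-r-00000")), System.out, 4096, true);
    }
}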
Common problems:
1. Permission error:
Permission denied: user=root, access=WRITE, inode="hadoop":hadoop:supergroup:rwxr-xr-x
Fix:
On the server, edit $HADOOP_HOME/etc/hadoop/hdfs-site.xml and add:
<!-- In Hadoop 2.x this key is deprecated in favor of dfs.permissions.enabled; both are honored -->
<property>
    <name>dfs.permissions</name>
    <value>false</value>
</property>
Restart Hadoop for the change to take effect.
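Disabling permission checking is acceptable on a test cluster, but a lighter alternative is to issue the HDFS requests as the user that owns the target directory (the error message above shows it is the hadoop user). One way, sketched below, is the FileSystem.get overload that takes a user name; setting the HADOOP_USER_NAME environment variable on the Windows side achieves the same effect.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import java.net.URI;

public class AsHadoopUserDemo {
    public static void main(String[] args) throws Exception {
        // The third argument is the user the requests are issued as on HDFS ("hadoop" is assumed here)
        FileSystem fileSystem = FileSystem.get(
                new URI("hdfs://192.168.1.101:9000"), new Configuration(), "hadoop");
        // ... use fileSystem as in the earlier examples
    }
}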
Reference for the dfs.permissions fix: http://blog.csdn.net/a925907195/article/details/41129773
2. Error: Failed to connect to /127.0.0.1:50010 for block, add to deadNodes and continue
Fix: this error usually means the server's /etc/hosts is misconfigured. It took me quite a while to track down before I confirmed the hosts file was the culprit; mine is configured as follows, for reference:
192.168.1.101 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.1.101 roy