本来在Maven项目下尝试,一直无法找到缺少什么jar包。无奈之下,只能一次性导入全部的jar包。
- IDEA
- HADOOP2.9.0
前提需要
- 安装 jdk 并且配置环境变量
- 解压hadoop2.9.0 到指定目录,并且配置环境变量
HADOOP_HOME=C:\Work\hadoop-2.9.0
HADOOP_BIN_PATH=%HADOOP_HOME%\bin
HADOOP_PREFIX=C:\Work\hadoop-2.9.0
另外,PATH变量在最后追加;%HADOOP_HOME%\bin
hadoop版本对应的hadoop.dll、winutils.exe
下载地址 https://github.com/steveloughran/winutils
hadoop2.9.0 可以使用 该网址上的 3.0版本。
分别放到目录“C:\Windows\System32”和“%HADOOP_HOME%\bin”下。
搭建集群(本文实验环境:Windows10 + VMware)
用IDEA 创建一个maven webapp 项目
以上项目基本信息根据个人设置。
生成的目录结构如下图
添加hadoop 需要的jar包(o(╥﹏╥)o 这里一次导入所有的包...)
鼠标右键项目,选择 Open Module Settings 或者快捷键 F4
打开Project Structure界面。
点击 ‘+’ 选择 添加 jars or directory
找到本地的 hadoop2.9.0 的目录下的所需要的jar, 位置在(C:\Work\hadoop-2.9.0\share\hadoop\)
得到
起个名字方便管理
这样就添加所有的 jar 包。
编写 MapReduce 程序。
此时项目结构中还没有存放代码的 Sources 文件夹。
鼠标右键 main -> New -> Directory
此时的 java 仅仅是个文件夹,还需要设置它为 Sources
鼠标右键项目,选择 Open Module Settings 或者快捷键 F4
打开Project Structure界面。
设置成功java 文件夹会是蓝色的。
PartitionerApp.java
package hk.uic.hadoop.testweb;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * MapReduce example that sums a numeric count per fruit name and routes each
 * fruit to its own reducer through a custom {@link Partitioner}.
 *
 * <p>Each input line is expected to look like {@code <name> <count>}, for example
 * {@code Apple 3}. The job writes one {@code <name> <total>} pair per distinct name.
 */
public class PartitionerApp {

    /** Input file used when no command-line arguments are supplied. */
    private static final String DEFAULT_INPUT = "hdfs://192.168.19.128:8020/springhdfs/fruits.txt";

    /** Output directory used when no command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT = "hdfs://192.168.19.128:8020/output/fruits";

    /** One reducer per category handled by {@link MyPartitioner}: Apple, Orange, Pear, everything else. */
    private static final int NUM_REDUCERS = 4;

    /**
     * Map: parses one line of the input file.
     *
     * <ul>
     *   <li>Input key ({@code LongWritable}): offset of the line in the file.</li>
     *   <li>Input value ({@code Text}): the line itself, e.g. {@code home 1}.</li>
     *   <li>Output key ({@code Text}): the first field of the line.</li>
     *   <li>Output value ({@code LongWritable}): the second field parsed as a long.</li>
     * </ul>
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        // Reused across map() calls; context.write() serializes the current contents.
        private final Text outKey = new Text();
        private final LongWritable outValue = new LongWritable();

        /**
         * Emits {@code (name, count)} for one input line.
         *
         * @param key     offset of the line within the input
         * @param value   the raw line
         * @param context task context used to emit the pair
         * @throws IOException if the line is not blank but does not contain a name followed by a
         *                     parsable long, or if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                // A blank (typically trailing) line carries no record; skip it instead of
                // failing the task with an ArrayIndexOutOfBoundsException.
                return;
            }

            // Split on runs of whitespace so repeated spaces or tabs do not yield empty fields.
            String[] words = line.split("\\s+");
            if (words.length < 2) {
                throw new IOException("Malformed record at offset " + key.get()
                        + ": expected \"<name> <count>\" but got \"" + line + "\"");
            }

            long count;
            try {
                count = Long.parseLong(words[1]);
            } catch (NumberFormatException e) {
                throw new IOException("Malformed count at offset " + key.get()
                        + ": \"" + words[1] + "\" is not a valid long", e);
            }

            outKey.set(words[0]);
            outValue.set(count);
            context.write(outKey, outValue);
        }
    }

    /**
     * Reduce: adds up all values received for one key.
     */
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        /**
         * Writes {@code (key, sum of values)}.
         *
         * @param key     the name emitted by the mapper
         * @param values  all counts emitted for that name
         * @param context task context used to emit the total
         * @throws IOException if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable value : values) {
                sum += value.get();
            }
            // Final aggregated result for this key.
            context.write(key, new LongWritable(sum));
        }
    }

    /**
     * Sends "Apple", "Orange" and "Pear" to partitions 0, 1 and 2 and every other key to
     * partition 3, folded into the number of partitions actually configured.
     */
    public static class MyPartitioner extends Partitioner<Text, LongWritable> {

        /**
         * @param key           the map output key
         * @param value         the map output value (not used for routing)
         * @param numPartitions number of reduce tasks configured for the job
         * @return a partition index in {@code [0, numPartitions)}
         */
        @Override
        public int getPartition(Text key, LongWritable value, int numPartitions) {
            String name = key.toString();
            int bucket;
            if ("Apple".equals(name)) {
                bucket = 0;
            } else if ("Orange".equals(name)) {
                bucket = 1;
            } else if ("Pear".equals(name)) {
                bucket = 2;
            } else {
                bucket = 3;
            }
            // The returned index must be smaller than numPartitions; with the 4 reducers
            // configured in main() this is the identity mapping.
            return bucket % numPartitions;
        }
    }

    /**
     * Driver: configures and submits the MapReduce job, then exits with 0 on success and 1 on failure.
     *
     * @param args optional; when at least two arguments are given, {@code args[0]} is the input
     *             path and {@code args[1]} the output path. Otherwise the built-in default HDFS
     *             paths are used, so the class can be run from the IDE without a run configuration.
     * @throws Exception if the file system cannot be accessed or the job cannot be submitted
     */
    public static void main(String[] args) throws Exception {
        // Keep a value passed with -Dhadoop.home.dir; only fall back to the local default.
        if (System.getProperty("hadoop.home.dir") == null) {
            System.setProperty("hadoop.home.dir", "C:\\Work\\hadoop-2.9.0");
        }

        String input = DEFAULT_INPUT;
        String output = DEFAULT_OUTPUT;
        if (args != null && args.length >= 2) {
            input = args[0];
            output = args[1];
        }

        // Create the Configuration.
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://192.168.19.128:8020");

        // Remove an existing output directory; the job refuses to start if it already exists.
        // The FileSystem is resolved from the path itself so a fully qualified output URI works
        // even when it differs from fs.defaultFS. It is intentionally not closed: Hadoop caches
        // FileSystem instances and the job uses the same one afterwards.
        Path outputPath = new Path(output);
        FileSystem fileSystem = outputPath.getFileSystem(configuration);
        if (fileSystem.exists(outputPath)) {
            if (!fileSystem.delete(outputPath, true)) {
                throw new IOException("Failed to delete existing output path: " + output);
            }
            System.out.println("outputPath: " + output + " exists, but has been deleted.");
        }

        // Create the Job.
        Job job = Job.getInstance(configuration, "wordcount");

        // Class used to locate the job jar.
        job.setJarByClass(PartitionerApp.class);

        // Input path of the job.
        FileInputFormat.setInputPaths(job, new Path(input));

        // Map settings.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reduce settings.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Custom partitioner with one reducer per category.
        job.setPartitionerClass(MyPartitioner.class);
        job.setNumReduceTasks(NUM_REDUCERS);

        // Output path of the job.
        FileOutputFormat.setOutputPath(job, outputPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
添加完代码后,此时如果尝试运行,可能会出现这样的错误。
"C:\Program Files\Java\jdk1.8.0_102\bin\java" "-javaagent:C:\Work\JetBrains\IntelliJ IDEA 2017.2.3\lib\idea_rt.jar=57859:C:\Work\JetBrains\IntelliJ IDEA 2017.2.3\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files\Java\jdk1.8.0_102\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\access-bridge-64.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\cldrdata.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\jaccess.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\jfxrt.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\nashorn.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\sunmscapi.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\sunpkcs11.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\zipfs.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\jfxswt.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\resources.jar;C:\Program 
Files\Java\jdk1.8.0_102\jre\lib\rt.jar;C:\Work\workshop\project\HadoopWebTest\target\classes;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-ui-2.9.0.war;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-api-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-client-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-common-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-registry-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-tests-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-common-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-router-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-web-proxy-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-nodemanager-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-resourcemanager-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-sharedcachemanager-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-applications-distributedshell-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-timeline-pluginstorage-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-applicationhistoryservice-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-applications-unmanaged-am-launcher-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\common\hadoop-nfs-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\common\hadoop-common-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\common\hadoop-common-2.9.0-tests.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-nfs-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-2.9.0-tests.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-client-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-client-2.9.0-tests.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-native-client-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\
hadoop-hdfs-native-client-2.9.0-tests.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-examples-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-hs-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-app-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-core-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-common-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-shuffle-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-jobclient-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-hs-plugins-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-jobclient-2.9.0-tests.jar" hk.uic.hadoop.testweb.PartitionerApp
Exception in thread "main" java.lang.NoClassDefFoundError: com/ctc/wstx/io/InputBootstrapper
at hk.uic.hadoop.testweb.PartitionerApp.main(PartitionerApp.java:98)
Caused by: java.lang.ClassNotFoundException: com.ctc.wstx.io.InputBootstrapper
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 1 more
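前面其实明明已经添加了所有的 jar 包,不知道为何还会出现这样的错误,不知道是我下载解压的 hadoop2.9.0 有问题还是其他原因。 然后 google 了好多博客还是不能解决。 最后,无意中在 maven 中再添加一个 hadoop-common 的依赖,居然就好了。 原因应该是:从上面的 classpath 可以看到,前面只加入了 share\hadoop 各目录顶层的 hadoop-*.jar,并没有包含第三方依赖(例如提供 com.ctc.wstx.io.InputBootstrapper 的 woodstox-core,它通常位于 share\hadoop\common\lib 等 lib 子目录下);而通过 maven 添加 hadoop-common 时,会把 woodstox-core 等传递依赖一并引入。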
pom.xml 内
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!-- $Id: pom.xml 642118 2008-03-28 08:04:16Z reinhard $ -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <packaging>war</packaging>
  <name>HadoopWebTest</name>
  <groupId>hk.uic.hejing</groupId>
  <artifactId>HadoopWebTest</artifactId>
  <version>1.0</version>
  <build>
    <plugins>
      <plugin>
        <groupId>org.mortbay.jetty</groupId>
        <artifactId>maven-jetty-plugin</artifactId>
        <version>6.1.7</version>
        <configuration>
          <connectors>
            <connector implementation="org.mortbay.jetty.nio.SelectChannelConnector">
              <port>8888</port>
              <maxIdleTime>30000</maxIdleTime>
            </connector>
          </connectors>
          <!-- Use the project.* prefix: the pom.* prefix is deprecated and triggers Maven warnings. -->
          <webAppSourceDirectory>${project.build.directory}/${project.artifactId}-${project.version}</webAppSourceDirectory>
          <contextPath>/</contextPath>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <dependencies>
    <!--
      Declared so that Maven resolves hadoop-common's transitive third-party dependencies
      (for example woodstox-core, which provides com.ctc.wstx.io.InputBootstrapper).
    -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.9.0</version>
    </dependency>
  </dependencies>
</project>
刷新导入成功之后,再尝试运行。
"C:\Program Files\Java\jdk1.8.0_102\bin\java" "-javaagent:C:\Work\JetBrains\IntelliJ IDEA 2017.2.3\lib\idea_rt.jar=58143:C:\Work\JetBrains\IntelliJ IDEA 2017.2.3\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files\Java\jdk1.8.0_102\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\access-bridge-64.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\cldrdata.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\jaccess.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\jfxrt.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\nashorn.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\sunmscapi.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\sunpkcs11.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\ext\zipfs.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\jfxswt.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.8.0_102\jre\lib\resources.jar;C:\Program 
Files\Java\jdk1.8.0_102\jre\lib\rt.jar;C:\Work\workshop\project\HadoopWebTest\target\classes;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-ui-2.9.0.war;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-api-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-client-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-common-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-registry-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-tests-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-common-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-router-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-web-proxy-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-nodemanager-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-resourcemanager-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-sharedcachemanager-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-applications-distributedshell-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-timeline-pluginstorage-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-server-applicationhistoryservice-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\yarn\hadoop-yarn-applications-unmanaged-am-launcher-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\common\hadoop-nfs-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\common\hadoop-common-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\common\hadoop-common-2.9.0-tests.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-nfs-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-2.9.0-tests.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-client-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-client-2.9.0-tests.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\hadoop-hdfs-native-client-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\hdfs\
hadoop-hdfs-native-client-2.9.0-tests.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-examples-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-hs-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-app-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-core-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-common-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-shuffle-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-jobclient-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-hs-plugins-2.9.0.jar;C:\Work\hadoop-2.9.0\share\hadoop\mapreduce\hadoop-mapreduce-client-jobclient-2.9.0-tests.jar;C:\Work\maven-repos\org\apache\hadoop\hadoop-common\2.9.0\hadoop-common-2.9.0.jar;C:\Work\maven-repos\org\apache\hadoop\hadoop-annotations\2.9.0\hadoop-annotations-2.9.0.jar;C:\Program Files\Java\jdk1.8.0_102\lib\tools.jar;C:\Work\maven-repos\com\google\guava\guava\11.0.2\guava-11.0.2.jar;C:\Work\maven-repos\commons-cli\commons-cli\1.2\commons-cli-1.2.jar;C:\Work\maven-repos\org\apache\commons\commons-math3\3.1.1\commons-math3-3.1.1.jar;C:\Work\maven-repos\xmlenc\xmlenc\0.52\xmlenc-0.52.jar;C:\Work\maven-repos\org\apache\httpcomponents\httpclient\4.5.2\httpclient-4.5.2.jar;C:\Work\maven-repos\org\apache\httpcomponents\httpcore\4.4.4\httpcore-4.4.4.jar;C:\Work\maven-repos\commons-codec\commons-codec\1.4\commons-codec-1.4.jar;C:\Work\maven-repos\commons-io\commons-io\2.4\commons-io-2.4.jar;C:\Work\maven-repos\commons-net\commons-net\3.1\commons-net-3.1.jar;C:\Work\maven-repos\commons-collections\commons-collections\3.2.2\commons-collections-3.2.2.jar;C:\Work\maven-repos\javax\servlet\servlet-api\2.5\servlet-api-2.5.jar;C:\Work\maven-repos\org\mortbay\jetty\jetty\6.1.26\jetty-6.1.26.jar;C:\Work\maven-repos\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar;C:\Work\mav
en-repos\org\mortbay\jetty\jetty-sslengine\6.1.26\jetty-sslengine-6.1.26.jar;C:\Work\maven-repos\javax\servlet\jsp\jsp-api\2.1\jsp-api-2.1.jar;C:\Work\maven-repos\com\sun\jersey\jersey-core\1.9\jersey-core-1.9.jar;C:\Work\maven-repos\com\sun\jersey\jersey-json\1.9\jersey-json-1.9.jar;C:\Work\maven-repos\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar;C:\Work\maven-repos\com\sun\xml\bind\jaxb-impl\2.2.3-1\jaxb-impl-2.2.3-1.jar;C:\Work\maven-repos\javax\xml\bind\jaxb-api\2.2.2\jaxb-api-2.2.2.jar;C:\Work\maven-repos\javax\xml\stream\stax-api\1.0-2\stax-api-1.0-2.jar;C:\Work\maven-repos\javax\activation\activation\1.1\activation-1.1.jar;C:\Work\maven-repos\org\codehaus\jackson\jackson-jaxrs\1.8.3\jackson-jaxrs-1.8.3.jar;C:\Work\maven-repos\org\codehaus\jackson\jackson-xc\1.8.3\jackson-xc-1.8.3.jar;C:\Work\maven-repos\com\sun\jersey\jersey-server\1.9\jersey-server-1.9.jar;C:\Work\maven-repos\asm\asm\3.1\asm-3.1.jar;C:\Work\maven-repos\commons-logging\commons-logging\1.1.3\commons-logging-1.1.3.jar;C:\Work\maven-repos\log4j\log4j\1.2.17\log4j-1.2.17.jar;C:\Work\maven-repos\net\java\dev\jets3t\jets3t\0.9.0\jets3t-0.9.0.jar;C:\Work\maven-repos\com\jamesmurty\utils\java-xmlbuilder\0.4\java-xmlbuilder-0.4.jar;C:\Work\maven-repos\commons-lang\commons-lang\2.6\commons-lang-2.6.jar;C:\Work\maven-repos\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar;C:\Work\maven-repos\commons-digester\commons-digester\1.8\commons-digester-1.8.jar;C:\Work\maven-repos\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.jar;C:\Work\maven-repos\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutils-core-1.8.0.jar;C:\Work\maven-repos\org\apache\commons\commons-lang3\3.4\commons-lang3-3.4.jar;C:\Work\maven-repos\org\slf4j\slf4j-api\1.7.25\slf4j-api-1.7.25.jar;C:\Work\maven-repos\org\slf4j\slf4j-log4j12\1.7.25\slf4j-log4j12-1.7.25.jar;C:\Work\maven-repos\org\codehaus\jackson\jackson-core-asl\1.9.13\jackson-core-asl-1.9.13.jar;C:\Work\maven-repos
\org\codehaus\jackson\jackson-mapper-asl\1.9.13\jackson-mapper-asl-1.9.13.jar;C:\Work\maven-repos\org\apache\avro\avro\1.7.7\avro-1.7.7.jar;C:\Work\maven-repos\com\thoughtworks\paranamer\paranamer\2.3\paranamer-2.3.jar;C:\Work\maven-repos\org\xerial\snappy\snappy-java\1.0.5\snappy-java-1.0.5.jar;C:\Work\maven-repos\com\google\protobuf\protobuf-java\2.5.0\protobuf-java-2.5.0.jar;C:\Work\maven-repos\com\google\code\gson\gson\2.2.4\gson-2.2.4.jar;C:\Work\maven-repos\org\apache\hadoop\hadoop-auth\2.9.0\hadoop-auth-2.9.0.jar;C:\Work\maven-repos\com\nimbusds\nimbus-jose-jwt\3.9\nimbus-jose-jwt-3.9.jar;C:\Work\maven-repos\net\jcip\jcip-annotations\1.0\jcip-annotations-1.0.jar;C:\Work\maven-repos\net\minidev\json-smart\1.1.1\json-smart-1.1.1.jar;C:\Work\maven-repos\org\apache\directory\server\apacheds-kerberos-codec\2.0.0-M15\apacheds-kerberos-codec-2.0.0-M15.jar;C:\Work\maven-repos\org\apache\directory\server\apacheds-i18n\2.0.0-M15\apacheds-i18n-2.0.0-M15.jar;C:\Work\maven-repos\org\apache\directory\api\api-asn1-api\1.0.0-M20\api-asn1-api-1.0.0-M20.jar;C:\Work\maven-repos\org\apache\directory\api\api-util\1.0.0-M20\api-util-1.0.0-M20.jar;C:\Work\maven-repos\org\apache\curator\curator-framework\2.7.1\curator-framework-2.7.1.jar;C:\Work\maven-repos\com\jcraft\jsch\0.1.54\jsch-0.1.54.jar;C:\Work\maven-repos\org\apache\curator\curator-client\2.7.1\curator-client-2.7.1.jar;C:\Work\maven-repos\org\apache\curator\curator-recipes\2.7.1\curator-recipes-2.7.1.jar;C:\Work\maven-repos\com\google\code\findbugs\jsr305\3.0.0\jsr305-3.0.0.jar;C:\Work\maven-repos\org\apache\htrace\htrace-core4\4.1.0-incubating\htrace-core4-4.1.0-incubating.jar;C:\Work\maven-repos\org\apache\zookeeper\zookeeper\3.4.6\zookeeper-3.4.6.jar;C:\Work\maven-repos\io\netty\netty\3.7.0.Final\netty-3.7.0.Final.jar;C:\Work\maven-repos\org\apache\commons\commons-compress\1.4.1\commons-compress-1.4.1.jar;C:\Work\maven-repos\org\tukaani\xz\1.0\xz-1.0.jar;C:\Work\maven-repos\org\codehaus\woodstox\stax2-api\3.1.4\stax2-ap
i-3.1.4.jar;C:\Work\maven-repos\com\fasterxml\woodstox\woodstox-core\5.0.3\woodstox-core-5.0.3.jar" hk.uic.hadoop.testweb.PartitionerApp
outputPath: hdfs://192.168.19.128:8020/output/fruits exists, but has been deleted.
2018-02-12 15:05:55,365 INFO [main] Configuration.deprecation (Configuration.java:logDeprecation(1297)) - session.id is deprecated. Instead, use dfs.metrics.session-id
2018-02-12 15:05:55,371 INFO [main] jvm.JvmMetrics (JvmMetrics.java:init(79)) - Initializing JVM Metrics with processName=JobTracker, sessionId=
2018-02-12 15:05:55,872 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadResourcesInternal(142)) - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2018-02-12 15:05:55,913 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadJobJar(470)) - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2018-02-12 15:05:55,928 INFO [main] input.FileInputFormat (FileInputFormat.java:listStatus(289)) - Total input files to process : 1
2018-02-12 15:05:56,020 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(204)) - number of splits:1
2018-02-12 15:05:56,163 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(300)) - Submitting tokens for job: job_local1432978983_0001
2018-02-12 15:05:56,501 INFO [main] mapreduce.Job (Job.java:submit(1574)) - The url to track the job: http://localhost:8080/
2018-02-12 15:05:56,502 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1619)) - Running job: job_local1432978983_0001
2018-02-12 15:05:56,504 INFO [Thread-4] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(500)) - OutputCommitter set in config null
2018-02-12 15:05:56,512 INFO [Thread-4] output.FileOutputCommitter (FileOutputCommitter.java:<init>(123)) - File Output Committer Algorithm version is 1
2018-02-12 15:05:56,512 INFO [Thread-4] output.FileOutputCommitter (FileOutputCommitter.java:<init>(138)) - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2018-02-12 15:05:56,514 INFO [Thread-4] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(518)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2018-02-12 15:05:56,568 INFO [Thread-4] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(477)) - Waiting for map tasks
2018-02-12 15:05:56,569 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(251)) - Starting task: attempt_local1432978983_0001_m_000000_0
2018-02-12 15:05:56,606 INFO [LocalJobRunner Map Task Executor #0] output.FileOutputCommitter (FileOutputCommitter.java:<init>(123)) - File Output Committer Algorithm version is 1
2018-02-12 15:05:56,608 INFO [LocalJobRunner Map Task Executor #0] output.FileOutputCommitter (FileOutputCommitter.java:<init>(138)) - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2018-02-12 15:05:56,624 INFO [LocalJobRunner Map Task Executor #0] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(168)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-02-12 15:05:56,970 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(619)) - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@1a0bf416
2018-02-12 15:05:56,980 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(762)) - Processing split: hdfs://192.168.19.128:8020/springhdfs/fruits.txt:0+69
2018-02-12 15:05:57,028 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1212)) - (EQUATOR) 0 kvi 26214396(104857584)
2018-02-12 15:05:57,028 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1005)) - mapreduce.task.io.sort.mb: 100
2018-02-12 15:05:57,028 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1006)) - soft limit at 83886080
2018-02-12 15:05:57,028 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1007)) - bufstart = 0; bufvoid = 104857600
2018-02-12 15:05:57,028 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1008)) - kvstart = 26214396; length = 6553600
2018-02-12 15:05:57,032 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(403)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2018-02-12 15:05:57,231 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) -
2018-02-12 15:05:57,234 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1469)) - Starting flush of map output
2018-02-12 15:05:57,234 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1491)) - Spilling map output
2018-02-12 15:05:57,235 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1492)) - bufstart = 0; bufend = 99; bufvoid = 104857600
2018-02-12 15:05:57,235 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1494)) - kvstart = 26214396(104857584); kvend = 26214372(104857488); length = 25/6553600
2018-02-12 15:05:57,352 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1681)) - Finished spill 0
2018-02-12 15:05:57,363 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1099)) - Task:attempt_local1432978983_0001_m_000000_0 is done. And is in the process of committing
2018-02-12 15:05:57,376 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - map
2018-02-12 15:05:57,376 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1219)) - Task 'attempt_local1432978983_0001_m_000000_0' done.
2018-02-12 15:05:57,376 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(276)) - Finishing task: attempt_local1432978983_0001_m_000000_0
2018-02-12 15:05:57,377 INFO [Thread-4] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(485)) - map task executor complete.
2018-02-12 15:05:57,382 INFO [Thread-4] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(477)) - Waiting for reduce tasks
2018-02-12 15:05:57,383 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(329)) - Starting task: attempt_local1432978983_0001_r_000000_0
2018-02-12 15:05:57,393 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(123)) - File Output Committer Algorithm version is 1
2018-02-12 15:05:57,393 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(138)) - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2018-02-12 15:05:57,394 INFO [pool-7-thread-1] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(168)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-02-12 15:05:57,509 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1640)) - Job job_local1432978983_0001 running in uber mode : false
2018-02-12 15:05:57,511 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1647)) - map 100% reduce 0%
2018-02-12 15:05:57,562 INFO [pool-7-thread-1] mapred.Task (Task.java:initialize(619)) - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@685c12a6
2018-02-12 15:05:57,565 INFO [pool-7-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@6f0dcd1f
2018-02-12 15:05:57,579 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:<init>(207)) - MergerManager: memoryLimit=1321939712, maxSingleShuffleLimit=330484928, mergeThreshold=872480256, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2018-02-12 15:05:57,581 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local1432978983_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2018-02-12 15:05:57,616 INFO [localfetcher#1] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(145)) - localfetcher#1 about to shuffle output of map attempt_local1432978983_0001_m_000000_0 decomp: 34 len: 38 to MEMORY
2018-02-12 15:05:57,623 INFO [localfetcher#1] reduce.InMemoryMapOutput (InMemoryMapOutput.java:doShuffle(93)) - Read 34 bytes from map-output for attempt_local1432978983_0001_m_000000_0
2018-02-12 15:05:57,625 INFO [localfetcher#1] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(322)) - closeInMemoryFile -> map-output of size: 34, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->34
2018-02-12 15:05:57,627 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2018-02-12 15:05:57,628 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:57,628 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(694)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2018-02-12 15:05:57,774 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-02-12 15:05:57,775 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 26 bytes
2018-02-12 15:05:57,779 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(761)) - Merged 1 segments, 34 bytes to disk to satisfy reduce memory limit
2018-02-12 15:05:57,782 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(791)) - Merging 1 files, 38 bytes from disk
2018-02-12 15:05:57,784 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(806)) - Merging 0 segments, 0 bytes from memory into reduce
2018-02-12 15:05:57,784 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-02-12 15:05:57,785 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 26 bytes
2018-02-12 15:05:57,786 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:57,816 INFO [pool-7-thread-1] Configuration.deprecation (Configuration.java:logDeprecation(1297)) - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2018-02-12 15:05:57,916 INFO [pool-7-thread-1] mapred.Task (Task.java:done(1099)) - Task:attempt_local1432978983_0001_r_000000_0 is done. And is in the process of committing
2018-02-12 15:05:57,921 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:57,921 INFO [pool-7-thread-1] mapred.Task (Task.java:commit(1260)) - Task attempt_local1432978983_0001_r_000000_0 is allowed to commit now
2018-02-12 15:05:57,937 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(582)) - Saved output of task 'attempt_local1432978983_0001_r_000000_0' to hdfs://192.168.19.128:8020/output/fruits/_temporary/0/task_local1432978983_0001_r_000000
2018-02-12 15:05:57,939 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - reduce > reduce
2018-02-12 15:05:57,939 INFO [pool-7-thread-1] mapred.Task (Task.java:sendDone(1219)) - Task 'attempt_local1432978983_0001_r_000000_0' done.
2018-02-12 15:05:57,940 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(352)) - Finishing task: attempt_local1432978983_0001_r_000000_0
2018-02-12 15:05:57,940 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(329)) - Starting task: attempt_local1432978983_0001_r_000001_0
2018-02-12 15:05:57,942 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(123)) - File Output Committer Algorithm version is 1
2018-02-12 15:05:57,942 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(138)) - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2018-02-12 15:05:57,943 INFO [pool-7-thread-1] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(168)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-02-12 15:05:58,169 INFO [pool-7-thread-1] mapred.Task (Task.java:initialize(619)) - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@1da33b9d
2018-02-12 15:05:58,169 INFO [pool-7-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@c854bea
2018-02-12 15:05:58,170 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:<init>(207)) - MergerManager: memoryLimit=1321939712, maxSingleShuffleLimit=330484928, mergeThreshold=872480256, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2018-02-12 15:05:58,172 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local1432978983_0001_r_000001_0 Thread started: EventFetcher for fetching Map Completion Events
2018-02-12 15:05:58,276 INFO [localfetcher#2] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(145)) - localfetcher#2 about to shuffle output of map attempt_local1432978983_0001_m_000000_0 decomp: 36 len: 40 to MEMORY
2018-02-12 15:05:58,278 INFO [localfetcher#2] reduce.InMemoryMapOutput (InMemoryMapOutput.java:doShuffle(93)) - Read 36 bytes from map-output for attempt_local1432978983_0001_m_000000_0
2018-02-12 15:05:58,278 INFO [localfetcher#2] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(322)) - closeInMemoryFile -> map-output of size: 36, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->36
2018-02-12 15:05:58,279 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2018-02-12 15:05:58,280 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:58,280 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(694)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2018-02-12 15:05:58,379 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-02-12 15:05:58,380 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 27 bytes
2018-02-12 15:05:58,383 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(761)) - Merged 1 segments, 36 bytes to disk to satisfy reduce memory limit
2018-02-12 15:05:58,386 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(791)) - Merging 1 files, 40 bytes from disk
2018-02-12 15:05:58,386 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(806)) - Merging 0 segments, 0 bytes from memory into reduce
2018-02-12 15:05:58,386 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-02-12 15:05:58,388 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 27 bytes
2018-02-12 15:05:58,388 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:58,423 INFO [pool-7-thread-1] mapred.Task (Task.java:done(1099)) - Task:attempt_local1432978983_0001_r_000001_0 is done. And is in the process of committing
2018-02-12 15:05:58,429 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:58,429 INFO [pool-7-thread-1] mapred.Task (Task.java:commit(1260)) - Task attempt_local1432978983_0001_r_000001_0 is allowed to commit now
2018-02-12 15:05:58,443 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(582)) - Saved output of task 'attempt_local1432978983_0001_r_000001_0' to hdfs://192.168.19.128:8020/output/fruits/_temporary/0/task_local1432978983_0001_r_000001
2018-02-12 15:05:58,444 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - reduce > reduce
2018-02-12 15:05:58,444 INFO [pool-7-thread-1] mapred.Task (Task.java:sendDone(1219)) - Task 'attempt_local1432978983_0001_r_000001_0' done.
2018-02-12 15:05:58,444 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(352)) - Finishing task: attempt_local1432978983_0001_r_000001_0
2018-02-12 15:05:58,444 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(329)) - Starting task: attempt_local1432978983_0001_r_000002_0
2018-02-12 15:05:58,446 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(123)) - File Output Committer Algorithm version is 1
2018-02-12 15:05:58,447 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(138)) - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2018-02-12 15:05:58,448 INFO [pool-7-thread-1] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(168)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-02-12 15:05:58,513 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1647)) - map 100% reduce 100%
2018-02-12 15:05:58,583 INFO [pool-7-thread-1] mapred.Task (Task.java:initialize(619)) - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@59dc1d65
2018-02-12 15:05:58,583 INFO [pool-7-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@246cd798
2018-02-12 15:05:58,584 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:<init>(207)) - MergerManager: memoryLimit=1321939712, maxSingleShuffleLimit=330484928, mergeThreshold=872480256, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2018-02-12 15:05:58,585 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local1432978983_0001_r_000002_0 Thread started: EventFetcher for fetching Map Completion Events
2018-02-12 15:05:58,651 INFO [localfetcher#3] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(145)) - localfetcher#3 about to shuffle output of map attempt_local1432978983_0001_m_000000_0 decomp: 32 len: 36 to MEMORY
2018-02-12 15:05:58,652 INFO [localfetcher#3] reduce.InMemoryMapOutput (InMemoryMapOutput.java:doShuffle(93)) - Read 32 bytes from map-output for attempt_local1432978983_0001_m_000000_0
2018-02-12 15:05:58,653 INFO [localfetcher#3] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(322)) - closeInMemoryFile -> map-output of size: 32, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->32
2018-02-12 15:05:58,653 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2018-02-12 15:05:58,654 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:58,654 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(694)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2018-02-12 15:05:58,766 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-02-12 15:05:58,767 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 25 bytes
2018-02-12 15:05:58,770 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(761)) - Merged 1 segments, 32 bytes to disk to satisfy reduce memory limit
2018-02-12 15:05:58,771 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(791)) - Merging 1 files, 36 bytes from disk
2018-02-12 15:05:58,772 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(806)) - Merging 0 segments, 0 bytes from memory into reduce
2018-02-12 15:05:58,772 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-02-12 15:05:58,773 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 25 bytes
2018-02-12 15:05:58,774 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:58,804 INFO [pool-7-thread-1] mapred.Task (Task.java:done(1099)) - Task:attempt_local1432978983_0001_r_000002_0 is done. And is in the process of committing
2018-02-12 15:05:58,806 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:58,807 INFO [pool-7-thread-1] mapred.Task (Task.java:commit(1260)) - Task attempt_local1432978983_0001_r_000002_0 is allowed to commit now
2018-02-12 15:05:58,816 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(582)) - Saved output of task 'attempt_local1432978983_0001_r_000002_0' to hdfs://192.168.19.128:8020/output/fruits/_temporary/0/task_local1432978983_0001_r_000002
2018-02-12 15:05:58,817 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - reduce > reduce
2018-02-12 15:05:58,817 INFO [pool-7-thread-1] mapred.Task (Task.java:sendDone(1219)) - Task 'attempt_local1432978983_0001_r_000002_0' done.
2018-02-12 15:05:58,817 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(352)) - Finishing task: attempt_local1432978983_0001_r_000002_0
2018-02-12 15:05:58,817 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(329)) - Starting task: attempt_local1432978983_0001_r_000003_0
2018-02-12 15:05:58,819 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(123)) - File Output Committer Algorithm version is 1
2018-02-12 15:05:58,819 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(138)) - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false
2018-02-12 15:05:58,819 INFO [pool-7-thread-1] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(168)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-02-12 15:05:58,976 INFO [pool-7-thread-1] mapred.Task (Task.java:initialize(619)) - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@44d9a753
2018-02-12 15:05:58,977 INFO [pool-7-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@6cb6847
2018-02-12 15:05:58,978 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:<init>(207)) - MergerManager: memoryLimit=1321939712, maxSingleShuffleLimit=330484928, mergeThreshold=872480256, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2018-02-12 15:05:58,979 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local1432978983_0001_r_000003_0 Thread started: EventFetcher for fetching Map Completion Events
2018-02-12 15:05:59,037 INFO [localfetcher#4] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(145)) - localfetcher#4 about to shuffle output of map attempt_local1432978983_0001_m_000000_0 decomp: 19 len: 23 to MEMORY
2018-02-12 15:05:59,039 INFO [localfetcher#4] reduce.InMemoryMapOutput (InMemoryMapOutput.java:doShuffle(93)) - Read 19 bytes from map-output for attempt_local1432978983_0001_m_000000_0
2018-02-12 15:05:59,039 INFO [localfetcher#4] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(322)) - closeInMemoryFile -> map-output of size: 19, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->19
2018-02-12 15:05:59,040 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2018-02-12 15:05:59,041 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:59,041 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(694)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2018-02-12 15:05:59,167 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-02-12 15:05:59,168 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 10 bytes
2018-02-12 15:05:59,171 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(761)) - Merged 1 segments, 19 bytes to disk to satisfy reduce memory limit
2018-02-12 15:05:59,172 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(791)) - Merging 1 files, 23 bytes from disk
2018-02-12 15:05:59,172 INFO [pool-7-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(806)) - Merging 0 segments, 0 bytes from memory into reduce
2018-02-12 15:05:59,173 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-02-12 15:05:59,174 INFO [pool-7-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 10 bytes
2018-02-12 15:05:59,174 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:59,197 INFO [pool-7-thread-1] mapred.Task (Task.java:done(1099)) - Task:attempt_local1432978983_0001_r_000003_0 is done. And is in the process of committing
2018-02-12 15:05:59,200 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - 1 / 1 copied.
2018-02-12 15:05:59,200 INFO [pool-7-thread-1] mapred.Task (Task.java:commit(1260)) - Task attempt_local1432978983_0001_r_000003_0 is allowed to commit now
2018-02-12 15:05:59,207 INFO [pool-7-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(582)) - Saved output of task 'attempt_local1432978983_0001_r_000003_0' to hdfs://192.168.19.128:8020/output/fruits/_temporary/0/task_local1432978983_0001_r_000003
2018-02-12 15:05:59,208 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(620)) - reduce > reduce
2018-02-12 15:05:59,208 INFO [pool-7-thread-1] mapred.Task (Task.java:sendDone(1219)) - Task 'attempt_local1432978983_0001_r_000003_0' done.
2018-02-12 15:05:59,208 INFO [pool-7-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(352)) - Finishing task: attempt_local1432978983_0001_r_000003_0
2018-02-12 15:05:59,209 INFO [Thread-4] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(485)) - reduce task executor complete.
2018-02-12 15:05:59,514 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1658)) - Job job_local1432978983_0001 completed successfully
2018-02-12 15:05:59,539 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1665)) - Counters: 35
File System Counters
FILE: Number of bytes read=3213
FILE: Number of bytes written=2389922
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=345
HDFS: Number of bytes written=101
HDFS: Number of read operations=60
HDFS: Number of large read operations=0
HDFS: Number of write operations=30
Map-Reduce Framework
Map input records=7
Map output records=7
Map output bytes=99
Map output materialized bytes=137
Input split bytes=113
Combine input records=0
Combine output records=0
Reduce input groups=4
Reduce shuffle bytes=137
Reduce input records=7
Reduce output records=4
Spilled Records=14
Shuffled Maps =4
Failed Shuffles=0
Merged Map outputs=4
GC time elapsed (ms)=8
Total committed heap usage (bytes)=1192755200
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=69
File Output Format Counters
Bytes Written=40
Process finished with exit code 0
细心的朋友会发现,这样运行其实是在本地运行的,并没有提交到集群上去执行。
从日志中可以看到,任务名称(例如 job_local1432978983_0001)都带有 local 的字眼,说明使用的是 LocalJobRunner。
设置提交job 到 集群
需要将写好的代码打包一个jar
在 pom.xml 的 <plugins> 节点内再添加以下插件:
<!-- Configures maven-assembly-plugin 2.5.5 to assemble the project during the package phase. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.5.5</version>
<configuration>
<archive>
<manifest>
<!-- Manifest settings: add a Class-Path entry prefixed with lib/ and declare the main class. -->
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<!-- Replace with the fully qualified name of your own main class. -->
<mainClass>hk.uic.hadoop.testweb.PartitionerApp</mainClass>
</manifest>
</archive>
<descriptorRefs>
<!-- Predefined assembly descriptor that packages the project together with its dependencies. -->
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<!-- Bind the "single" goal to the package phase so packaging the project also runs the assembly. -->
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
注意修改 自己的 mainClass
鼠标右键项目,选择 Open Module Settings 或者按快捷键 F4,打开 Project Structure 界面。
在 Artifacts 中点击 ‘+’ → JAR → 选择 From modules with dependencies...
菜单栏 Build–>Build Artifacts
会生成 jar 包
添加这些配置到 main 函数内
// Enable cross-platform job submission (the client here is Windows, per the C:\ paths below).
// NOTE(review): exact semantics are defined in Hadoop's mapred-default.xml — confirm there.
configuration.set("mapreduce.app-submission.cross-platform", "true");
// Host name / IP of the YARN ResourceManager.
configuration.set("yarn.resourcemanager.hostname", "192.168.19.128");
// Run the job on YARN; the earlier run without this setting executed locally (job ids contained "local").
configuration.set("mapreduce.framework.name", "yarn");
// ResourceManager address (host:port) that the client submits to.
configuration.set("yarn.resourcemanager.address", "192.168.19.128:8032");
// Local path of the job jar built as an IDEA artifact; adjust to your own output path.
configuration.set("mapreduce.job.jar","C:\\Work\\workshop\\Hadoop\\HadoopWebTest\\out\\artifacts\\HadoopWebTest_jar\\HadoopWebTest.jar");
完整 PartitionerApp.java
// Body of PartitionerApp.main: configures the job and submits it to the YARN cluster.
// Tell Hadoop where the local installation lives (the directory that holds bin\winutils.exe).
System.setProperty("hadoop.home.dir", "C:\\Work\\hadoop-2.9.0");
// Hard-code the input/output paths instead of configuring program arguments in IDEA.
args = new String[2];
args[0] = "hdfs://192.168.19.128:8020/springhdfs/fruits.txt";
args[1] = "hdfs://192.168.19.128:8020/output/fruits";
// Create the Configuration and point it at the cluster's HDFS.
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS", "hdfs://192.168.19.128:8020");
// Settings for submitting to the cluster from this (Windows) client.
// The job jar lives under C:\Work\workshop\Hadoop\HadoopWebTest\out\artifacts\HadoopWebTest_jar
configuration.set("mapreduce.app-submission.cross-platform", "true");
configuration.set("yarn.resourcemanager.hostname", "192.168.19.128");
configuration.set("mapreduce.framework.name", "yarn");
configuration.set("yarn.resourcemanager.address", "192.168.19.128:8032");
configuration.set("mapreduce.job.jar","C:\\Work\\workshop\\Hadoop\\HadoopWebTest\\out\\artifacts\\HadoopWebTest_jar\\HadoopWebTest.jar");
// Remove the output directory if it already exists, since it is recreated by this run.
Path outputPath = new Path(args[1]);
FileSystem fileSystem = FileSystem.get(configuration);
if (fileSystem.exists(outputPath)) {
    fileSystem.delete(outputPath, true);
    System.out.println("outputPath: " + args[1] + " exists, but has been deleted.");
}
// Create the Job.
Job job = Job.getInstance(configuration, "wordcount");
// Identify the job jar by the class it contains.
job.setJarByClass(PartitionerApp.class);
// Input path of the job, taken from the arguments.
FileInputFormat.setInputPaths(job, new Path(args[0]));
// Map-side settings: mapper class and the map OUTPUT key/value types.
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(Text.class);
// Fixed: this used to call setOutputValueClass (a duplicate of the reduce-side call below),
// so the map output value type was never declared explicitly.
job.setMapOutputValueClass(LongWritable.class);
// Reduce-side settings: reducer class and the final output key/value types.
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
// Custom Partitioner that routes keys to reducers.
job.setPartitionerClass(MyPartitioner.class);
// Four reducers, one per category.
job.setNumReduceTasks(4);
// Output path of the job.
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// Block until the job finishes; exit 0 on success, 1 on failure.
System.exit(job.waitForCompletion(true) ? 0 : 1);
测试运行。
到此,本地开发的 MR job 已经成功从本地提交到集群上。