test1 test2 test2 test3 test3 test3 test4 test4 test4 test4 test5 test5 test5 test5 test5

自定义Driver类(WordCountDriver)
将 MapReduce中词频统计简单实现 中WordCountDriver改为如下代码,其他代码不变
package com.test.mapreduce.combiner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WordCountDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// 1.创建配置信息Configuration对象并获取Job单例对象
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
// 2.设置关联本Driver程序的jar
job.setJarByClass(WordCountDriver.class);
// 3.设置关联Mapper和Reducer的jar
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
// 4.设置Mapper输出的kv类型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// 5.设置最终输出的kv类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// 6.设置combiner(由于自定义combiner代码与Reduce相同,可以直接使用自定义Reduce替代)
job.setCombinerClass(WordCountReducer.class);
// 7.设置输入和输出路径
FileInputFormat.setInputPaths(job, new Path("D:\input"));
FileOutputFormat.setOutputPath(job, new Path("D:\output"));
// 8.提交job
boolean result = job.waitForCompletion(true);
System.exit(result ? 0 : 1);
}
}
运行结果
使用combiner后,输入15个数据,分为了5组:



