package com.a.b.partion_demo;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class MyPartioner extends Partitioner<Text, NullWritable> {
    @Override
    public int getPartition(Text text, NullWritable nullWritable, int numPartitions) {
        // Input lines are tab-separated; field 5 holds the value to partition on
        String[] strings = text.toString().split("\t");
        // Debug output: the field being inspected
        System.out.println(strings[5]);
        // Records with a value greater than 15 go to partition 1, all others to partition 0
        if (Integer.parseInt(strings[5]) > 15) {
            return 1;
        } else {
            return 0;
        }
    }
}
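
The driver below registers a PartionMapper and a PartionReduce that are not part of this listing. A minimal sketch of what they might look like, assuming the mapper emits each input line unchanged as the key (with a NullWritable value) so MyPartioner can split it on tabs, and the reducer simply writes each key through; these class bodies are illustrative, not the original implementation:

package com.a.b.partion_demo;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical sketch: passes each input line through as the map output key
public class PartionMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(value, NullWritable.get());
    }
}

package com.a.b.partion_demo;

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical sketch: identity reducer that writes each key out once
public class PartionReduce extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        context.write(key, NullWritable.get());
    }
}
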
package com.a.b.partion_demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class PartionMain extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        // Use the Configuration injected by ToolRunner instead of creating a fresh one
        Job job = Job.getInstance(super.getConf(), "partion_demo");
        job.setJarByClass(PartionMain.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path("hdfs://node01:8020/partition"));
        TextOutputFormat.setOutputPath(job, new Path("hdfs://node01:8020/partition_out"));

        job.setMapperClass(PartionMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setReducerClass(PartionReduce.class);

        // Partitioner and number of partitions: the reduce task count must cover
        // every partition number MyPartioner can return (here 0 and 1)
        job.setPartitionerClass(MyPartioner.class);
        job.setNumReduceTasks(2);

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int i = ToolRunner.run(new Configuration(), new PartionMain(), args);
        System.exit(i);
    }
}
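
Packaged into a jar, the job can be launched with the standard hadoop jar command (the jar name here is an assumption; the input and output paths are hard-coded above, so no further arguments are needed):

hadoop jar partion_demo.jar com.a.b.partion_demo.PartionMain

With two reduce tasks, the output directory /partition_out should contain part-r-00000 (records whose field 5 is 15 or less) and part-r-00001 (records whose field 5 is greater than 15). Note that the output directory must not already exist, or the job will fail on submission.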