Following up on the previous post, we now globally sort that job's output by total flow and split it across custom partitions.
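This job reads the per-phone totals written by the previous job (the f://fc//out directory the driver points at below). Each record should be one tab-separated line, for example:

13480253104	120	1320	1440

that is: phone number, upstream flow, downstream flow, total. The sort itself costs nothing extra: MapReduce always sorts map output by key during the shuffle, so making FlowBean the map output key (and implementing its compareTo) is all that "global sort" requires.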
1. Mapper class
package com.tiger.FlowSortMapper;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import com.tiger.FlowBean.FlowBean;

/**
 * @author tiger
 * @version 1.0
 */
public class FlowSortMapper extends Mapper<LongWritable, Text, FlowBean, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // input line: phone \t upFlow \t dwFlow [\t sum]
        String[] split = value.toString().split("\t");
        long upFlow = Long.parseLong(split[1]);
        long dwFlow = Long.parseLong(split[2]);
        // emit the bean as the KEY so the shuffle sorts by it;
        // the phone number rides along as the value
        context.write(new FlowBean(upFlow, dwFlow), new Text(split[0]));
    }
}
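As a quick offline check of the parsing above (no cluster needed), the same split logic can be run against one sample record; the class name ParseCheck is ours, not part of the job:

// Hypothetical standalone check of the mapper's parsing logic.
public class ParseCheck {
    public static void main(String[] args) {
        String line = "13480253104\t120\t1320\t1440";  // sample record from section 6
        String[] split = line.split("\t");
        long upFlow = Long.parseLong(split[1]);        // 120
        long dwFlow = Long.parseLong(split[2]);        // 1320
        System.out.println(split[0] + " -> " + (upFlow + dwFlow));  // 13480253104 -> 1440
    }
}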
2. FlowBean
package com.tiger.FlowBean;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {
    private long upFlow;
    private long dwFlow;
    private long sum;

    // Writable requires a no-arg constructor for deserialization
    public FlowBean() {}

    public FlowBean(long upFlow, long dwFlow) {
        this.upFlow = upFlow;
        this.dwFlow = dwFlow;
        this.sum = upFlow + dwFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDwFlow() {
        return dwFlow;
    }

    public void setDwFlow(long dwFlow) {
        this.dwFlow = dwFlow;
    }

    public long getSum() {
        return sum;
    }

    public void setSum(long sum) {
        this.sum = sum;
    }

    @Override // serialization
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(dwFlow);
        out.writeLong(sum);
    }

    @Override // deserialization: read fields in the same order they were written
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        dwFlow = in.readLong();
        sum = in.readLong();
    }

    @Override
    public String toString() {
        return upFlow + "\t" + dwFlow + "\t" + sum;
    }

    @Override // sort ascending by total flow
    public int compareTo(FlowBean o) {
        // the original `this.sum > o.getSum() ? 1 : -1` never returned 0, which
        // violates the compareTo contract; compare the sum first, then break
        // ties on the individual flows so only truly identical beans are equal
        int cmp = Long.compare(this.sum, o.sum);
        if (cmp == 0) cmp = Long.compare(this.upFlow, o.upFlow);
        if (cmp == 0) cmp = Long.compare(this.dwFlow, o.dwFlow);
        return cmp;
    }
}
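A minimal local sketch (a throwaway test class of ours, assuming FlowBean is on the classpath) that exercises both the Writable round-trip and the ordering compareTo defines:

import java.io.*;

import com.tiger.FlowBean.FlowBean;

public class FlowBeanCheck {
    public static void main(String[] args) throws IOException {
        FlowBean a = new FlowBean(120, 1320);    // sum = 1440
        FlowBean b = new FlowBean(735, 11349);   // sum = 12084
        System.out.println(a.compareTo(b) < 0);  // true: a sorts before b

        // write a out, then read it back into a fresh bean
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        a.write(new DataOutputStream(bytes));
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy);                // 120	1320	1440
    }
}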
3. Reducer
package com.tiger.FlowSortReducer;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import com.tiger.FlowBean.FlowBean;

public class FlowSortReducer extends Reducer<FlowBean, Text, Text, FlowBean> {
    @Override
    protected void reduce(FlowBean key, Iterable<Text> value, Context context)
            throws IOException, InterruptedException {
        // beans that compare as equal (e.g. the two phones with flows 1136/94/1230)
        // arrive in a single reduce call, so emit every phone number instead of
        // only the first one; key and value are swapped back on output
        for (Text phone : value) {
            context.write(phone, key);
        }
    }
}
4. Partitioner
package com.tiger.FlowSortPartitioner;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

import com.tiger.FlowBean.FlowBean;

public class FlowSortPartitioner extends Partitioner<FlowBean, Text> {
    @Override
    public int getPartition(FlowBean key, Text value, int numPartitions) {
        // route each record by the first three digits of its phone number
        String prefix = value.toString().substring(0, 3);
        switch (prefix) {
            case "135": return 0;
            case "136": return 1;
            case "137": return 2;
            case "138": return 3;
            default:    return 4;  // every other prefix goes to the last partition
        }
    }
}
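A quick sanity check of the routing (again a throwaway class of ours; the key argument is unused by getPartition, so null is fine here):

import org.apache.hadoop.io.Text;

import com.tiger.FlowSortPartitioner.FlowSortPartitioner;

public class PartitionCheck {
    public static void main(String[] args) {
        FlowSortPartitioner p = new FlowSortPartitioner();
        System.out.println(p.getPartition(null, new Text("13726130503"), 5)); // 2: prefix "137"
        System.out.println(p.getPartition(null, new Text("15013685858"), 5)); // 4: no matching prefix
    }
}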
5. Driver
package com.tiger.FlowSortDriver;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.tiger.FlowBean.FlowBean;
import com.tiger.FlowSortMapper.FlowSortMapper;
import com.tiger.FlowSortPartitioner.FlowSortPartitioner;
import com.tiger.FlowSortReducer.FlowSortReducer;

public class FlowSortDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(FlowSortDriver.class);

        job.setMapperClass(FlowSortMapper.class);
        job.setReducerClass(FlowSortReducer.class);

        // map output: FlowBean key (sorted in the shuffle), phone number value
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);
        // final output: phone number key, FlowBean value
        // (the original called setOutputValueClass twice; the first call
        // should have been setOutputKeyClass)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        job.setPartitionerClass(FlowSortPartitioner.class);  // plug in the custom partitioner
        job.setNumReduceTasks(5);  // must cover all partition indices 0-4, so 5 reduce tasks

        FileInputFormat.setInputPaths(job, new Path("f://fc//out"));
        FileOutputFormat.setOutputPath(job, new Path("f://fc/out1"));

        boolean waitForCompletion = job.waitForCompletion(true);
        System.out.println(waitForCompletion);
    }
}
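One practical note: if f://fc/out1 is left over from an earlier run, the job aborts with a FileAlreadyExistsException. A small sketch of the lines that could go inside main() before FileOutputFormat.setOutputPath to clear it first (using the standard Hadoop FileSystem API; add import org.apache.hadoop.fs.FileSystem):

// delete the output directory if it already exists, so reruns don't fail
FileSystem fs = FileSystem.get(configuration);
Path out = new Path("f://fc/out1");
if (fs.exists(out)) {
    fs.delete(out, true);  // true = delete recursively
}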
6. Sorting result (columns: phone, upFlow, dwFlow, total)
13480253104 120 1320 1440
13502468823 735 11349 12084
13510439658 1116 954 2070
13560436326 1136 94 1230
13560436666 1136 94 1230
13560439658 918 4938 5856
13602846565 198 910 1108
13660577991 660 690 1350
13719199419 240 0 240
13726130503 299 681 980
13726238888 2481 24681 27162
13760778710 120 120 240
13822544101 264 0 264
13884138413 4116 1432 5548
13922314466 3008 3720 6728
13925057413 11058 4243 15301
13926251106 240 0 240
13926435656 132 1512 1644
15013685858 369 338 707
15889002119 938 380 1318
15920133257 316 296 612
18212575961 1527 2106 3633
18320173382 9531 212 9743
7. Result with sorting plus partitioning
Partition 1:
13560436666 1136 94 1230
13560436326 1136 94 1230
13510439658 1116 954 2070
13560439658 918 4938 5856
13502468823 735 11349 12084
Partition 2:
13602846565 198 910 1108
13660577991 660 690 1350
Partition 3:
13760778710 120 120 240
13719199419 240 0 240
13726130503 299 681 980
13726238888 2481 24681 27162
Partition 4:
13822544101 264 0 264
13884138413 4116 1432 5548
Partition 5:
13926251106 240 0 240
15920133257 316 296 612
15013685858 369 338 707
15889002119 938 380 1318
13480253104 120 1320 1440
13926435656 132 1512 1644
18212575961 1527 2106 3633
13922314466 3008 3720 6728
18320173382 9531 212 9743
13925057413 11058 4243 15301