MR的入门案例
要求
读取下面各个文件中的所有数字,去除重复值后按从小到大的顺序排序输出
a.txt
12 123 34 1 5 345 23
b.txt
34 12345 34 1 3 5 57 4
c.txt
12 23 45 12 56 89 77 57
MyDriver
package com.qf.test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver that configures and submits the sort-and-deduplicate MapReduce job.
 *
 * <p>The job wires {@link MyMapper} and {@link MyReduce} together and uses
 * {@code LongWritable} for both the output key and the output value.
 *
 * <p>Usage: {@code MyDriver [inputPath [outputPath]]}. Any path that is not
 * supplied on the command line falls back to the built-in default below.
 */
public class MyDriver {

    /** Input location used when no input path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "D:\\test数据\\";

    /** Output location used when no output path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "D:/ot449";

    /**
     * Builds the job, runs it to completion and exits with a status that
     * reflects the job result (0 on success, 1 on failure).
     *
     * @param args optional {@code inputPath} and {@code outputPath}
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // Load the Hadoop configuration.
        Configuration conf = new Configuration();

        // Create the job object.
        Job job = Job.getInstance(conf, "com.qf.test");

        // Tell Hadoop which jar holds the job classes; without this the
        // mapper/reducer classes cannot be located when run on a cluster.
        job.setJarByClass(MyDriver.class);

        // Mapper and reducer implementations.
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReduce.class);

        // Output key/value types. No separate map-output types are set, so
        // the same types apply to the mapper's output as well.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(LongWritable.class);

        // Where to read input from and where to write results to.
        // NOTE(review): Hadoop normally refuses to run if the output
        // directory already exists - remove it between runs.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Propagate the job result to the caller instead of discarding it.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
MyReduce
package com.qf.test;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for the sort-and-deduplicate job.
 *
 * <p>Each call handles one distinct key, so writing the key exactly once per
 * call is what removes the duplicates. The grouped values are not read.
 */
public class MyReduce extends Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {

    /**
     * Writes the key once, paired with a default-constructed
     * {@code LongWritable} as a placeholder value.
     *
     * @param key     the number being reduced
     * @param values  values grouped under {@code key}; ignored
     * @param context sink for the output record
     * @throws IOException          if the record cannot be written
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(LongWritable key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        final LongWritable placeholder = new LongWritable();
        context.write(key, placeholder);
    }
}
MyMapper
package com.qf.test;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Mapper for the sort-and-deduplicate job.
 *
 * <p>Parses the whitespace-separated integers on each input line and emits
 * {@code (number, 1)} once for every distinct number on that line. Duplicates
 * across lines and files are collapsed later, when records are grouped by key.
 */
public class MyMapper extends Mapper<LongWritable, Text, LongWritable, LongWritable> {

    /** Constant value written with every key; the value carries no information. */
    private static final LongWritable ONE = new LongWritable(1);

    /** Output key reused across writes to avoid allocating one object per record. */
    private final LongWritable outKey = new LongWritable();

    /**
     * Emits each distinct number found on one line of input.
     *
     * @param key     input key supplied by the input format; unused
     * @param value   one line of text holding numbers separated by whitespace
     * @param context sink for the {@code (number, 1)} output records
     * @throws IOException           if a record cannot be written
     * @throws InterruptedException  if a write is interrupted
     * @throws NumberFormatException if a token is not a valid {@code long}
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Trim first so leading/trailing whitespace cannot produce empty
        // tokens, and skip blank lines instead of failing on parseLong("").
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return;
        }

        // Split on any run of whitespace so both tab- and space-separated
        // input work; collect into a set to drop duplicates within the line.
        Set<Long> distinctNumbers = new HashSet<>();
        for (String token : line.split("\\s+")) {
            distinctNumbers.add(Long.parseLong(token));
        }

        for (long number : distinctNumbers) {
            outKey.set(number);
            context.write(outKey, ONE);
        }
    }
}