This article shows how to implement a custom output sort in Hadoop MapReduce. The example reads tab-separated name/grade records, computes each student's average grade, and wraps the result in a bean that implements WritableComparable, so that its compareTo method defines the sort order. The walkthrough is kept brief and easy to follow; hopefully you will take something away from it.
package com.hgs;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvgValue {
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		if (args.length != 2) {
			System.err.println("Usage: AvgValue <input path> <output path>");
			System.exit(1);
		}
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "avg of grades");
		job.setJarByClass(AvgValue.class);
		job.setMapperClass(InputClass.class);
		job.setReducerClass(OutputClass.class);
		// The map output key (Text) and the final reduce output key (NameKey)
		// differ, so they must be declared separately.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(DoubleWritable.class);
		job.setOutputKeyClass(NameKey.class);
		job.setOutputValueClass(DoubleWritable.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

// Parses each "name<TAB>grade" line and emits (name, grade).
class InputClass extends Mapper<LongWritable, Text, Text, DoubleWritable> {
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String line = value.toString();
		if (line.length() > 0) {
			String[] array = line.split("\t");
			if (array.length == 2) {
				String name = array[0];
				int grade = Integer.parseInt(array[1]);
				context.write(new Text(name), new DoubleWritable(grade));
			}
		}
	}
}

// Averages the grades for each name and wraps the result in a NameKey bean.
class OutputClass extends Reducer<Text, DoubleWritable, NameKey, DoubleWritable> {
	@Override
	protected void reduce(Text text, Iterable<DoubleWritable> iterable, Context context)
			throws IOException, InterruptedException {
		double sum = 0; // double, so the average keeps its fractional part
		int cnt = 0;
		for (DoubleWritable iw : iterable) {
			sum += iw.get();
			cnt++;
		}
		double avg = sum / cnt;
		// Copy the key: Hadoop reuses the Text instance across reduce calls.
		NameKey nk = new NameKey(new Text(text), new DoubleWritable(avg));
		context.write(nk, new DoubleWritable(avg));
	}
}

// The output record is wrapped in a bean that implements WritableComparable;
// overriding compareTo is what defines the custom sort order.
class NameKey implements WritableComparable<NameKey> {
	private Text name;
	private DoubleWritable grade;

	// Hadoop instantiates Writables by reflection, so a no-arg constructor is required.
	public NameKey() {
		this.name = new Text();
		this.grade = new DoubleWritable();
	}

	public NameKey(Text name, DoubleWritable grade) {
		this.name = name;
		this.grade = grade;
	}

	public Text getName() { return name; }
	public void setName(Text name) { this.name = name; }
	public DoubleWritable getGrade() { return grade; }
	public void setGrade(DoubleWritable grade) { this.grade = grade; }

	@Override
	public void write(DataOutput out) throws IOException {
		name.write(out);
		grade.write(out);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		name.readFields(in);
		grade.readFields(in);
	}

	@Override
	public String toString() {
		return name.toString();
	}

	@Override
	public int compareTo(NameKey o) {
		// Double.compare avoids the truncation bug of casting (me - other) to int,
		// which would treat grades whose difference is less than 1 as equal.
		return Double.compare(grade.get(), o.getGrade().get());
	}
}
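One point worth spelling out: Hadoop only consults compareTo when the bean passes through the shuffle, that is, when it is used as a map output key. In the job above, NameKey is only the reduce output key, so to actually see the records ordered by grade you would feed AvgValue's output into a second job keyed by NameKey. Below is a minimal sketch of such a sort job under that assumption; the class names SortByGrade, SortMapper, and SortReducer are hypothetical additions, not part of the original code.

package com.hgs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical second job: reads the "name<TAB>avg" output of AvgValue and
// re-emits it keyed by NameKey, so the shuffle sorts records via NameKey.compareTo.
public class SortByGrade {
	public static void main(String[] args) throws Exception {
		Job job = Job.getInstance(new Configuration(), "sort by grade");
		job.setJarByClass(SortByGrade.class);
		job.setMapperClass(SortMapper.class);
		job.setReducerClass(SortReducer.class);
		job.setMapOutputKeyClass(NameKey.class);
		job.setMapOutputValueClass(DoubleWritable.class);
		job.setOutputKeyClass(NameKey.class);
		job.setOutputValueClass(DoubleWritable.class);
		job.setNumReduceTasks(1); // a single reducer yields one globally sorted file
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

class SortMapper extends Mapper<LongWritable, Text, NameKey, DoubleWritable> {
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String[] parts = value.toString().split("\t");
		if (parts.length == 2) {
			double avg = Double.parseDouble(parts[1]);
			context.write(new NameKey(new Text(parts[0]), new DoubleWritable(avg)),
					new DoubleWritable(avg));
		}
	}
}

class SortReducer extends Reducer<NameKey, DoubleWritable, NameKey, DoubleWritable> {
	@Override
	protected void reduce(NameKey key, Iterable<DoubleWritable> values, Context context)
			throws IOException, InterruptedException {
		// Hadoop refreshes the key instance as the values iterator advances,
		// so each record's own name is written even when two grades tie.
		for (DoubleWritable v : values) {
			context.write(key, v);
		}
	}
}

You would then run the two jobs back to back, for example hadoop jar avg.jar com.hgs.AvgValue /input /avg followed by hadoop jar avg.jar com.hgs.SortByGrade /avg /sorted (the jar name and paths are placeholders); with a single reducer, the final part file lists students in ascending order of average grade.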
That covers how to implement a custom output sort in Hadoop. Did you pick up some new knowledge or skills? If you would like to learn more or broaden your knowledge, you are welcome to follow the 亿速云 industry news channel.