hadoop2x WordCount MapReduce怎么用

发布时间：2021-12-09 09:25:44 阅读：203 作者：小新 栏目：云计算

开发者测试专用服务器限时活动，0元免费领，库存有限，领完即止！点击查看>>

这篇文章主要介绍了hadoop2x WordCount MapReduce怎么用，具有一定借鉴价值，感兴趣的朋友可以参考下，希望大家阅读完这篇文章之后大有收获，下面让小编带着大家一起了解一下。

package com.jhl.haoop.examples;

import java.io.IOException;

import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

// map区域

public static class TokenizerMapper extends

Mapper<LongWritable, Text, Text, IntWritable> {

private final static IntWritable one = new IntWritable(1);//每个单词统计一次

private Text word = new Text();

public void map(LongWritable key, Text value, Context context)

throws IOException, InterruptedException {

//进行分割 [空格制表符 \t 换行 \n 回车符\r \f]

// public StringTokenizer(String str) {

//this(str, " \t\n\r\f", false);

// }

StringTokenizer itr = new StringTokenizer(value.toString());//获取每行数据的值value.toString()

while (itr.hasMoreTokens()) {

word.set(itr.nextToken());//设置map输出的key值

context.write(word, one);//上下文输出map的key和value值

}

hadoop2x WordCount MapReduce怎么用

//reduce 区域

public static class IntSumReducer extends

Reducer<Text, IntWritable, Text, IntWritable> {

private IntWritable result = new IntWritable();

public void reduce(Text key, Iterable<IntWritable> values,

Context context) throws IOException, InterruptedException {

int sum = 0;

for (IntWritable val : values) {//循环遍历Iterable

sum += val.get();//累加

}

result.set(sum);//设置总次数

context.write(key, result);

}

hadoop2x WordCount MapReduce怎么用

//client区域

public static void main(String[] args) throws Exception {

Configuration conf = new Configuration();//获取配置信息

//GenericOptionsParser 用来常用的Hadoop命令选项，并根据需要，为Configuration对象设置相应的取值。

String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

if (otherArgs.length != 2) {

System.err.println("Usage: wordcount ");

System.exit(2);

}

Job job = new Job(conf, "WordCount");//创建Job、设置Job配置和名称

job.setJarByClass(WordCount.class);//设置Job 运行的类

job.setMapperClass(TokenizerMapper.class);//设置Mapper类和Reducer类

job.setCombinerClass(IntSumReducer.class);

job.setReducerClass(IntSumReducer.class);

FileInputFormat.addInputPath(job, new Path(otherArgs[0]));//设置输入文件的路径和输出文件的路径

FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

job.setOutputKeyClass(Text.class);//设置输出结果的key和value类型

job.setOutputValueClass(IntWritable.class);

boolean isSuccess = job.waitForCompletion(true);//提交Job,等待运行结果，并在客户端显示运行信息

System.exit(isSuccess ? 0 : 1);//结束程序

}

感谢你能够认真阅读完这篇文章，希望小编分享的“hadoop2x WordCount MapReduce怎么用”这篇文章对大家有帮助，同时也希望大家多多支持亿速云，关注亿速云行业资讯频道，更多相关知识等着你来学习!

亿速云「云服务器」，即开即用、新一代英特尔至强铂金CPU、三副本存储NVMe SSD云盘，价格低至29元/月。点击查看>>

向AI问一下细节

hadoop2x WordCount MapReduce怎么用

猜你喜欢

最新资讯

相关推荐

开发者交流群：

相关标签