本篇文章给大家分享的是有关mr如何使用hcatalog读写hive表,小编觉得挺实用的,因此分享给大家学习,希望大家阅读完这篇文章后可以有所收获,话不多说,跟着小编一起来看看吧。
public class onTimeMapper extends Mapper { @Override protected void map(WritableComparable key, HCatRecord value, org.apache.hadoop.mapreduce.Mapper.Context context) throws IOException, InterruptedException { // Get table schema HCatSchema schema = HCatBaseInputFormat.getTableSchema(context); Integer year = new Integer(value.getString("year", schema)); Integer month = new Integer(value.getString("month", schema)); Integer DayofMonth = value.getInteger("dayofmonth", schema); context.write(new IntPair(year, month), new IntWritable(DayofMonth)); }}
public class onTimeReducer extends Reducer {public void reduce (IntPair key, Iterable value, Context context) throws IOException, InterruptedException{ int count = 0; // records counter for particular year-month for (IntWritable s:value) { count++; } // define output record schema List columns = new ArrayList(3); columns.add(new HCatFieldSchema("year", HCatFieldSchema.Type.INT, "")); columns.add(new HCatFieldSchema("month", HCatFieldSchema.Type.INT, "")); columns.add(new HCatFieldSchema("flightCount", HCatFieldSchema.Type.INT,"")); HCatSchema schema = new HCatSchema(columns); HCatRecord record = new DefaultHCatRecord(3); record.setInteger("year", schema, key.getFirstInt()); record.set("month", schema, key.getSecondInt()); record.set("flightCount", schema, count); context.write(null, record);}}
public class onTimeDriver extends Configured implements Tool{ private static final Log log = LogFactory.getLog( onTimeDriver.class ); public int run( String[] args ) throws Exception{ Configuration conf = new Configuration(); Job job = new Job(conf, "OnTimeCount"); job.setJarByClass(onTimeDriver.class); job.setMapperClass(onTimeMapper.class); job.setReducerClass(onTimeReducer.class); HCatInputFormat.setInput(job, "airline", "ontimeperf"); job.setInputFormatClass(HCatInputFormat.class); job.setMapOutputKeyClass(IntPair.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(DefaultHCatRecord.class); job.setOutputFormatClass(HCatOutputFormat.class); HCatOutputFormat.setOutput(job, OutputJobInfo.create("airline", "flight_count", null)); HCatSchema s = HCatOutputFormat.getTableSchema(job); HCatOutputFormat.setSchema(job, s); return (job.waitForCompletion(true)? 0:1); } public static void main(String[] args) throws Exception{ int exitCode = ToolRunner.run(new onTimeDriver(), args); System.exit(exitCode);}}
create table airline.flight_count(Year INT ,Month INT ,flightCount INT)ROW FORMAT DELIMITED FIELDS TERMINATED BY ','STORED AS TEXTFILE;
以上就是mr如何使用hcatalog读写hive表,小编相信有部分知识点可能是我们日常工作会见到或用到的。希望你能通过这篇文章学到更多知识。更多详情敬请关注亿速云行业资讯频道。
亿速云「云服务器」,即开即用、新一代英特尔至强铂金CPU、三副本存储NVMe SSD云盘,价格低至29元/月。点击查看>>
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。
原文链接:https://my.oschina.net/u/4590259/blog/4465245