educoder Platform: MapReduce Basics Hands-On


MapReduce Level 1: Score Statistics

Passing code:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    /**********Begin**********/
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Reused writable; set() is called with each parsed score before writing.
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each record is a "name score" line; tokenize by newline, then split on the space.
            StringTokenizer itr = new StringTokenizer(value.toString(), "\n");
            while (itr.hasMoreTokens()) {
                String[] str = itr.nextToken().split(" ");
                String name = str[0];
                one.set(Integer.parseInt(str[1]));
                word.set(name);
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Keep the highest score seen for this student.
            int maxScore = 0;
            for (IntWritable intWritable : values) {
                maxScore = Math.max(maxScore, intWritable.get());
            }
            result.set(maxScore);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // Taking a max is associative, so the reducer can double as the combiner.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        String inputfile = "/user/test/input";
        String outputFile = "/user/test/output/";
        FileInputFormat.addInputPath(job, new Path(inputfile));
        FileOutputFormat.setOutputPath(job, new Path(outputFile));
        job.waitForCompletion(true);
        /**********End**********/
    }
}
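For intuition, here is a minimal sketch of what this job consumes and produces, assuming space-separated "name score" lines as the exercise describes; the names and numbers below are illustrative, not taken from the source:

Sample input (one record per line):

zhangsan 68
lisi 77
zhangsan 96

Expected output (one line per student):

lisi 77
zhangsan 96

Despite the class being named WordCount, the reducer takes Math.max over each student's scores, so every student appears exactly once with their highest score.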
Command line:

touch file01
echo Hello World Bye World
cat file01
echo Hello World Bye World > file01
cat file01
touch file02
echo Hello Hadoop Goodbye Hadoop > file02
cat file02
start-dfs.sh
hadoop fs -mkdir /usr
hadoop fs -mkdir /usr/input
hadoop fs -ls /usr/output
hadoop fs -ls /
hadoop fs -ls /usr
hadoop fs -put file01 /usr/input
hadoop fs -put file02 /usr/input
hadoop fs -ls /usr/input

Run the evaluation.

——————————————————————————————————

MapReduce Level 2: File Content Merging and Deduplication

Code:

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Merge {
    /**
     * @param args
     * Merge files A and B, removing any duplicated content, to produce a new output file C.
     */

    // Override the map function here, copying the input value directly onto the output key.
    // Note that the map method must declare: throws IOException, InterruptedException
    /**********Begin**********/
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String str = value.toString();
            String[] data = str.split(" ");
            Text t1 = new Text(data[0]);
            Text t2 = new Text(data[1]);
            context.write(t1, t2);
        }
    }
    /**********End**********/

    // Override the reduce function here, copying the input key directly onto the output key.
    // Note that the reduce method must declare: throws IOException, InterruptedException
    /**********Begin**********/
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Collect the distinct values for this key, sort them, and emit one line each.
            List<String> list = new ArrayList<>();
            for (Text text : values) {
                String str = text.toString();
                if (!list.contains(str)) {
                    list.add(str);
                }
            }
            Collections.sort(list);
            for (String text : list) {
                context.write(key, new Text(text));
            }
        }
    }
    /**********End**********/

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(Merge.class);
        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        String inputPath = "/user/tmp/input/";   // set the input path here
        String outputPath = "/user/tmp/output/"; // set the output path here
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Run the evaluation.

———————————————————————————————————————

MapReduce Level 3: Information Mining (Mining Parent-Child Relations)

Code:

import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoo ...

(the remaining Level 3 code is cut off in the source preview)
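Since the source breaks off mid-import, the document's own Level 3 answer is not recoverable here. As a hedged sketch only, not the source's code: the classic approach to this exercise is a single-table self-join. The map emits each "child parent" record twice, keyed once on the parent (contributing a grandchild candidate) and once on the child (contributing a grandparent candidate); the reduce then crosses the two lists. The class name Grandparent, the role tags "c:"/"p:", the header-line skip, and the args-based paths are all illustrative assumptions:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Grandparent {
    public static class JoinMapper extends Mapper<LongWritable, Text, Text, Text> {
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] pair = value.toString().split(" ");
            // Skip malformed records and a possible "child parent" header line (assumption).
            if (pair.length != 2 || pair[0].equals("child")) {
                return;
            }
            String child = pair[0], parent = pair[1];
            // Keyed on the parent: this record contributes a grandchild candidate.
            context.write(new Text(parent), new Text("c:" + child));
            // Keyed on the child: this record contributes a grandparent candidate.
            context.write(new Text(child), new Text("p:" + parent));
        }
    }

    public static class JoinReducer extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Separate the tagged values back into the two sides of the join.
            List<String> children = new ArrayList<>();
            List<String> parents = new ArrayList<>();
            for (Text v : values) {
                String s = v.toString();
                if (s.startsWith("c:")) {
                    children.add(s.substring(2));
                } else {
                    parents.add(s.substring(2));
                }
            }
            // Every (child-of-key, parent-of-key) pair is a grandchild-grandparent pair.
            for (String c : children) {
                for (String p : parents) {
                    context.write(new Text(c), new Text(p));
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "grandchild-grandparent");
        job.setJarByClass(Grandparent.class);
        job.setMapperClass(JoinMapper.class);
        // Deliberately no combiner: running the join logic map-side would be incorrect.
        job.setReducerClass(JoinReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

For example, given the records "Steven Jack" and "Jack Alice", the reducer for key Jack sees children [Steven] and parents [Alice] and emits "Steven Alice", a grandchild-grandparent pair.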
