MapReduce_partition

package MapReduce;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class MyPartitioner {
    private static final String INPUT_PATH = "hdfs://h201:9000/user/hadoop/input_par";
    private static final String OUTPUT_PATH = "hdfs://h201:9000/user/hadoop/output";

    public static class MyPartitionerMap extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            // Tag each input line by its number of tab-separated columns.
            String[] arr_value = value.toString().split("\t");
            if (arr_value.length > 3) {
                context.write(new Text("long"), value);
            } else if (arr_value.length < 3) {
                context.write(new Text("short"), value);
            } else {
                context.write(new Text("right"), value);
            }
        }
    }

    /**
     * The partitioner's input is the map's output.
     *
     * @author Administrator
     */
    public static class MyPartitionerPar extends Partitioner<Text, Text> {

        @Override
        public int getPartition(Text key, Text value, int numPartitions) {
            int result = 0;
            /*********************************************************************/
            /*** key.toString().equals("long") -- toString() is required here. ***/
            /*** At first I compared the Text object to the String directly,   ***/
            /*** so everything fell into one partition and ended up in a       ***/
            /*** single reduce output file.                                    ***/
            /*********************************************************************/
            if (key.toString().equals("long")) {
                result = 0 % numPartitions;
            } else if (key.toString().equals("short")) {
                result = 1 % numPartitions;
            } else if (key.toString().equals("right")) {
                result = 2 % numPartitions;
            }
            return result;
        }
    }

    public static class MyPartitionerReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws java.io.IOException, InterruptedException {
            // Identity reduce: pass every tagged record through unchanged.
            for (Text val : values) {
                context.write(key, val);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        /*
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: MyPartitioner <in> <out>");
            System.exit(2);
        }
        */
        conf.set("mapred.jar", "mp1.jar");
        // Clear any previous output so the job does not fail on an existing path.
        final FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
        fileSystem.delete(new Path(OUTPUT_PATH), true);

        Job job = Job.getInstance(conf, "MyPartitioner");
        // Three reduce tasks, one per partition: "long", "short", "right".
        job.setNumReduceTasks(3);

        job.setJarByClass(MyPartitioner.class);

        job.setMapperClass(MyPartitionerMap.class);
        job.setCombinerClass(MyPartitionerReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setPartitionerClass(MyPartitionerPar.class);
        job.setReducerClass(MyPartitionerReduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, INPUT_PATH);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
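
To try the listing, compile it against the Hadoop classpath and package it as mp1.jar, the name the conf.set("mapred.jar", ...) call above expects. A minimal sketch; the source file name and the classes directory are assumptions, not from the original post:

[hadoop@h201 ~]$ mkdir classes
[hadoop@h201 ~]$ javac -classpath $(hadoop classpath) -d classes MyPartitioner.java
[hadoop@h201 ~]$ jar -cvf mp1.jar -C classes .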

vim partition

aa      1       2
bb      2       22
cc      11
dd      1
ee      99      99      999
ff      12      23      123
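
Before reading the output below, the test file has to reach the INPUT_PATH hard-coded in the job, and the jar has to be launched. A sketch, assuming the mp1.jar packaged above:

[hadoop@h201 ~]$ hadoop fs -put partition /user/hadoop/input_par
[hadoop@h201 ~]$ hadoop jar mp1.jar MapReduce.MyPartitioner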

[hadoop@h201 ~]$ hadoop fs -cat /user/hadoop/output/part-r-00001
18/06/10 17:55:02 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform… using builtin-java classes where applicable
short   dd      1
short   cc      11
[hadoop@h201 ~]$ hadoop fs -cat /user/hadoop/output/part-r-00000
18/06/10 17:55:16 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform… using builtin-java classes where applicable
long    ff      12      23      123
long    ee      99      99      999
[hadoop@h201 ~]$ hadoop fs -cat /user/hadoop/output/part-r-00002
18/06/10 18:01:37 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform… using builtin-java classes where applicable
right   bb      2       22
right   aa      1       2
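
The three files line up with getPartition: rows with more than three columns ("long") land in part-r-00000, rows with fewer than three ("short") in part-r-00001, and rows with exactly three ("right") in part-r-00002.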

    Original author: MapReduce
    Original source: https://www.cnblogs.com/jieran/p/9163876.html