背景:根据业务需求,需要把60张主子表批量入库到Hive表。
创建测试数据:
/**
 * Generates the paired test files used by the batch Hive load.
 *
 * For every layer in `0 until tableCount` two files are written:
 * `<pathPrefix>server_<layer>.txt` and `<pathPrefix>neighour_<layer>.txt`.
 * Each file holds a single comma-separated record `<layer>,<base>_<layer>`
 * with no trailing newline.
 *
 * The spelling "neighour" is kept on purpose: the uploaded HDFS files and the
 * `load data` statements elsewhere refer to exactly this name.
 *
 * @param pathPrefix string prepended verbatim to every file name; the default
 *                   reproduces the original hard-coded `D:\` location
 * @param tableCount number of layers to generate (default 60, i.e. layers 0..59)
 */
def createBatchTestFile(pathPrefix: String = "D:\\", tableCount: Int = 60): Unit = {
  // Writes the single record for one file and always releases the handle,
  // even if the write fails part-way.
  def writeRecord(baseName: String, layer: Int): Unit = {
    val target = new File(pathPrefix + baseName + "_" + layer + ".txt")
    // Explicit charset so the output does not depend on the platform default.
    val writer = new PrintWriter(target, "UTF-8")
    try {
      writer.write(s"$layer,${baseName}_$layer")
    } finally {
      writer.close()
    }
  }

  for (layer <- 0 until tableCount) {
    writeRecord("server", layer)
    writeRecord("neighour", layer)
  }
}
上传数据到服务器hdfs:
1 [hadoop@vd01 fglib]$ ls 2 neighour_0.txt neighour_20.txt neighour_31.txt neighour_42.txt neighour_53.txt server_0.txt server_20.txt server_31.txt server_42.txt server_53.txt 3 neighour_10.txt neighour_21.txt neighour_32.txt neighour_43.txt neighour_54.txt server_10.txt server_21.txt server_32.txt server_43.txt server_54.txt 4 neighour_11.txt neighour_22.txt neighour_33.txt neighour_44.txt neighour_55.txt server_11.txt server_22.txt server_33.txt server_44.txt server_55.txt 5 neighour_12.txt neighour_23.txt neighour_34.txt neighour_45.txt neighour_56.txt server_12.txt server_23.txt server_34.txt server_45.txt server_56.txt 6 neighour_13.txt neighour_24.txt neighour_35.txt neighour_46.txt neighour_57.txt server_13.txt server_24.txt server_35.txt server_46.txt server_57.txt 7 neighour_14.txt neighour_25.txt neighour_36.txt neighour_47.txt neighour_58.txt server_14.txt server_25.txt server_36.txt server_47.txt server_58.txt 8 neighour_15.txt neighour_26.txt neighour_37.txt neighour_48.txt neighour_59.txt server_15.txt server_26.txt server_37.txt server_48.txt server_59.txt 9 neighour_16.txt neighour_27.txt neighour_38.txt neighour_49.txt neighour_5.txt server_16.txt server_27.txt server_38.txt server_49.txt server_5.txt 10 neighour_17.txt neighour_28.txt neighour_39.txt neighour_4.txt neighour_6.txt server_17.txt server_28.txt server_39.txt server_4.txt server_6.txt 11 neighour_18.txt neighour_29.txt neighour_3.txt neighour_50.txt neighour_7.txt server_18.txt server_29.txt server_3.txt server_50.txt server_7.txt 12 neighour_19.txt neighour_2.txt neighour_40.txt neighour_51.txt neighour_8.txt server_19.txt server_2.txt server_40.txt server_51.txt server_8.txt 13 neighour_1.txt neighour_30.txt neighour_41.txt neighour_52.txt neighour_9.txt server_1.txt server_30.txt server_41.txt server_52.txt server_9.txt 14 [hadoop@vd01 fglib]$ hadoop fs -mkdir /user/hive_user/dang/fglib 15 17/09/07 13:24:14 INFO hdfs.PeerCache: SocketCache disabled. 
16 [hadoop@vd01 fglib]$ hadoop fs -copyFromLocal * /user/hive_user/dang/fglib/ 17 17/09/07 13:24:39 INFO hdfs.PeerCache: SocketCache disabled. 18 [hadoop@vd01 fglib]$ 19 20 [hadoop@vd01 fglib]$ hadoop fs -ls /user/hive_user/dang/fglib 21 17/09/07 13:25:51 INFO hdfs.PeerCache: SocketCache disabled. 22 Found 120 items 23 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_0.txt 24 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_1.txt 25 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_10.txt 26 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_11.txt 27 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_12.txt 28 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_13.txt 29 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_14.txt 30 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_15.txt 31 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_16.txt 32 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_17.txt 33 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_18.txt 34 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_19.txt 35 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_2.txt 36 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_20.txt 37 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_21.txt 38 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_22.txt 39 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_23.txt 40 -rw-r--r--+ 3 hive_user hadoop 
14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_24.txt 41 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_25.txt 42 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_26.txt 43 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_27.txt 44 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_28.txt 45 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_29.txt 46 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_3.txt 47 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_30.txt 48 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_31.txt 49 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_32.txt 50 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_33.txt 51 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_34.txt 52 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_35.txt 53 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_36.txt 54 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_37.txt 55 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_38.txt 56 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_39.txt 57 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_4.txt 58 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_40.txt 59 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_41.txt 60 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 
/user/hive_user/dang/fglib/neighour_42.txt 61 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_43.txt 62 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_44.txt 63 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_45.txt 64 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_46.txt 65 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_47.txt 66 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_48.txt 67 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_49.txt 68 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_5.txt 69 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_50.txt 70 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_51.txt 71 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_52.txt 72 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_53.txt 73 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_54.txt 74 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_55.txt 75 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_56.txt 76 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_57.txt 77 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_58.txt 78 -rw-r--r--+ 3 hive_user hadoop 14 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_59.txt 79 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_6.txt 80 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_7.txt 81 -rw-r--r--+ 3 
hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_8.txt 82 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/neighour_9.txt 83 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:24 /user/hive_user/dang/fglib/server_0.txt 84 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:24 /user/hive_user/dang/fglib/server_1.txt 85 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_10.txt 86 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_11.txt 87 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_12.txt 88 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_13.txt 89 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_14.txt 90 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_15.txt 91 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_16.txt 92 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_17.txt 93 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_18.txt 94 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_19.txt 95 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:24 /user/hive_user/dang/fglib/server_2.txt 96 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_20.txt 97 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_21.txt 98 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_22.txt 99 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_23.txt 100 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_24.txt 101 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_25.txt 102 -rw-r--r--+ 3 
hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_26.txt 103 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_27.txt 104 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_28.txt 105 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_29.txt 106 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:25 /user/hive_user/dang/fglib/server_3.txt 107 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_30.txt 108 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_31.txt 109 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_32.txt 110 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:24 /user/hive_user/dang/fglib/server_33.txt 111 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_34.txt 112 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_35.txt 113 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_36.txt 114 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_37.txt 115 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_38.txt 116 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_39.txt 117 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:25 /user/hive_user/dang/fglib/server_4.txt 118 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_40.txt 119 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_41.txt 120 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_42.txt 121 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_43.txt 122 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_44.txt 123 
-rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_45.txt 124 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_46.txt 125 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_47.txt 126 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_48.txt 127 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_49.txt 128 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:25 /user/hive_user/dang/fglib/server_5.txt 129 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_50.txt 130 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_51.txt 131 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_52.txt 132 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_53.txt 133 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_54.txt 134 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_55.txt 135 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_56.txt 136 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_57.txt 137 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_58.txt 138 -rw-r--r--+ 3 hive_user hadoop 12 2017-09-07 13:25 /user/hive_user/dang/fglib/server_59.txt 139 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:25 /user/hive_user/dang/fglib/server_6.txt 140 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:25 /user/hive_user/dang/fglib/server_7.txt 141 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:25 /user/hive_user/dang/fglib/server_8.txt 142 -rw-r--r--+ 3 hive_user hadoop 10 2017-09-07 13:25 /user/hive_user/dang/fglib/server_9.txt
实现批量创建表及批量入库:
// Driver-side job: creates 60 pairs of Hive text tables (bathServer_N /
// bathNeighbour_N) in my_hive_db and loads one HDFS file into each of them.
val conf = new SparkConf().setAppName("My_Test")
//.setMaster("local[1]").setMaster("spark://xx.xx.xx.xx:7077").setJars(List("xxx.jar")).set("spark.executor.memory", "10g")
val sc = new SparkContext(conf)
try {
  val hiveContext = new HiveContext(sc)
  // use my_hive_db;
  hiveContext.sql("use my_hive_db")

  import hiveContext.implicits._

  // Input-split and small-file-merge settings, applied in the original order.
  Seq(
    "mapred.max.split.size" -> "256000000",
    "mapred.min.split.size.per.node" -> "100000000",
    "mapred.min.split.size.per.rack" -> "100000000",
    "hive.input.format" -> "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat",
    "hive.merge.mapfiles" -> "true",
    "hive.merge.mapredfiles" -> "true",
    "hive.merge.size.per.task" -> "256000000",
    "hive.merge.smallfiles.avgsize" -> "256000000",
    "hive.groupby.skewindata" -> "true"
  ).foreach { case (key, value) => hiveContext.setConf(key, value) }

  // Shared column/format clause for every table created below.
  val tableSpec = "(id int,name string) row format delimited fields terminated by ',' stored as textfile"

  // The statements are issued from the driver through hiveContext, so a plain
  // range is iterated here. The previous sc.parallelize(0 to 59).collect()
  // only shipped the same 60 integers to the cluster and back before looping.
  for (layer <- 0 to 59) {
    hiveContext.sql("create table if not exists bathServer_" + layer + tableSpec)
    hiveContext.sql("create table if not exists bathNeighbour_" + layer + tableSpec)
    // NOTE(review): per the Hive docs, LOAD DATA INPATH without LOCAL moves the
    // source file into the table location, so a second run would need the
    // files uploaded again - confirm before re-running.
    hiveContext.sql("load data inpath 'hdfs:/user/userxx/dang/fglib/server_" + layer + ".txt' into table bathServer_" + layer)
    hiveContext.sql("load data inpath 'hdfs:/user/userxx/dang/fglib/neighour_" + layer + ".txt' into table bathNeighbour_" + layer)
  }
} finally {
  // Always release the SparkContext, including when a SQL statement fails.
  sc.stop()
}
}