hbase操作的工具类,使用scala写的,主要方法有
1、连接hbase配置
2、获取操作hbase的table对象
3、添加一行数据
4、把RDD入库到hbase中
5、使用精确的rowKey查询单行数据单列值
6、根据rowKey精确查询单行数据的多列值
7、根据subKey模糊批量查询多行数据
8、根据rowKey删除整行
9、删除rowKey的某簇的某列
10、删除rowKey的某个family
11、根据rowKey模糊删除多行数据
需要引入的maven配置
<dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-client</artifactId> <version>1.2.4</version> </dependency> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-common</artifactId> <version>1.2.4</version> </dependency>
package com.blog.utils import java.text.SimpleDateFormat import java.util.Date import com.alibaba.fastjson.JSONObject import com.sf.gis.start.JavaUtil import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.filter.{CompareFilter, RowFilter, SubstringComparator} import org.apache.hadoop.hbase.io.ImmutableBytesWritable import org.apache.hadoop.hbase.mapred.TableOutputFormat import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.hadoop.mapred.JobConf import org.apache.log4j.Logger import org.apache.spark.rdd.RDD import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer /** * hbase操作工具类 */ object HbaseUtil { @transient lazy val logger:Logger = Logger.getLogger(this.getClass().getName()) //获取配置文件的值 //val hbaseParent:String = JavaUtil.getString("zookeeper.znode.parent") //val hbaseQuorum:String = JavaUtil.getString("hbase.zookeeper.quorum") //val hbaseClientPort:String = JavaUtil.getString("hbase.zookeeper.property.clientPort") //val tableName:String = JavaUtil.getString("hbase.table") val hbaseParent:String = "cnsz22pl0112,cnsz22pl0113,cnsz22pl0114,cnsz22pl0115" val hbaseQuorum:String = "2181" val hbaseClientPort:String = "/hbase" val tableName:String = "orderTable" /** * 获取hbase配置 * @return */ def getHbaseConf(): Configuration ={ var hbaseConf: Configuration=null try{ //获取hbase的conf hbaseConf = HBaseConfiguration.create() //设置写入的表 hbaseConf.set("zookeeper.znode.parent", hbaseParent) hbaseConf.set("hbase.zookeeper.quorum", hbaseQuorum) hbaseConf.set("hbase.zookeeper.property.clientPort", hbaseClientPort) }catch{ case e:Exception => logger.error(">>>连接hbase失败:,"+e) } hbaseConf } /** * 获得操作类Table * @param tableName * @return */ def getTable(tableName:String): Table ={ var table: Table =null try { val hbaseConf = getHbaseConf() val conn = ConnectionFactory.createConnection(hbaseConf) table = conn.getTable(TableName.valueOf(tableName)) } catch { case e:Exception =>logger.error(">>>获取Table对象失败:"+e) } table } /** * 添加一行数据 * @param table 操作表的对象 * @param rowKey key * @param family 簇 * @param column 列 * @param value 需要添加的数据内容 */ def addRow(table:Table,rowKey:String,family:String,column:String,value:String): Unit ={ val rowPut: Put = new Put(Bytes.toBytes(rowKey)) if (value == null) { rowPut.addColumn(family.getBytes, column.getBytes, "".getBytes) } else { rowPut.addColumn(family.getBytes, column.getBytes, value.getBytes) } table.put(rowPut) } /** * rdd数据入到hbase库 * @param inputRdd 需要入库的RDD */ def putRdd(inputRdd:RDD[JSONObject]): Unit ={ try { val jobConf = new JobConf(getHbaseConf()) jobConf.setOutputFormat(classOf[TableOutputFormat]) jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName) val putRdd = inputRdd.map(convert) putRdd.saveAsHadoopDataset(jobConf) } catch { case e:Exception =>logger.error(">>>数据入库失败:"+e) } } /** * 构建转换数据 * @param obj * @return */ def convert(obj:JSONObject) = { var put:Put = null try{ //设置主键,这里测试,使用系统时间 val longtTime = System.currentTimeMillis() val dfTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS") val saveTime = dfTime.format(new Date(longtTime)) val key = saveTime put = new Put(Bytes.toBytes(key)) put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("date"), Bytes.toBytes(saveTime)) put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("wrong_value"), Bytes.toBytes(obj.toString)) }catch{ case e:Exception=>println(">>>数据转换失败:"+e) } (new ImmutableBytesWritable, put) } /** * 根据rowKey精确查询单行数据单行值 * @param rowkey * @param family * @param column * @return */ def getByKey(table:Table,rowkey:String, family:String, column:String): String ={ var value: String=null try { //单个get查询 val get = new Get(Bytes.toBytes(rowkey)) get.addColumn(Bytes.toBytes(family), Bytes.toBytes(column)) val result = table.get(get) value = Bytes.toString(result.getValue(Bytes.toBytes(family),Bytes.toBytes(column))) } catch { case e:Exception =>logger.error(">>>查询失败:"+e) } value } /** * 根据rowKey精确查询单行数据的多列值 * @param rowkey * @param family * @param columns * @return */ def getByKey(table:Table,rowkey:String, family:String, columns:Array[String]): ArrayBuffer[String] ={ var list: ArrayBuffer[String]= new ArrayBuffer[String]() try { val get = new Get(Bytes.toBytes(rowkey)) get.addFamily(family.getBytes()) val result = table.get(get) columns.foreach(column=>{ val value = Bytes.toString(result.getValue(Bytes.toBytes(family),Bytes.toBytes(column))) list += value }) } catch { case e:Exception =>logger.error(">>>查询失败:"+e) } list } /** * 根据subKey模糊批量查询多行数据 * @param subKey * @param family * @param column * @return */ def scanBySubKey(table:Table,subKey:String,family:String,column:String): ArrayBuffer[String] ={ val resultList = new ArrayBuffer[String]() val table = getTable(tableName) //批量模糊查询 val filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator(subKey)) val scan = new Scan() scan.setFilter(filter) val rs:ResultScanner = table.getScanner(scan) for(result <- rs){ if(result.containsColumn(Bytes.toBytes(family),Bytes.toBytes(column))){ val value = Bytes.toString(result.getValue(Bytes.toBytes(family),Bytes.toBytes(column))) resultList += value } } resultList } /** * 根据rowKey删除整行 * @param table * @param rowKey */ def deleteByKey(table:Table, rowKey:String): Unit ={ try { table.delete(new Delete(rowKey.getBytes())) } catch { case e:Exception =>println(">>>删除操作失败:"+e) } } /** * 删除rowKey的某簇的某列 * @param table * @param rowKey */ def deleteByColumn(table:Table, rowKey:String,family:String,column:String): Unit ={ try { val delete = new Delete(rowKey.getBytes()) delete.addColumn(family.getBytes(),column.getBytes()) table.delete(delete) } catch { case e:Exception =>println(">>>删除操作失败:"+e) } } /** * 删除rowKey的某个family * @param table * @param rowKey */ def deleteByFamily(table:Table, rowKey:String,family:String): Unit ={ try { val delete = new Delete(rowKey.getBytes()) delete.addFamily(family.getBytes()) table.delete(delete) } catch { case e:Exception =>println(">>>删除操作失败:"+e) } } /** * 根据rowKey模糊删除 * @param table * @param subKey */ def deleteByKeys(table:Table,subKey:String): Unit ={ try { //批量模糊删除 val filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator(subKey)) val scan = new Scan() scan.setFilter(filter) val scanners:ResultScanner = table.getScanner(scan) val keyList = new ArrayBuffer[Delete]() var delete:Delete = null //注意:scala和java的for循环有区别,需要引入转换 import scala.collection.JavaConversions._ for(scanner <- scanners){ val rowKey = new String(scanner.getRow) delete = new Delete(rowKey.getBytes) keyList += delete } table.delete(keyList) } catch { case e:Exception =>println(">>>删除操作失败:"+e) } } def main(args: Array[String]): Unit = { testHbase } def testHbase(): Unit ={ val table = getTable(tableName) val family = "info" val column = "value" // deleteByKey(table,"18_13905083065_037563776718_2018-10-16 11:33:14:452") val value = scanBySubKey(table,"2018-10-16",family,column) // deleteByKeys(table,"13959774493_037765802064") // println(value.mkString(",")) deleteByColumn(table,"01_18950728102_037563109294_2018-10-16 11:33:15:394","info","name") } }