HBase 的热点问题及 rowkey 的散列原则
这是我直接根据 0-9 进行 partition 预分区,然后以 1-10000 为 rowkey 进行插入。我想象中的是:起始位分布都差不多,按高位字典序排序,按道理来说是很完美的。但结果也"确实很完美"——完美地造成了热点,还是最热的那种,心里一万个不愿意,但是没办法。这说明如果要按照数字进行预分区,应该分得细粒度一点,比如按 1-1000、1000-2000……9000-10000 这样来分。但这种只适合测试,真实数据跑起来是不行的,还是得靠 hash、md5 等散列函数来实现真正的分布均匀。
看下图,照样是我用 1-10000 为 rowkey 进行数据 put,结果也是很完美的——这次是确实完美。不过,如果数据量真的非常非常大,region 进行了多次 split,那么它还是会在不久之后出现热点问题;应付一般的数据量是没问题的。所以应该提前考虑到数据的增量问题,对分区进行合理的设计。
1、进行建表的时候,对表进行预分区
package make.hbase.com;
import java.util.Iterator;
import java.util.Random;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.MD5Hash;
/**
 * Computes pre-split keys for an HBase table by sampling hashed rowkeys.
 *
 * <p>It generates {@code baseRecord} random hashed rowkeys, sorts them, and
 * picks evenly spaced samples as region boundaries, so the table starts out
 * with {@code prepareRegions} regions and sequential writes do not hotspot
 * a single region.
 */
public class hash_example {

    /** Number of random sample keys used to estimate the split points. */
    private int baseRecord;
    /** Generator producing hashed rowkeys for sampling. */
    private RowKeyGenerator rkGen;
    /** Sample stride between split keys: baseRecord / prepareRegions. */
    private int splitKeysBase;
    /** Number of split keys (region count minus one). */
    private int splitKeysNumber;
    /** Split keys computed from the sample. */
    private byte[][] splitKeys;

    /**
     * @param baseRecord     how many sample keys to generate
     * @param prepareRegions how many regions the table should be pre-split into
     */
    public hash_example(int baseRecord, int prepareRegions) {
        this.baseRecord = baseRecord;
        rkGen = new HashRowKeyGenerator();
        // N regions require N-1 boundary keys.
        splitKeysNumber = prepareRegions - 1;
        splitKeysBase = baseRecord / prepareRegions;
    }

    /**
     * Samples {@code baseRecord} hashed rowkeys and picks every
     * {@code splitKeysBase}-th key (in sorted order) as a region boundary.
     *
     * @return the split keys, suitable for {@code createTable(desc, splitKeys)}
     */
    public byte[][] calcSplitKeys() {
        splitKeys = new byte[splitKeysNumber][];
        // TreeSet with BYTES_COMPARATOR keeps the sampled keys sorted
        // the same way HBase orders rowkeys.
        TreeSet<byte[]> rows = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
        for (int i = 0; i < baseRecord; i++) {
            rows.add(rkGen.nextId());
        }
        int pointer = 0;
        int index = 0;
        Iterator<byte[]> rowKeyIter = rows.iterator();
        while (rowKeyIter.hasNext()) {
            byte[] tempRow = rowKeyIter.next();
            // Release entries as we go to keep memory bounded for large samples.
            rowKeyIter.remove();
            if (pointer != 0 && pointer % splitKeysBase == 0 && index < splitKeysNumber) {
                splitKeys[index] = tempRow;
                index++;
            }
            pointer++;
        }
        rows.clear();
        rows = null;
        return splitKeys;
    }

    /** Strategy interface for rowkey generation. */
    public interface RowKeyGenerator {
        byte[] nextId();
    }

    /**
     * Generates rowkeys of the form: first 8 hex chars of
     * MD5(low-4-bytes(id) + low-4-bytes(pseudo-random timestamp)), followed by
     * the full 8-byte sequential id. The hash prefix spreads keys across
     * regions while the id suffix keeps every key unique.
     *
     * <p>Declared static: it reads no state of the enclosing instance, so it
     * should not hold a hidden reference to it.
     */
    public static class HashRowKeyGenerator implements RowKeyGenerator {
        private long currentId = 1;
        private long currentTime = System.currentTimeMillis();
        private Random random = new Random();

        @Override
        public byte[] nextId() {
            try {
                // Random-walk the timestamp so successive keys differ even
                // when generated within the same millisecond.
                currentTime += random.nextInt(1000);
                byte[] lowT = Bytes.copy(Bytes.toBytes(currentTime), 4, 4);
                byte[] lowU = Bytes.copy(Bytes.toBytes(currentId), 4, 4);
                return Bytes.add(MD5Hash.getMD5AsHex(Bytes.add(lowU, lowT))
                        .substring(0, 8).getBytes(), Bytes.toBytes(currentId));
            } finally {
                // Always advance the id, even if key construction throws.
                currentId++;
            }
        }
    }

    public static void main(String[] args) throws Exception {
        // 1,000,000 samples, pre-split into 20 regions.
        hash_example worker = new hash_example(1000000, 20);
        byte[][] splitKeys = worker.calcSplitKeys();
        String tbname = "hbase_hashrowkey2";
        String colfamily = "info";
        // Load cluster configuration.
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "make.hadoop.com");
        HBaseAdmin admin = new HBaseAdmin(conf);
        try {
            TableName tableName = TableName.valueOf(tbname);
            // Drop a stale copy of the table so createTable below succeeds.
            if (admin.tableExists(tableName)) {
                try {
                    admin.disableTable(tableName);
                } catch (Exception ignored) {
                    // Best effort: the table may already be disabled.
                }
                admin.deleteTable(tableName);
            }
            HTableDescriptor tableDesc = new HTableDescriptor(tableName);
            HColumnDescriptor columnDesc = new HColumnDescriptor(
                    Bytes.toBytes(colfamily));
            columnDesc.setMaxVersions(1);
            tableDesc.addFamily(columnDesc);
            // Create the table pre-split at the sampled boundaries.
            admin.createTable(tableDesc, splitKeys);
        } finally {
            // Release the admin connection even if table creation fails.
            admin.close();
        }
    }
}
2、对顺序id进行插入测试
/**
 * Inserts 9999 rows (i = 1..9999) whose rowkeys carry an MD5-derived prefix,
 * to verify that writes spread evenly across the pre-split regions.
 *
 * <p>NOTE(review): {@code GetTable_test}, {@code colfamily} and {@code column}
 * are defined elsewhere in the enclosing test class.
 */
@Test
public void put_hash() throws MasterNotRunningException,
        ZooKeeperConnectionException, IOException {
    HTable table = GetTable_test("rwokry_partition");
    long currentTime = System.currentTimeMillis();
    Random random = new Random();
    try {
        // Simulate inserting the sequential ids 1..9999.
        for (int i = 1; i < 10000; i++) {
            // Random-walk the timestamp, matching the split-key generator.
            currentTime += random.nextInt(1000);
            byte[] lowT = Bytes.copy(Bytes.toBytes(currentTime), 4, 4);
            byte[] lowI = Bytes.copy(Bytes.toBytes((long) i), 4, 4);
            // Rowkey = 8 hex chars of MD5(id-low-bytes + time-low-bytes) + raw id.
            byte[] rowkey = Bytes.add(
                    MD5Hash.getMD5AsHex(Bytes.add(lowI, lowT)).substring(0, 8)
                            .getBytes(),
                    Bytes.toBytes(i));
            Put put = new Put(rowkey);
            put.add(Bytes.toBytes(colfamily), Bytes.toBytes(column),
                    Bytes.toBytes("make"));
            table.put(put);
        }
    } finally {
        // Close the table even if a put fails, so the connection is not leaked.
        table.close();
    }
    System.err.println("数据插入成功");
}我们上UI看请求结果,确实做到了负载均衡,防止了热点的问题
其中我们查看一下rowkey,也是符合长度原则的,这里rowkey达到了12个字节,应该是正常的,其实实现这个的代码也是挺简单的,参考:
本站文章为和通数据库网友分享或者投稿,欢迎任何形式的转载,但请务必注明出处.
同时文章内容如有侵犯了您的权益,请联系QQ:970679559,我们会在尽快处理。