客户端版本1.2.x
下面是一段比较常见的hbase客户端连接server并进行查询的代码。
// Create a connection from the client configuration.
Connection connection = ConnectionFactory.createConnection(conf);
// Connection#getTable is declared to return the Table interface, so the result cannot be
// assigned to an HTable variable without a cast; hold it as Table instead.
// NOTE: the table, the connection and the executor passed in here are never released in this
// example; real code should close()/shutdown() them when done.
Table table = connection.getTable(TableName.valueOf("myTable"), Executors.newFixedThreadPool(10));
// Point lookup of a single row by its row key.
// NOTE: String#getBytes() encodes with the platform default charset.
Result result = table.get(new Get("myRowKey".getBytes()));
根据上面例子,下面分别从connection初始化、table初始化以及查询三个过程,从源码上分析hbase查询时都做了哪些事情。
一、源码分析
贴代码较多,用于理解每一步代码中都在做哪些事情,中间有几个流程图,可以帮助理解。
1.1 初始化过程
整个初始化流程如下图
1.1.1 Connection源码
先看下connection创建的过程
ConnectionFactory.java
/**
 * Reflectively instantiates the connection implementation named in the configuration.
 *
 * @param conf    client configuration; also supplies the implementation class name
 * @param managed forwarded unchanged to the implementation's constructor
 * @param pool    forwarded unchanged to the implementation's constructor
 * @param user    forwarded unchanged to the implementation's constructor
 * @return the newly constructed connection
 * @throws IOException wrapping any failure to load or instantiate the implementation class
 */
static Connection createConnection(final Configuration conf, final boolean managed,
    final ExecutorService pool, final User user)
    throws IOException {
  // When HBASE_CLIENT_CONNECTION_IMPL is not set, HConnectionImplementation is used.
  final String implName = conf.get(HConnection.HBASE_CLIENT_CONNECTION_IMPL,
      ConnectionManager.HConnectionImplementation.class.getName());
  try {
    final Class<?> implClass = Class.forName(implName);
    // Default HCM#HCI is not accessible; make it so before invoking.
    final Constructor<?> ctor = implClass.getDeclaredConstructor(
        Configuration.class, boolean.class, ExecutorService.class, User.class);
    ctor.setAccessible(true);
    return (Connection) ctor.newInstance(conf, managed, pool, user);
  } catch (Exception e) {
    // Covers ClassNotFoundException as well as every reflection failure.
    throw new IOException(e);
  }
}
查看HConnectionImplementation的初始化过程,其中最关键的一步是初始化rpc客户端(rpcClient)。
HConnectionImplementation.java
//几个比较关键的成员变量
//rpc客户端
private RpcClient rpcClient;
//存储table regionserver region 的缓存
private MetaCache metaCache = new MetaCache();
// indicates whether this connection's life cycle is managed (by us)
//rpc客户端
private RpcRetryingCallerFactory rpcCallerFactory;
private RpcControllerFactory rpcControllerFactory;
//ZooKeeperRegistry
private Registry registry
/**
 * Builds a connection: delegates to the single-argument constructor, records the caller's
 * user/pool/managed settings, sets up the registry, retrieves the cluster id, creates the RPC
 * client and controller factory, and optionally installs a cluster status listener.
 *
 * @param conf    client configuration
 * @param managed stored in the {@code managed} field
 * @param pool    stored as the batch pool
 * @param user    stored in the {@code user} field
 * @throws IOException declared by the constructor; raised by the setup steps it calls
 */
HConnectionImplementation(Configuration conf, boolean managed,
    ExecutorService pool, User user) throws IOException {
  this(conf);
  this.user = user;
  this.batchPool = pool;
  this.managed = managed;
  // Set up the registry (a ZooKeeperRegistry according to the original note).
  this.registry = setupRegistry();
  // Fetch the cluster id.
  retrieveClusterId();
  // Create the RPC client.
  this.rpcClient = RpcClientFactory.createClient(this.conf, this.clusterId);
  this.rpcControllerFactory = RpcControllerFactory.instantiate(conf);

  // Do we publish the status?
  final boolean statusPublished = conf.getBoolean(HConstants.STATUS_PUBLISHED,
      HConstants.STATUS_PUBLISHED_DEFAULT);
  final Class<? extends ClusterStatusListener.Listener> listenerImpl =
      conf.getClass(ClusterStatusListener.STATUS_LISTENER_CLASS,
          ClusterStatusListener.DEFAULT_STATUS_LISTENER_CLASS,
          ClusterStatusListener.Listener.class);
  if (!statusPublished) {
    return;
  }
  if (listenerImpl == null) {
    LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
        ClusterStatusListener.STATUS_LISTENER_CLASS + " is not set - not listening status");
    return;
  }
  // On a dead-server notification, drop cached entries for that server and cancel its
  // RPC connections.
  clusterStatusListener = new ClusterStatusListener(
      new ClusterStatusListener.DeadServerHandler() {
        @Override
        public void newDead(ServerName sn) {
          clearCaches(sn);
          rpcClient.cancelConnections(sn);
        }
      }, conf, listenerImpl);
}
1.1.2 Table源码
接下来看下初始化HTable的过程,在HConnectionImplementation中查找初始化htable的方法
HConnectionImplementation.java
/**
 * Creates a table handle backed by this connection.
 *
 * @param tableName name of the table to open
 * @param pool      executor handed to the new table
 * @return a new {@code HTable} bound to this connection
 * @throws NeedUnmanagedConnectionException if this connection is managed
 * @throws IOException declared by the method; may come from constructing the table
 */
public HTableInterface getTable(TableName tableName, ExecutorService pool) throws IOException {
  if (!managed) {
    // The Table object is an HTable, built with this connection's config and RPC factories.
    return new HTable(tableName, this, connectionConfig, rpcCallerFactory, rpcControllerFactory, pool);
  }
  throw new NeedUnmanagedConnectionException();
}
初始化HTable,这一步将connection中的rpcCallerFactory和rpcControllerFactory传给table,并且在finishSetup方法里初始化了HRegionLocator,用于定位region在哪台机器上。
HTable.java
// A few of the key member fields.
protected ClusterConnection connection; // the connection created in the first step
private final TableName tableName;
private volatile Configuration configuration;
private ConnectionConfiguration connConfiguration;
private ExecutorService pool; // For Multi & Scan
private HRegionLocator locator; // region locator
/** The Async process for batch */
protected AsyncProcess multiAp;
private RpcRetryingCallerFactory rpcCallerFactory;
private RpcControllerFactory rpcControllerFactory;
public HTable(TableName tableName, final ClusterConnection connection,
final ConnectionConfiguration tableConfig,
final RpcRetryingCallerFactory rpcCallerFactory,
final RpcControllerFactory rpcControllerFactory,
final ExecutorService pool) throws IOException {
if (connection == null || connection.isClosed()) {
throw new IllegalArgumentException("Connection is null or closed.");
}
this.tableName = tableName;
this.cleanupConnectionOnClose = false;
this.connection = connection;
this.configuration = connection.getConfiguration();
this.connConfiguration = tableConfig;
this.pool = pool;
if (pool == null) {
this.pool = getDefaultExecutor(this.configuration);
this.cleanupPoolOnClose = true;
} else {
this.cleanupPoolOnClose = false;
}
//将connection中的rpc客户端传给htable
this.rpcCallerFactory = rpcCallerFactory;
this.rpcControllerFactory = rpcControllerFactory;
this.finishSetup();
}
初始化方法最下面还有个finishSetup方法
HTable.java
/**
 * Completes construction: fills in any missing configuration and RPC factories, reads the
 * timeout and scanner settings, and creates the region locator.
 *
 * @throws IOException declared by the method
 */
private void finishSetup() throws IOException {
  if (connConfiguration == null) {
    connConfiguration = new ConnectionConfiguration(configuration);
  }

  // System tables use the meta operation timeout; all other tables use the regular one.
  if (tableName.isSystemTable()) {
    this.operationTimeout = connConfiguration.getMetaOperationTimeout();
  } else {
    this.operationTimeout = connConfiguration.getOperationTimeout();
  }
  this.scannerCaching = connConfiguration.getScannerCaching();
  this.scannerMaxResultSize = connConfiguration.getScannerMaxResultSize();

  if (this.rpcCallerFactory == null) {
    this.rpcCallerFactory = connection.getNewRpcRetryingCallerFactory(configuration);
  }
  if (this.rpcControllerFactory == null) {
    this.rpcControllerFactory = RpcControllerFactory.instantiate(configuration);
  }

  // puts need to track errors globally due to how the APIs currently work.
  multiAp = this.connection.getAsyncProcess();
  this.closed = false;

  // Locator used to look up region locations for this table.
  this.locator = new HRegionLocator(tableName, connection);
}
1.2 查询过程源码
table.get(new Get("myRowKey".getBytes()))方法在HTable类中,下面只截取关键部分
HTable.java
// Excerpt from HTable's get path: the branch taken for STRONG consistency reads.
if (get.getConsistency() == Consistency.STRONG) {
// Good old call.
final Get getReq = get;
// Build the callable that performs the remote call against the region server,
// keyed by this table's name and the row of the Get.
RegionServerCallable<Result> callable = new RegionServerCallable<Result>(this.connection,
getName(), get.getRow()) {
@Override
public Result call(int callTimeout) throws IOException {
// The request targets the region name taken from the callable's current location.
ClientProtos.GetRequest request =
RequestConverter.buildGetRequest(getLocation().getRegionInfo().getRegionName(), getReq);
PayloadCarryingRpcController controller = rpcControllerFactory.newController();
controller.setPriority(tableName);
controller.setCallTimeout(callTimeout);
try {
ClientProtos.GetResponse response = getStub().get(controller, request);
if (response == null) return null;
return ProtobufUtil.toResult(response.getResult());
} catch (ServiceException se) {
// Convert the ServiceException via ProtobufUtil.getRemoteException and rethrow.
throw ProtobufUtil.getRemoteException(se);
}
}
};
// Run the callable through the retrying RPC caller with this table's operation timeout;
// callWithRetries is the method examined next.
return rpcCallerFactory.<Result>newCaller().callWithRetries(callable, this.operationTimeout);
}
RpcRetryingCaller.java
// Excerpt from the retry loop; the enclosing method and the end of the catch block are not shown.
try {
// Prepare the callable; per the original note, the region location is resolved inside prepare().
callable.prepare(tries != 0); // if called with false, check table status on ZK
interceptor.intercept(context.prepare(callable, tries));
return callable.call(getRemainingTime(callTimeout));
} catch (PreemptiveFastFailException e) {
throw e;
} catch (Throwable t) {
ExceptionUtil.rethrowIfInterrupt(t);
// Only log once the number of tries exceeds startLogErrorsCnt.
if (tries > startLogErrorsCnt) {
LOG.info("Call exception, tries=" + tries + ", retries=" + retries + ", started=" +
(EnvironmentEdgeManager.currentTime() - this.globalStartTime) + " ms ago, "
+ "cancelled=" + cancelled.get() + ", msg="
+ callable.getExceptionMessageAdditionalDetail());
}
RegionServerCallable.java
/**
 * Resolves the location of the region holding this callable's row and binds the client stub
 * for the server hosting it.
 *
 * @param reload passed through to the region locator's lookup
 * @throws IOException if no location could be found, or if the lookup itself fails
 */
public void prepare(final boolean reload) throws IOException {
  // Obtain a region locator for the table (an HRegionLocator according to the original note).
  try (RegionLocator locator = connection.getRegionLocator(tableName)) {
    // Find the region that holds the row. Per the original note, this ends up in
    // connection.getRegionLocation(tableName, row, reload).
    this.location = locator.getRegionLocation(row, reload);
  }
  if (this.location != null) {
    setStub(getConnection().getClient(this.location.getServerName()));
    return;
  }
  throw new IOException("Failed to find location, tableName=" + tableName +
      ", row=" + Bytes.toString(row) + ", reload=" + reload);
}
HConnectionImplementation.java
/**
 * Locates the region(s) serving the given row of the given table.
 *
 * @param tableName table to look in; must be non-null with a non-empty name
 * @param row       row key to locate
 * @param useCache  forwarded to the lookup
 * @param retry     forwarded to the meta-based lookup
 * @param replicaId forwarded to the lookup
 * @return the locations returned by the meta-table or user-table lookup
 * @throws IOException if this connection is closed, or if the lookup fails
 * @throws IllegalArgumentException if the table name is null or zero length
 */
public RegionLocations locateRegion(final TableName tableName,
    final byte [] row, boolean useCache, boolean retry, int replicaId)
    throws IOException {
  if (this.closed) {
    throw new IOException(toString() + " closed");
  }
  final boolean nameMissing = tableName == null || tableName.getName().length == 0;
  if (nameMissing) {
    throw new IllegalArgumentException(
        "table name cannot be null or zero length");
  }

  if (!tableName.equals(TableName.META_TABLE_NAME)) {
    // Region not in the cache - have to go to the meta RS
    // Any other table: find the region through the meta table.
    return locateRegionInMeta(tableName, row, useCache, retry, replicaId);
  }
  // The meta table itself is located separately. Per the original note, in this HBase version
  // the meta information is stored on one of the region servers.
  return locateMeta(tableName, useCache, replicaId);
}
我们先看locateRegionInMeta方法,看它如何定位我们的表"myTable"的region位置。
HConnectionImplementation.java
// Finds the region of a user table by scanning the meta table (excerpt; the rest of the
// method body is not shown).
private RegionLocations locateRegionInMeta(TableName tableName, byte[] row,
boolean useCache, boolean retry, int replicaId) throws IOException {
// If we are supposed to be using the cache, look in the cache to see if
// we already have the region.
// Per the original note, the cache is used by default: the cached locations held by the
// connection are consulted first.
if (useCache) {
RegionLocations locations = getCachedLocation(tableName, row);
if (locations != null && locations.getRegionLocation(replicaId) != null) {
return locations;
}
}
// build the key of the meta region we should be looking for.
// the extra 9's on the end are necessary to allow "exact" matches
// without knowing the precise region names.
byte[] metaKey = HRegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
// Reversed small scan starting at metaKey, with caching set to 1.
Scan s = new Scan();
s.setReversed(true);
s.setStartRow(metaKey);
s.setSmall(true);
s.setCaching(1);
if (this.useMetaReplicas) {
s.setConsistency(Consistency.TIMELINE);
}
// Without retry, only a single attempt is made.
int localNumRetries = (retry ? numTries : 1);
for (int tries = 0; true; tries++) {
if (tries >= localNumRetries) {
throw new NoServerForRegionException("Unable to find region for "
+ Bytes.toStringBinary(row) + " in " + tableName +
" after " + localNumRetries + " tries.");
}
// Re-check the cache at the start of every attempt.
if (useCache) {
RegionLocations locations = getCachedLocation(tableName, row);
if (locations != null && locations.getRegionLocation(replicaId) != null) {
return locations;
}
} else {
// If we are not supposed to be using the cache, delete any existing cached location
// so it won't interfere.
metaCache.clearCache(tableName, row);
}
// Query the meta region
try {
Result regionInfoRow = null;
ReversedClientScanner rcs = null;
try {
// Look up the region of the requested table in the meta table; this scans
// TableName.META_TABLE_NAME. Per the original note, the scan first needs the meta
// table's own region location, which is not known yet, so locateMeta is called to
// find it.
rcs = new ClientSmallReversedScanner(conf, s, TableName.META_TABLE_NAME, this,
rpcCallerFactory, rpcControllerFactory, getMetaLookupPool(), 0);
regionInfoRow = rcs.next();
} finally {
if (rcs != null) {
rcs.close();
}
}
HConnectionImplementation.java
// Locates the meta table's region (excerpt; the dotted lines mark code left out by the article).
private RegionLocations locateMeta(final TableName tableName,
boolean useCache, int replicaId) throws IOException {
..........
// Look up from zookeeper
// Per the original note, registry here is a ZooKeeperRegistry, so the meta table's
// location is read from ZooKeeper.
locations = this.registry.getMetaRegionLocation();
// Cache the location when one was found.
if (locations != null) {
cacheLocation(tableName, locations);
}
........
整个查询过程如下图