CapacityScheduler#nodeUpdate() method
protected void nodeUpdate(RMNode rmNode) {
  long begin = System.nanoTime();
  try {
    readLock.lock();
    setLastNodeUpdateTime(Time.now());
    super.nodeUpdate(rmNode);
  } finally {
    readLock.unlock();
  }

  // Try to do scheduling
  if (!scheduleAsynchronously) {
    try {
      writeLock.lock();
      ActivitiesLogger.NODE.startNodeUpdateRecording(activitiesManager,
          rmNode.getNodeID());

      // reset allocation and reservation stats before we start doing any
      // work
      updateSchedulerHealth(lastNodeUpdateTime, rmNode.getNodeID(),
          CSAssignment.NULL_ASSIGNMENT);

      allocateContainersToNode(rmNode.getNodeID(), true);
      ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager,
          rmNode.getNodeID());
    } finally {
      writeLock.unlock();
    }
  }

  long latency = System.nanoTime() - begin;
  CapacitySchedulerMetrics.getMetrics().addNodeUpdate(latency);
}
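In short, nodeUpdate() refreshes the node's liveness state under the read lock (updating the last-heartbeat timestamp and delegating to the parent scheduler), and only when asynchronous scheduling is disabled does it drive one round of container allocation for the reporting node under the write lock, finally recording the heartbeat-handling latency in CapacitySchedulerMetrics. (The scheduleAsynchronously flag is typically controlled by yarn.scheduler.capacity.schedule-asynchronously.enable; when it is on, allocation is driven by background scheduling threads instead of heartbeats.) A minimal sketch of this heartbeat-driven pattern, using hypothetical names rather than the real YARN classes:

import java.util.concurrent.locks.ReentrantReadWriteLock;

// Simplified illustration only: node state is refreshed under the read lock,
// allocation runs under the write lock, and allocation is skipped entirely
// when asynchronous scheduling is enabled. All names here are hypothetical.
class HeartbeatDrivenScheduler {
  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
  private volatile boolean scheduleAsynchronously = false;
  private volatile long lastNodeUpdateTime;

  void onNodeHeartbeat(String nodeId) {
    long begin = System.nanoTime();
    lock.readLock().lock();
    try {
      lastNodeUpdateTime = System.currentTimeMillis(); // refresh node state
    } finally {
      lock.readLock().unlock();
    }
    if (!scheduleAsynchronously) {        // heartbeat-driven allocation path
      lock.writeLock().lock();
      try {
        allocateOn(nodeId);
      } finally {
        lock.writeLock().unlock();
      }
    }
    long latencyNs = System.nanoTime() - begin; // would feed a metrics sink
    System.out.println("nodeUpdate latency(ns)=" + latencyNs);
  }

  private void allocateOn(String nodeId) {
    // placeholder for the actual container-allocation logic
  }
}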
CapacityScheduler#allocateContainersToNode(NodeId, boolean) method
/**
 * We need to make sure when doing allocation, Node should be existed
 * And we will construct a {@link CandidateNodeSet} before proceeding
 */
private void allocateContainersToNode(NodeId nodeId,
    boolean withNodeHeartbeat) {
  FiCaSchedulerNode node = getNode(nodeId);
  if (null != node) {
    int offswitchCount = 0;
    int assignedContainers = 0;

    CandidateNodeSet<FiCaSchedulerNode> candidates = getCandidateNodeSet(
        node);
    CSAssignment assignment = allocateContainersToNode(candidates,
        withNodeHeartbeat);
    // Only check if we can allocate more container on the same node when
    // scheduling is triggered by node heartbeat
    if (null != assignment && withNodeHeartbeat) {
      if (assignment.getType() == NodeType.OFF_SWITCH) {
        offswitchCount++;
      }

      if (Resources.greaterThan(calculator, getClusterResource(),
          assignment.getResource(), Resources.none())) {
        assignedContainers++;
      }

      while (canAllocateMore(assignment, offswitchCount,
          assignedContainers)) {
        // Try to see if it is possible to allocate multiple container for
        // the same node heartbeat
        assignment = allocateContainersToNode(candidates, true);

        if (null != assignment
            && assignment.getType() == NodeType.OFF_SWITCH) {
          offswitchCount++;
        }

        if (null != assignment
            && Resources.greaterThan(calculator, getClusterResource(),
                assignment.getResource(), Resources.none())) {
          assignedContainers++;
        }
      }

      if (offswitchCount >= offswitchPerHeartbeatLimit) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Assigned maximum number of off-switch containers: "
              + offswitchCount + ", assignments so far: " + assignment);
        }
      }
    }
  }
}
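When scheduling is triggered by a node heartbeat, this overload keeps invoking the CandidateNodeSet overload in a loop so that several containers can be assigned on a single heartbeat, counting off-switch assignments and total assignments as it goes. The loop is bounded by canAllocateMore(); in Hadoop these bounds roughly correspond to the per-node-heartbeat limits in capacity-scheduler.xml (e.g. yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments and ...maximum-container-assignments). A simplified, hypothetical sketch of such a loop guard (not the exact Hadoop implementation):

// Simplified sketch of the per-heartbeat loop guard; the real checks live in
// CapacityScheduler#canAllocateMore(). Assumption: a negative
// maxAssignPerHeartbeat means "no limit".
static boolean canAllocateMoreSketch(boolean lastAssignmentNonEmpty,
    int offswitchCount, int offswitchLimit,
    int assignedContainers, int maxAssignPerHeartbeat) {
  if (!lastAssignmentNonEmpty) {
    return false;                 // stop once the last round assigned nothing
  }
  if (offswitchCount >= offswitchLimit) {
    return false;                 // cap off-switch assignments per heartbeat
  }
  return maxAssignPerHeartbeat < 0                     // unlimited, or
      || assignedContainers < maxAssignPerHeartbeat;   // still under the cap
}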
CapacityScheduler#allocateContainersToNode(CandidateNodeSet, boolean) method
CSAssignment allocateContainersToNode(
    CandidateNodeSet<FiCaSchedulerNode> candidates,
    boolean withNodeHeartbeat) {
  if (rmContext.isWorkPreservingRecoveryEnabled() && !rmContext
      .isSchedulerReadyForAllocatingContainers()) {
    return null;
  }

  long startTime = System.nanoTime();

  // Backward compatible way to make sure previous behavior which allocation
  // driven by node heartbeat works.
  FiCaSchedulerNode node = CandidateNodeSetUtils.getSingleNode(candidates);

  // We have two different logics to handle allocation on single node / multi
  // nodes.
  CSAssignment assignment;
  if (!multiNodePlacementEnabled) {
    assignment = allocateContainerOnSingleNode(candidates,
        node, withNodeHeartbeat);
  } else {
    assignment = allocateContainersOnMultiNodes(candidates);
  }

  if (assignment != null && assignment.getAssignmentInformation() != null
      && assignment.getAssignmentInformation().getNumAllocations() > 0) {
    long allocateTime = System.nanoTime() - startTime;
    CapacitySchedulerMetrics.getMetrics().addAllocate(allocateTime);
  }
  return assignment;
}
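This overload is the common entry point for both scheduling modes. It bails out while the scheduler is still recovering containers after an RM restart (work-preserving recovery), extracts the single node from the candidate set for the backward-compatible heartbeat path, and then dispatches either to allocateContainerOnSingleNode() or, when multi-node placement is enabled, to allocateContainersOnMultiNodes(); the allocation-latency metric is recorded only when at least one container was actually allocated. A small sketch of the single-node convention the heartbeat path relies on (a hypothetical helper mirroring how CandidateNodeSetUtils.getSingleNode() is used here):

// Hypothetical helper: a candidate set that wraps exactly one node yields
// that node (the backward-compatible, heartbeat-driven path); any other size
// yields null, which pushes scheduling toward the multi-node handling.
static <N> N getSingleCandidate(java.util.Collection<N> candidates) {
  return candidates.size() == 1
      ? candidates.iterator().next()
      : null;
}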
CapacityScheduler#allocateContainersOnMultiNodes() method
/*
 * New behavior, allocate containers considering multiple nodes
 */
private CSAssignment allocateContainersOnMultiNodes(
    CandidateNodeSet<FiCaSchedulerNode> candidates) {
  // When this time look at multiple nodes, try schedule if the
  // partition has any available resource or killable resource
  if (getRootQueue().getQueueCapacities().getUsedCapacity(
      candidates.getPartition()) >= 1.0f
      && preemptionManager.getKillableResource(
          CapacitySchedulerConfiguration.ROOT, candidates.getPartition())
          == Resources.none()) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("This node or this node partition doesn't have available or"
          + "killable resource");
    }
    return null;
  }

  return allocateOrReserveNewContainers(candidates, false);
}
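Before attempting multi-node allocation, the method short-circuits when the target partition is already fully used and the preemption manager reports nothing killable: with neither free nor reclaimable capacity, trying to allocate or reserve would be wasted work. A simplified sketch of this pre-check, with hypothetical names and resource dimensions:

// Hypothetical simplification of the guard above: skip the scheduling round
// when used capacity is at or above 100% of the partition and preemption has
// marked no resources as killable.
static boolean worthScheduling(float usedCapacity,
    long killableMemoryMb, int killableVcores) {
  boolean partitionFull = usedCapacity >= 1.0f;
  boolean nothingKillable = killableMemoryMb == 0 && killableVcores == 0;
  return !(partitionFull && nothingKillable);
}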
CapacityScheduler#allocateOrReserveNewContainers() method
private CSAssignment allocateOrReserveNewContainers(
    CandidateNodeSet<FiCaSchedulerNode> candidates,
    boolean withNodeHeartbeat) {
  CSAssignment assignment = getRootQueue().assignContainers(
      getClusterResource(), candidates, new ResourceLimits(labelManager
          .getResourceByLabel(candidates.getPartition(),
              getClusterResource())),
      SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);

  assignment.setSchedulingMode(SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
  submitResourceCommitRequest(getClusterResource(), assignment);

  if (Resources.greaterThan(calculator, getClusterResource(),
      assignment.getResource(), Resources.none())) {
    FiCaSchedulerNode node = CandidateNodeSetUtils.getSingleNode(candidates);
    NodeId nodeId = null;
    if (node != null) {
      nodeId = node.getNodeID();
    }

    if (withNodeHeartbeat) {
      updateSchedulerHealth(lastNodeUpdateTime, nodeId, assignment);
    }
    return assignment;
  }

  // Only do non-exclusive allocation when node has node-labels.
  if (StringUtils.equals(candidates.getPartition(),
      RMNodeLabelsManager.NO_LABEL)) {
    return null;
  }

  // Only do non-exclusive allocation when the node-label supports that
  try {