Skip to content

Commit

Permalink
HDFS-17646. Add Option to limit Balancer overUtilized nodes num in ea…
Browse files Browse the repository at this point in the history
…ch iteration.
  • Loading branch information
huangzhaobo99 committed Oct 18, 2024
1 parent 78a08b3 commit 54a6488
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ public class Balancer {
+ "\n\t[-sortTopNodes]"
+ "\tSort datanodes based on the utilization so "
+ "that highly utilized datanodes get scheduled first."
+ "\n\t[-limitOverUtilizedNum <specified maximum number of overutilized datanodes>]"
+ "\tLimit the maximum number of overutilized datanodes"
+ "\n\t[-hotBlockTimeInterval]\tprefer to move cold blocks.";

@VisibleForTesting
Expand All @@ -227,6 +229,7 @@ public class Balancer {
private final long maxSizeToMove;
private final long defaultBlockSize;
private final boolean sortTopNodes;
private final int limitOverUtilizedNum;
private final BalancerMetrics metrics;

// all data node lists
Expand Down Expand Up @@ -352,6 +355,7 @@ static int getFailedTimesSinceLastSuccessfulBalance() {
this.sourceNodes = p.getSourceNodes();
this.runDuringUpgrade = p.getRunDuringUpgrade();
this.sortTopNodes = p.getSortTopNodes();
this.limitOverUtilizedNum = p.getlimitOverUtilizedNum();

this.maxSizeToMove = getLongBytes(conf,
DFSConfigKeys.DFS_BALANCER_MAX_SIZE_TO_MOVE_KEY,
Expand Down Expand Up @@ -456,13 +460,20 @@ private long init(List<DatanodeStorageReport> reports) {
sortOverUtilized(overUtilizedPercentage);
}

// limit the number of OverUtilized nodes
// If excludedOverUtilizedNum is greater than 0, The overUtilized is limited
int excludedOverUtilizedNum = Math.max(overUtilized.size() - limitOverUtilizedNum, 0);
if (excludedOverUtilizedNum > 0) {
limitOverUtilizedNum();
}

logUtilizationCollections();
metrics.setNumOfOverUtilizedNodes(overUtilized.size());
metrics.setNumOfUnderUtilizedNodes(underUtilized.size());
Preconditions.checkState(dispatcher.getStorageGroupMap().size()
== overUtilized.size() + underUtilized.size() + aboveAvgUtilized.size()
+ belowAvgUtilized.size(),

Preconditions.checkState(dispatcher.getStorageGroupMap().size() - excludedOverUtilizedNum
== overUtilized.size() + underUtilized.size() + aboveAvgUtilized.size()
+ belowAvgUtilized.size(),
"Mismatched number of storage groups");

// return number of bytes to be moved in order to make the cluster balanced
Expand All @@ -484,6 +495,16 @@ private void sortOverUtilized(Map<Source, Double> overUtilizedPercentage) {
);
}

private void limitOverUtilizedNum() {
Preconditions.checkState(overUtilized instanceof LinkedList,
"Collection overUtilized is not a LinkedList.");
LinkedList<Source> list = (LinkedList<Source>) overUtilized;
int size = overUtilized.size();
for (int i = 0; i < size - limitOverUtilizedNum; i++) {
list.removeLast();
}
}

private static long computeMaxSize2Move(final long capacity, final long remaining,
final double utilizationDiff, final long max) {
final double diff = Math.abs(utilizationDiff);
Expand Down Expand Up @@ -1071,6 +1092,12 @@ static BalancerParameters parse(String[] args) {
b.setSortTopNodes(true);
LOG.info("Balancer will sort nodes by" +
" capacity usage percentage to prioritize top used nodes");
} else if ("-limitOverUtilizedNum".equalsIgnoreCase(args[i])) {
Preconditions.checkArgument(++i < args.length,
"limitOverUtilizedNum value is missing: args = " + Arrays.toString(args));
int limitNum = Integer.parseInt(args[i]);
LOG.info("Using a limitOverUtilizedNum of {}", limitNum);
b.setLimitOverUtilizedNum(limitNum);
} else {
throw new IllegalArgumentException("args = "
+ Arrays.toString(args));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ final class BalancerParameters {

private final boolean sortTopNodes;

private final int limitOverUtilizedNum;

static final BalancerParameters DEFAULT = new BalancerParameters();

private BalancerParameters() {
Expand All @@ -67,6 +69,7 @@ private BalancerParameters(Builder builder) {
this.runDuringUpgrade = builder.runDuringUpgrade;
this.runAsService = builder.runAsService;
this.sortTopNodes = builder.sortTopNodes;
this.limitOverUtilizedNum = builder.limitOverUtilizedNum;
this.hotBlockTimeInterval = builder.hotBlockTimeInterval;
}

Expand Down Expand Up @@ -110,6 +113,10 @@ boolean getSortTopNodes() {
return this.sortTopNodes;
}

int getlimitOverUtilizedNum() {
return this.limitOverUtilizedNum;
}

long getHotBlockTimeInterval() {
return this.hotBlockTimeInterval;
}
Expand All @@ -120,12 +127,12 @@ public String toString() {
+ " max idle iteration = %s," + " #excluded nodes = %s,"
+ " #included nodes = %s," + " #source nodes = %s,"
+ " #blockpools = %s," + " run during upgrade = %s,"
+ " sort top nodes = %s,"
+ " sort top nodes = %s," + " limit overutilized nodes num = %s"
+ " hot block time interval = %s]",
Balancer.class.getSimpleName(), getClass().getSimpleName(), policy,
threshold, maxIdleIteration, excludedNodes.size(),
includedNodes.size(), sourceNodes.size(), blockpools.size(),
runDuringUpgrade, sortTopNodes, hotBlockTimeInterval);
runDuringUpgrade, sortTopNodes, limitOverUtilizedNum, hotBlockTimeInterval);
}

static class Builder {
Expand All @@ -141,6 +148,7 @@ static class Builder {
private boolean runDuringUpgrade = false;
private boolean runAsService = false;
private boolean sortTopNodes = false;
private int limitOverUtilizedNum = Integer.MAX_VALUE;
private long hotBlockTimeInterval = 0;

Builder() {
Expand Down Expand Up @@ -201,6 +209,11 @@ Builder setSortTopNodes(boolean shouldSortTopNodes) {
return this;
}

Builder setLimitOverUtilizedNum(int limitOverUtilizedNum) {
this.limitOverUtilizedNum = limitOverUtilizedNum;
return this;
}

BalancerParameters build() {
return new BalancerParameters(this);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ Usage:
[-runDuringUpgrade]
[-asService]
[-sortTopNodes]
[-limitOverUtilizedNum <specified maximum number of overutilized datanodes>]
[-hotBlockTimeInterval <specified time interval>]

| COMMAND\_OPTION | Description |
Expand All @@ -307,6 +308,7 @@ Usage:
| `-runDuringUpgrade` | Whether to run the balancer during an ongoing HDFS upgrade. This is usually not desired since it will not affect used space on over-utilized machines. |
| `-asService` | Run Balancer as a long running service. |
| `-sortTopNodes` | Sort datanodes based on the utilization so that highly utilized datanodes get scheduled first. |
| `-limitOverUtilizedNum` | Limit the maximum number of overutilized datanodes. |
| `-hotBlockTimeInterval` | Prefer moving cold blocks i.e blocks associated with files accessed or modified before the specified time interval. |
| `-h`\|`--help` | Display the tool usage and help information and exit. |

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,98 @@ public void testBalancerWithSortTopNodes() throws Exception {
assertEquals(900, maxUsage);
}

@Test(timeout = 60000)
public void testBalancerWithLimitOverUtilizedNum() throws Exception {
final Configuration conf = new HdfsConfiguration();
// Init the config (block size to 100)
initConf(conf);
conf.setInt(DFS_HEARTBEAT_INTERVAL_KEY, 30000);

final long totalCapacity = 1000L;
final int diffBetweenNodes = 50;

// Set up the nodes with two groups:
// 5 over-utilized nodes with 80%, 85%, 90%, 95%, 100% usage
// 2 under-utilized nodes with 0%, 5% usage
// With sortTopNodes and limitOverUtilizedNum option, 100% used ones will be chosen
final int numOfOverUtilizedDn = 5;
final int numOfUnderUtilizedDn = 2;
final int totalNumOfDn = numOfOverUtilizedDn + numOfUnderUtilizedDn;
final long[] capacityArray = new long[totalNumOfDn];
Arrays.fill(capacityArray, totalCapacity);

try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(totalNumOfDn)
.simulatedCapacities(capacityArray)
.build()) {
cluster.setDataNodesDead();
List<DataNode> dataNodes = cluster.getDataNodes();
// Create top used nodes
for (int i = 0; i < numOfOverUtilizedDn; i++) {
// Bring one node alive
DataNodeTestUtils.triggerHeartbeat(dataNodes.get(i));
DataNodeTestUtils.triggerBlockReport(dataNodes.get(i));
// Create nodes with: 80%, 85%, 90%, 95%, 100%
int nodeCapacity = (int) totalCapacity - diffBetweenNodes * (numOfOverUtilizedDn - i - 1);
TestBalancer.createFile(cluster, new Path("test_big" + i), nodeCapacity, (short) 1, 0);
cluster.setDataNodesDead();
}

// Create under utilized nodes
for (int i = numOfUnderUtilizedDn - 1; i >= 0; i--) {
int index = i + numOfOverUtilizedDn;
// Bring one node alive
DataNodeTestUtils.triggerHeartbeat(dataNodes.get(index));
DataNodeTestUtils.triggerBlockReport(dataNodes.get(index));
// Create nodes with: 5%, 0%
int nodeCapacity = diffBetweenNodes * i;
TestBalancer.createFile(cluster, new Path("test_small" + i), nodeCapacity, (short) 1, 0);
cluster.setDataNodesDead();
}

// Bring all nodes alive
cluster.triggerHeartbeats();
cluster.triggerBlockReports();
cluster.waitFirstBRCompleted(0, 6000);

final BalancerParameters balancerParameters = Balancer.Cli.parse(new String[] {
"-policy", BalancingPolicy.Node.INSTANCE.getName(),
"-threshold", "1",
"-sortTopNodes",
"-limitOverUtilizedNum", "1"
});

client = NameNodeProxies.createProxy(conf, cluster.getFileSystem(0)
.getUri(), ClientProtocol.class)
.getProxy();

// Set max-size-to-move to small number
// so only top two nodes will be chosen in one iteration
conf.setLong(DFS_BALANCER_MAX_SIZE_TO_MOVE_KEY, 99L);
final Collection<URI> namenodes = DFSUtil.getInternalNsRpcUris(conf);
List<NameNodeConnector> connectors =
NameNodeConnector.newNameNodeConnectors(namenodes, Balancer.class.getSimpleName(),
Balancer.BALANCER_ID_PATH, conf, BalancerParameters.DEFAULT.getMaxIdleIteration());
final Balancer balancer = new Balancer(connectors.get(0), balancerParameters, conf);
Balancer.Result balancerResult = balancer.runOneIteration();

cluster.triggerDeletionReports();
cluster.triggerBlockReports();
cluster.triggerHeartbeats();

DatanodeInfo[] datanodeReport =
client.getDatanodeReport(HdfsConstants.DatanodeReportType.ALL);
long maxUsage = 0;
for (int i = 0; i < totalNumOfDn; i++) {
maxUsage = Math.max(maxUsage, datanodeReport[i].getDfsUsed());
}
// The maxUsage value is 950, only 100% of the nodes will be balanced
assertEquals(950, maxUsage);
assertTrue("BalancerResult is not as expected. " + balancerResult,
(balancerResult.getBytesAlreadyMoved() == 100 && balancerResult.getBlocksMoved() == 1));
}
}

@Test(timeout = 100000)
public void testMaxIterationTime() throws Exception {
final Configuration conf = new HdfsConfiguration();
Expand Down

0 comments on commit 54a6488

Please sign in to comment.