Skip to content
5 changes: 5 additions & 0 deletions docs/changelog/140197.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 140197
summary: Allow relocation to NOT_PREFERRED node for evacuating shards
area: Allocation
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import java.util.List;
import java.util.Objects;

import static org.elasticsearch.cluster.routing.allocation.decider.Decision.NO;

/**
* Represents a decision to move a started shard, either because it is no longer allowed to remain on its current node
* or because moving it to another node will form a better cluster balance.
Expand All @@ -39,14 +41,7 @@ public final class MoveDecision extends AbstractAllocationDecision {
null,
0
);
private static final MoveDecision CACHED_CANNOT_MOVE_DECISION = new MoveDecision(
null,
null,
AllocationDecision.NO,
Decision.NO,
null,
0
);
private static final MoveDecision CACHED_CANNOT_MOVE_DECISION = new MoveDecision(null, null, AllocationDecision.NO, NO, null, 0);

@Nullable
private final AllocationDecision canMoveDecision;
Expand Down Expand Up @@ -172,12 +167,11 @@ public boolean isDecisionTaken() {
*/
public boolean cannotRemainAndCanMove() {
checkDecisionState();
return cannotRemain() && (canMoveDecision == AllocationDecision.YES);
}

public boolean cannotRemainAndNotPreferredMove() {
checkDecisionState();
return cannotRemain() && canMoveDecision == AllocationDecision.NOT_PREFERRED;
return switch (canRemainDecision.type()) {
case NO -> canMoveDecision == AllocationDecision.YES || canMoveDecision == AllocationDecision.NOT_PREFERRED;
case NOT_PREFERRED -> canMoveDecision == AllocationDecision.YES;
default -> false;
};
}

/**
Expand Down Expand Up @@ -342,9 +336,7 @@ public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params params
builder.field("rebalance_explanation", getExplanation());
} else {
if (cannotRemainAndCanMove()) {
builder.field("can_move_to_other_node", "yes");
} else if (cannotRemainAndNotPreferredMove()) {
builder.field("can_move_to_other_node", "not-preferred");
builder.field("can_move_to_other_node", canMoveDecision);
} else {
builder.field("can_move_to_other_node", "no");
}
Expand Down Expand Up @@ -374,4 +366,17 @@ public int hashCode() {
return 31 * super.hashCode() + Objects.hash(canMoveDecision, canRemainDecision, clusterRebalanceDecision, currentNodeRanking);
}

@Override
public String toString() {
return "MoveDecision{"
+ "canMoveDecision="
+ canMoveDecision
+ ", canRemainDecision="
+ canRemainDecision
+ ", clusterRebalanceDecision="
+ clusterRebalanceDecision
+ ", currentNodeRanking="
+ currentNodeRanking
+ '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,7 @@ public boolean moveShards() {
shardMoved = true;
}
} else if (moveDecision.isDecisionTaken() && moveDecision.cannotRemain()) {
logger.trace("[{}][{}] can't move", shardRouting.index(), shardRouting.id());
logger.trace("[{}][{}] can't move: [{}]", shardRouting.index(), shardRouting.id(), moveDecision);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.shutdown;

import org.elasticsearch.cluster.metadata.ProjectId;
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.plugins.ClusterPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ClusterServiceUtils;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.After;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
public class ShutdownEvacuationIT extends ESIntegTestCase {

private static final Set<String> NOT_PREFERRED_NODES = Collections.newSetFromMap(new ConcurrentHashMap<>());

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(ShutdownPlugin.class, NotPreferredPlugin.class);
}

@After
public void clearNotPreferredNodes() {
NOT_PREFERRED_NODES.clear();
}

public void testCanEvacuationToNotPreferredNodeDuringShutdown() {
final var node1 = internalCluster().startNode();
final var indexName = randomIdentifier();
createIndex(indexName, 1, 0);
ensureGreen(indexName);
final var node2 = internalCluster().startNode();
final var node2ID = getNodeId(node2);
final var node1ID = getNodeId(node1);

NOT_PREFERRED_NODES.add(node2ID);

// Mark node 1 as shutting down
assertAcked(
internalCluster().client()
.execute(
PutShutdownNodeAction.INSTANCE,
new PutShutdownNodeAction.Request(
TEST_REQUEST_TIMEOUT,
TEST_REQUEST_TIMEOUT,
node1ID,
SingleNodeShutdownMetadata.Type.SIGTERM,
"testing",
null,
null,
TimeValue.ZERO
)
)
);

safeAwait(
ClusterServiceUtils.addMasterTemporaryStateListener(
state -> state.routingTable(ProjectId.DEFAULT)
.index(indexName)
.allShards()
.flatMap(IndexShardRoutingTable::allShards)
.allMatch(shardRouting -> shardRouting.currentNodeId().equals(node2ID) && shardRouting.started())
)
);
}

public static class NotPreferredPlugin extends Plugin implements ClusterPlugin {

@Override
public Collection<AllocationDecider> createAllocationDeciders(Settings settings, ClusterSettings clusterSettings) {

return List.of(new AllocationDecider() {
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
return NOT_PREFERRED_NODES.contains(node.nodeId()) ? Decision.NOT_PREFERRED : Decision.YES;
}
});
}
}
}