Skip to content

Commit

Permalink
Initial commit for search only replica.
Browse files Browse the repository at this point in the history
This PR contains the following:
1. Introduce searchOnly flag on ShardRouting.
2. Added feature flag to enable/disable the feature.
3. supports both create and update APIs to toggle search replica count.
4. Changes to exclude search replicas from primary eligibility.
5. Changes to prevent replicationOperations from routing to search replicas.

Signed-off-by: Marc Handalian <[email protected]>
  • Loading branch information
mch2 committed Aug 26, 2024
1 parent b830d68 commit a1b71a9
Show file tree
Hide file tree
Showing 24 changed files with 1,064 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.indices.settings;

import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.settings.SettingsException;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.indices.replication.common.ReplicationType;
import org.opensearch.test.OpenSearchIntegTestCase;

import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE;
import static org.opensearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING;

@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, numDataNodes = 1)
public class SearchOnlyReplicaFeatureFlagIT extends OpenSearchIntegTestCase {

private static final String TEST_INDEX = "test_index";

@Override
protected Settings featureFlagSettings() {
return Settings.builder()
.put(super.featureFlagSettings())
.put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, Boolean.FALSE)
.build();
}

@Override
public Settings indexSettings() {
return Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1)
.put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT)
.build();
}

public void testCreateFeatureFlagDisabled() {
Settings settings = Settings.builder().put(indexSettings()).put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, false).build();

SettingsException settingsException = expectThrows(SettingsException.class, () -> createIndex(TEST_INDEX, settings));
assertEquals(
"unknown setting [index.number_of_search_only_shards] did you mean [index.number_of_routing_shards]?",
settingsException.getMessage()
);
}

public void testUpdateFeatureFlagDisabled() {
Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "0ms") // so that after we punt a node we can immediately try to
// reallocate after node left.
.put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT)
.build();

createIndex(TEST_INDEX, settings);
SettingsException settingsException = expectThrows(SettingsException.class, () -> {
client().admin()
.indices()
.prepareUpdateSettings(TEST_INDEX)
.setSettings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1))
.get();
});
assertEquals(
"unknown setting [index.number_of_search_only_shards] did you mean [index.number_of_routing_shards]?",
settingsException.getMessage()
);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.indices.settings;

import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.metadata.Metadata;
import org.opensearch.cluster.routing.IndexShardRoutingTable;
import org.opensearch.cluster.routing.ShardRouting;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.indices.replication.common.ReplicationType;
import org.opensearch.test.InternalTestCluster;
import org.opensearch.test.OpenSearchIntegTestCase;

import java.io.IOException;

import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE;
import static org.opensearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING;

@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
public class SearchOnlyReplicaIT extends OpenSearchIntegTestCase {

private static final String TEST_INDEX = "test_index";

@Override
protected Settings featureFlagSettings() {
return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, Boolean.TRUE).build();
}

private final String expectedFailureMessage =
"To set index.number_of_search_only_shards, index.replication.type must be set to SEGMENT";

@Override
public Settings indexSettings() {
return Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1)
.put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "0ms") // so that after we punt a node we can immediately try to
// reallocate after node left.
.put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT)
.build();
}

public void testCreateDocRepFails() {
Settings settings = Settings.builder().put(indexSettings()).put(SETTING_REPLICATION_TYPE, ReplicationType.DOCUMENT).build();

IllegalArgumentException illegalArgumentException = expectThrows(
IllegalArgumentException.class,
() -> createIndex(TEST_INDEX, settings)
);
assertEquals(expectedFailureMessage, illegalArgumentException.getMessage());
}

public void testUpdateDocRepFails() {
Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(SETTING_REPLICATION_TYPE, ReplicationType.DOCUMENT)
.build();
// create succeeds
createIndex(TEST_INDEX, settings);

// update fails
IllegalArgumentException illegalArgumentException = expectThrows(IllegalArgumentException.class, () -> {
client().admin()
.indices()
.prepareUpdateSettings(TEST_INDEX)
.setSettings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1))
.get();
});
assertEquals(expectedFailureMessage, illegalArgumentException.getMessage());
}

public void testSearchReplicasAreNotPrimaryEligible() throws IOException {
int numSearchReplicas = randomIntBetween(0, 3);
int numWriterReplicas = randomIntBetween(0, 3);
internalCluster().startClusterManagerOnlyNode();
String primaryNodeName = internalCluster().startDataOnlyNode();
createIndex(
TEST_INDEX,
Settings.builder()
.put(indexSettings())
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numWriterReplicas)
.put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, numSearchReplicas)
.build()
);
ensureYellow(TEST_INDEX);
for (int i = 0; i < numSearchReplicas + numWriterReplicas; i++) {
internalCluster().startDataOnlyNode();
}
ensureGreen(TEST_INDEX);

// assert shards are on separate nodes & all active
assertActiveShardCounts(numSearchReplicas, numWriterReplicas);

// stop the primary and ensure search shard is not promoted:
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName));
ensureRed(TEST_INDEX);

if (numWriterReplicas > 0) {
assertActiveShardCounts(numSearchReplicas, numWriterReplicas - 1); // 1 repl is inactive that was promoted to primary
// add back a node
internalCluster().startDataOnlyNode();
ensureGreen(TEST_INDEX);
} else {
// index falls red and does not recover
// Without any writer replica with n2n replication this is an unrecoverable scenario and snapshot restore is required.
ensureRed(TEST_INDEX);
assertActiveSearchShards(numSearchReplicas);
}
}

public void testSearchReplicaScaling() {
internalCluster().startNodes(2);
createIndex(TEST_INDEX);
ensureGreen(TEST_INDEX);
// assert settings
Metadata metadata = client().admin().cluster().prepareState().get().getState().metadata();
int numSearchReplicas = Integer.parseInt(metadata.index(TEST_INDEX).getSettings().get(SETTING_NUMBER_OF_SEARCH_REPLICAS));
assertEquals(1, numSearchReplicas);

// assert cluster state & routing table
assertActiveSearchShards(1);

// Add another node and search replica
internalCluster().startDataOnlyNode();
client().admin()
.indices()
.prepareUpdateSettings(TEST_INDEX)
.setSettings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 2))
.get();

ensureGreen(TEST_INDEX);
assertActiveSearchShards(2);

// remove all search shards
client().admin()
.indices()
.prepareUpdateSettings(TEST_INDEX)
.setSettings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 0))
.get();
ensureGreen(TEST_INDEX);
assertActiveSearchShards(0);
}

/**
* Helper to assert counts of active shards for each type.
*/
private void assertActiveShardCounts(int expectedSearchReplicaCount, int expectedWriteReplicaCount) {
// assert routing table
IndexShardRoutingTable indexShardRoutingTable = getIndexShardRoutingTable();
// assert search replica count
int activeCount = expectedSearchReplicaCount + expectedWriteReplicaCount;
assertEquals(expectedSearchReplicaCount, indexShardRoutingTable.searchOnlyReplicas().stream().filter(ShardRouting::active).count());
assertEquals(expectedWriteReplicaCount, indexShardRoutingTable.writerReplicas().stream().filter(ShardRouting::active).count());
assertEquals(
expectedWriteReplicaCount + expectedSearchReplicaCount,
indexShardRoutingTable.replicaShards().stream().filter(ShardRouting::active).count()
);

// assert routing nodes
ClusterState clusterState = getClusterState();
assertEquals(activeCount, clusterState.getRoutingNodes().shards(r -> r.active() && !r.primary()).size());
assertEquals(expectedSearchReplicaCount, clusterState.getRoutingNodes().shards(r -> r.active() && r.isSearchOnly()).size());
assertEquals(
expectedWriteReplicaCount,
clusterState.getRoutingNodes().shards(r -> r.active() && !r.primary() && !r.isSearchOnly()).size()
);
}

private void assertActiveSearchShards(int expectedSearchReplicaCount) {
assertActiveShardCounts(expectedSearchReplicaCount, 0);
}

private IndexShardRoutingTable getIndexShardRoutingTable() {
return getClusterState().routingTable().index(TEST_INDEX).shards().values().stream().findFirst().get();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,21 @@ static Setting<Integer> buildNumberOfShardsSetting() {
Property.IndexScope
);

/**
* Setting to control the number of search only replicas for an index.
* A search only replica has the following properties:
* 1. Is not primary eligible
* 2.
*/
public static final String SETTING_NUMBER_OF_SEARCH_REPLICAS = "index.number_of_search_only_shards";
public static final Setting<Integer> INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING = Setting.intSetting(
SETTING_NUMBER_OF_SEARCH_REPLICAS,
0,
0,
Property.Dynamic,
Property.IndexScope
);

public static final String SETTING_ROUTING_PARTITION_SIZE = "index.routing_partition_size";
public static final Setting<Integer> INDEX_ROUTING_PARTITION_SIZE_SETTING = Setting.intSetting(
SETTING_ROUTING_PARTITION_SIZE,
Expand Down Expand Up @@ -648,6 +663,7 @@ public static APIBlock readFrom(StreamInput input) throws IOException {

private final int numberOfShards;
private final int numberOfReplicas;
private final int numberOfSearchOnlyReplicas;

private final Index index;
private final long version;
Expand Down Expand Up @@ -699,6 +715,7 @@ private IndexMetadata(
final State state,
final int numberOfShards,
final int numberOfReplicas,
final int numberOfSearchOnlyReplicas,
final Settings settings,
final Map<String, MappingMetadata> mappings,
final Map<String, AliasMetadata> aliases,
Expand Down Expand Up @@ -731,7 +748,8 @@ private IndexMetadata(
this.state = state;
this.numberOfShards = numberOfShards;
this.numberOfReplicas = numberOfReplicas;
this.totalNumberOfShards = numberOfShards * (numberOfReplicas + 1);
this.numberOfSearchOnlyReplicas = numberOfSearchOnlyReplicas;
this.totalNumberOfShards = numberOfShards * (numberOfReplicas + numberOfSearchOnlyReplicas + 1);
this.settings = settings;
this.mappings = Collections.unmodifiableMap(mappings);
this.customData = Collections.unmodifiableMap(customData);
Expand Down Expand Up @@ -833,6 +851,10 @@ public int getNumberOfReplicas() {
return numberOfReplicas;
}

public int getNumberOfSearchOnlyReplicas() {
return numberOfSearchOnlyReplicas;
}

public int getRoutingPartitionSize() {
return routingPartitionSize;
}
Expand Down Expand Up @@ -1324,6 +1346,11 @@ public Builder numberOfReplicas(int numberOfReplicas) {
return this;
}

public Builder numberOfSearchReplicas(int numberOfSearchReplicas) {
settings = Settings.builder().put(settings).put(SETTING_NUMBER_OF_SEARCH_REPLICAS, numberOfSearchReplicas).build();
return this;
}

public Builder routingPartitionSize(int routingPartitionSize) {
settings = Settings.builder().put(settings).put(SETTING_ROUTING_PARTITION_SIZE, routingPartitionSize).build();
return this;
Expand Down Expand Up @@ -1511,6 +1538,7 @@ public IndexMetadata build() {
throw new IllegalArgumentException("must specify number of replicas for index [" + index + "]");
}
final int numberOfReplicas = INDEX_NUMBER_OF_REPLICAS_SETTING.get(settings);
final int numberOfSearchReplicas = INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING.get(settings);

int routingPartitionSize = INDEX_ROUTING_PARTITION_SIZE_SETTING.get(settings);
if (routingPartitionSize != 1 && routingPartitionSize >= getRoutingNumShards()) {
Expand Down Expand Up @@ -1606,6 +1634,7 @@ public IndexMetadata build() {
state,
numberOfShards,
numberOfReplicas,
numberOfSearchReplicas,
tmpSettings,
mappings,
tmpAliases,
Expand Down
18 changes: 18 additions & 0 deletions server/src/main/java/org/opensearch/cluster/metadata/Metadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -1506,6 +1506,24 @@ public Builder updateNumberOfReplicas(final int numberOfReplicas, final String[]
return this;
}

/**
* Update the number of search replicas for the specified indices.
*
* @param numberOfSearchReplicas the number of search replicas
* @param indices the indices to update the number of replicas for
* @return the builder
*/
public Builder updateNumberOfSearchReplicas(final int numberOfSearchReplicas, final String[] indices) {
for (String index : indices) {
IndexMetadata indexMetadata = this.indices.get(index);
if (indexMetadata == null) {
throw new IndexNotFoundException(index);
}
put(IndexMetadata.builder(indexMetadata).numberOfSearchReplicas(numberOfSearchReplicas));
}
return this;
}

public Builder coordinationMetadata(CoordinationMetadata coordinationMetadata) {
this.coordinationMetadata = coordinationMetadata;
return this;
Expand Down
Loading

0 comments on commit a1b71a9

Please sign in to comment.