Skip to content

Commit

Permalink
Default back to pick_first policy (#979)
Browse files Browse the repository at this point in the history
* Default to pick_first policy with shuffled addresses

This offers an improvement over the default "pick_first" policy which always
picks the first host returned by getaddrinfo, which may not always be random
(see linked documentation in `XdsChannelFactory`). The default policy used to be
"round_robin" but this makes the load balancer eagerly create a connection to
each xDS server, instead of only the one it is talking to. This is starting to
cause significant load on the observer instances.

* Bump version, update CL

* CL

* Add log for control plane identifier

* Match log message to others

* Bump gRPC

* Teeny fix in log message

* Bump setup-java

* Go back to gRPC defaults when parameters are null

* Address comments
  • Loading branch information
PapaCharlie authored Feb 15, 2024
1 parent e66f2be commit 48f38b5
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 15 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
with:
# Need to fetch 2 commits for the PR (base commit and head merge commit) so we can compute the diff
fetch-depth: 2
- uses: actions/setup-java@v2
- uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ matrix.java }}
cache: gradle
- run: ./.github/scripts/build.sh
- run: ./.github/scripts/build.sh
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
name: Java ${{ matrix.java }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-java@v2
- uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ matrix.java }}
Expand Down
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ and what APIs have changed, if applicable.

## [Unreleased]

## [29.51.1] - 2024-02-13
- Default back to pick_first policy

## [29.51.0] - 2024-02-06
- Minor version bump due to dropping support to Gradle versions below 6.9.4.
- Make rest.li codebase use Gradle 6.9.4 to build itself
Expand Down Expand Up @@ -5632,7 +5635,8 @@ patch operations can re-use these classes for generating patch messages.

## [0.14.1]

[Unreleased]: https://github.com/linkedin/rest.li/compare/v29.51.0...master
[Unreleased]: https://github.com/linkedin/rest.li/compare/v29.51.1...master
[29.51.1]: https://github.com/linkedin/rest.li/compare/v29.51.0...v29.51.1
[29.51.0]: https://github.com/linkedin/rest.li/compare/v29.50.1...v29.51.0
[29.50.1]: https://github.com/linkedin/rest.li/compare/v29.50.0...v29.50.1
[29.50.0]: https://github.com/linkedin/rest.li/compare/v29.49.9...v29.50.0
Expand Down
14 changes: 13 additions & 1 deletion d2/src/main/java/com/linkedin/d2/balancer/D2ClientBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,9 @@ public D2Client build()
_config.dualReadStateManager,
_config.xdsExecutorService,
_config.xdsStreamReadyTimeout,
_config.dualReadNewLbExecutor
_config.dualReadNewLbExecutor,
_config.xdsChannelLoadBalancingPolicy,
_config.xdsChannelLoadBalancingPolicyConfig
);

final LoadBalancerWithFacilitiesFactory loadBalancerFactory = (_config.lbWithFacilitiesFactory == null) ?
Expand Down Expand Up @@ -699,6 +701,16 @@ public D2ClientBuilder setXdsStreamReadyTimeout(long xdsStreamReadyTimeout) {
return this;
}

public D2ClientBuilder setXdsChannelLoadBalancingPolicy(String xdsChannelLoadBalancingPolicy) {
_config.xdsChannelLoadBalancingPolicy = xdsChannelLoadBalancingPolicy;
return this;
}

public D2ClientBuilder xdsChannelLoadBalancingPolicyConfig(Map<String, ?> xdsChannelLoadBalancingPolicyConfig) {
_config.xdsChannelLoadBalancingPolicyConfig = xdsChannelLoadBalancingPolicyConfig;
return this;
}

private Map<String, TransportClientFactory> createDefaultTransportClientFactories()
{
final Map<String, TransportClientFactory> clientFactories = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ public class D2ClientConfig
public ScheduledExecutorService xdsExecutorService = null;
public Long xdsStreamReadyTimeout = null;
public ExecutorService dualReadNewLbExecutor = null;
public String xdsChannelLoadBalancingPolicy = null;
public Map<String, ?> xdsChannelLoadBalancingPolicyConfig = null;

public D2ClientConfig()
{
Expand Down Expand Up @@ -204,7 +206,9 @@ public D2ClientConfig()
DualReadStateManager dualReadStateManager,
ScheduledExecutorService xdsExecutorService,
Long xdsStreamReadyTimeout,
ExecutorService dualReadNewLbExecutor)
ExecutorService dualReadNewLbExecutor,
String xdsChannelLoadBalancingPolicy,
Map<String, ?> xdsChannelLoadBalancingPolicyConfig)
{
this.zkHosts = zkHosts;
this.xdsServer = xdsServer;
Expand Down Expand Up @@ -274,5 +278,7 @@ public D2ClientConfig()
this.xdsExecutorService = xdsExecutorService;
this.xdsStreamReadyTimeout = xdsStreamReadyTimeout;
this.dualReadNewLbExecutor = dualReadNewLbExecutor;
this.xdsChannelLoadBalancingPolicy = xdsChannelLoadBalancingPolicy;
this.xdsChannelLoadBalancingPolicyConfig = xdsChannelLoadBalancingPolicyConfig;
}
}
42 changes: 37 additions & 5 deletions d2/src/main/java/com/linkedin/d2/xds/XdsChannelFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,42 @@
import io.grpc.internal.GrpcUtil;
import io.grpc.netty.shaded.io.grpc.netty.NettyChannelBuilder;
import io.grpc.netty.shaded.io.netty.handler.ssl.SslContext;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;


public class XdsChannelFactory
{
private static final Logger _log = LoggerFactory.getLogger(XdsChannelFactory.class);

public static final String ROUND_ROBIN_POLICY = "round_robin";

private final SslContext _sslContext;
private final String _xdsServerUri;
@Nullable
private final String _defaultLoadBalancingPolicy;
@Nullable
private final Map<String, ?> _loadBalancingPolicyConfig;

/**
* Invokes alternative constructor with {@code defaultLoadBalancingPolicy} as {@value ROUND_ROBIN_POLICY}.
* Invokes alternative constructor with {@code defaultLoadBalancingPolicy} and {@code loadBalancingPolicyConfig} as
* {@code null}.
*/
public XdsChannelFactory(SslContext sslContext, String xdsServerUri)
{
this(sslContext, xdsServerUri, ROUND_ROBIN_POLICY);
this(sslContext, xdsServerUri, null, null);
}

/**
* Invokes alternative constructor with {@code loadBalancingPolicyConfig} as {@code null}.
*/
public XdsChannelFactory(SslContext sslContext, String xdsServerUri, @Nullable String defaultLoadBalancingPolicy)
{
this(sslContext, xdsServerUri, defaultLoadBalancingPolicy, null);
}

/**
Expand All @@ -53,17 +66,27 @@ public XdsChannelFactory(SslContext sslContext, String xdsServerUri)
* @param defaultLoadBalancingPolicy If provided, changes the default load balancing policy on the builder to the
* given policy (see
* {@link io.grpc.ManagedChannelBuilder#defaultLoadBalancingPolicy(String)}).
* @param loadBalancingPolicyConfig Can only be provided if {@code defaultLoadBalancingPolicy} is provided. Will be
* provided to {@link io.grpc.ManagedChannelBuilder#defaultServiceConfig(Map)}})
* after being wrapped in a "loadBalancingConfig" JSON context that corresponds
* to the load balancing policy name provided by {@code defaultLoadBalancingPolicy}.
* @see <a href="https://daniel.haxx.se/blog/2012/01/03/getaddrinfo-with-round-robin-dns-and-happy-eyeballs/"/>
* Details on IPv6 routing.
*/
public XdsChannelFactory(
@Nullable SslContext sslContext,
String xdsServerUri,
@Nullable String defaultLoadBalancingPolicy)
@Nullable String defaultLoadBalancingPolicy,
@Nullable Map<String, ?> loadBalancingPolicyConfig)
{
_sslContext = sslContext;
_xdsServerUri = xdsServerUri;
if (defaultLoadBalancingPolicy == null && loadBalancingPolicyConfig != null)
{
_log.warn("loadBalancingPolicyConfig ignored because defaultLoadBalancingPolicy was not provided.");
}
_defaultLoadBalancingPolicy = defaultLoadBalancingPolicy;
_loadBalancingPolicyConfig = loadBalancingPolicyConfig;
}

public ManagedChannel createChannel()
Expand All @@ -79,6 +102,15 @@ public ManagedChannel createChannel()
{
_log.info("Applying custom load balancing policy for xDS channel: {}", _defaultLoadBalancingPolicy);
builder = builder.defaultLoadBalancingPolicy(_defaultLoadBalancingPolicy);

if (_loadBalancingPolicyConfig != null)
{
_log.info("Applying custom load balancing config for xDS channel: {}", _loadBalancingPolicyConfig);
builder = builder
.defaultServiceConfig(
singletonMap("loadBalancingConfig",
singletonList(singletonMap(_defaultLoadBalancingPolicy, _loadBalancingPolicyConfig))));
}
}

if (_sslContext != null)
Expand Down
19 changes: 17 additions & 2 deletions d2/src/main/java/com/linkedin/d2/xds/XdsClientImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package com.linkedin.d2.xds;

import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.rpc.Code;
import com.linkedin.d2.jmx.XdsClientJmx;
Expand Down Expand Up @@ -444,18 +445,22 @@ private static final class DiscoveryResponseData
private final ResourceType _resourceType;
private final List<Resource> _resources;
private final String _nonce;
@Nullable
private final String _controlPlaneIdentifier;

DiscoveryResponseData(ResourceType resourceType, List<Resource> resources, String nonce)
DiscoveryResponseData(ResourceType resourceType, List<Resource> resources, String nonce,
@Nullable String controlPlaneIdentifier)
{
_resourceType = resourceType;
_resources = resources;
_nonce = nonce;
_controlPlaneIdentifier = controlPlaneIdentifier;
}

static DiscoveryResponseData fromEnvoyProto(DeltaDiscoveryResponse proto)
{
return new DiscoveryResponseData(ResourceType.fromTypeUrl(proto.getTypeUrl()), proto.getResourcesList(),
proto.getNonce());
proto.getNonce(), Strings.emptyToNull(proto.getControlPlane().getIdentifier()));
}

ResourceType getResourceType()
Expand All @@ -473,6 +478,12 @@ String getNonce()
return _nonce;
}

@Nullable
String getControlPlaneIdentifier()
{
return _controlPlaneIdentifier;
}

@Override
public String toString()
{
Expand Down Expand Up @@ -614,6 +625,10 @@ private void handleResponse(DiscoveryResponseData response)
{
return;
}
if (!_responseReceived && response.getControlPlaneIdentifier() != null)
{
_log.info("Successfully established stream with ADS server: {}", response.getControlPlaneIdentifier());
}
_responseReceived = true;
String respNonce = response.getNonce();
ResourceType resourceType = response.getResourceType();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version=29.51.0
version=29.51.1
group=com.linkedin.pegasus
org.gradle.configureondemand=true
org.gradle.parallel=true
Expand Down

0 comments on commit 48f38b5

Please sign in to comment.