diff --git a/Microsoft.Azure.Cosmos/src/ConnectionPolicy.cs b/Microsoft.Azure.Cosmos/src/ConnectionPolicy.cs index b54c4837c4..1e35ebf4c9 100644 --- a/Microsoft.Azure.Cosmos/src/ConnectionPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ConnectionPolicy.cs @@ -506,6 +506,7 @@ public bool? EnableAdvancedReplicaSelectionForTcp set; } + /// /// (Direct/TCP) This is an advanced setting that controls the number of TCP connections that will be opened eagerly to each Cosmos DB back-end. /// @@ -530,6 +531,15 @@ internal CosmosClientTelemetryOptions CosmosClientTelemetryOptions set; } + /// + /// provides SessionTokenMismatchRetryPolicy optimization through customer supplied region switch hints + /// + internal SessionRetryOptions SessionRetryOptions + { + get; + set; + } + /// /// GlobalEndpointManager will subscribe to this event if user updates the preferredLocations list in the Azure Cosmos DB service. /// diff --git a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs index a5c945ff1f..0b40813fd9 100644 --- a/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs +++ b/Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs @@ -53,7 +53,7 @@ public class CosmosClientOptions private const string ConnectionStringAccountKey = "AccountKey"; private const string ConnectionStringDisableServerCertificateValidation = "DisableServerCertificateValidation"; - private const ApiType DefaultApiType = ApiType.None; + private const ApiType DefaultApiType = ApiType.None; /// /// Default request timeout @@ -65,14 +65,15 @@ public class CosmosClientOptions private ConnectionMode connectionMode; private Protocol connectionProtocol; - private TimeSpan? idleTcpConnectionTimeout; + private TimeSpan? idleTcpConnectionTimeout; private TimeSpan? openTcpConnectionTimeout; private int? maxRequestsPerTcpConnection; private int? maxTcpConnectionsPerEndpoint; private PortReuseMode? portReuseMode; private IWebProxy webProxy; private Func httpClientFactory; - private string applicationName; + private string applicationName; + private SessionRetryOptions sessionRetryOptions; /// /// Creates a new CosmosClientOptions @@ -86,7 +87,8 @@ public CosmosClientOptions() this.ConnectionProtocol = CosmosClientOptions.DefaultProtocol; this.ApiType = CosmosClientOptions.DefaultApiType; this.CustomHandlers = new Collection(); - this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions(); + this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions(); + this.sessionRetryOptions = new SessionRetryOptions(); } /// @@ -315,8 +317,8 @@ public ConnectionMode ConnectionMode /// This can be used to weaken the database account consistency level for read operations. /// If this is not set the database account consistency level will be used for all requests. /// - public ConsistencyLevel? ConsistencyLevel { get; set; } - + public ConsistencyLevel? ConsistencyLevel { get; set; } + /// /// Sets the priority level for requests created using cosmos client. /// @@ -460,8 +462,8 @@ public TimeSpan? IdleTcpConnectionTimeout this.idleTcpConnectionTimeout = value; this.ValidateDirectTCPSettings(); } - } - + } + /// /// (Direct/TCP) Controls the amount of time allowed for trying to establish a connection. /// @@ -726,7 +728,39 @@ public Func HttpClientFactory internal #endif AvailabilityStrategy AvailabilityStrategy { get; set; } - + + /// + /// provides SessionTokenMismatchRetryPolicy optimization through customer supplied region switch hints + /// + internal SessionRetryOptions SessionRetryOptions + { + get => this.sessionRetryOptions; + set + { + if (value.RemoteRegionPreferred) + { + + if (value.MinInRegionRetryTime == null) + { + throw new ArgumentException($" Argument 'MinInRegionRetryTime' must not be null when RemoteRegionPreferred option is selected."); + } + + if (value.MinInRegionRetryTime.TotalMilliseconds < ConfigurationManager.MinMinInRegionRetryTimeForWritesInMs) + { + throw new ArgumentException($" Argument 'MinInRegionRetryTime' in the SessionRetryOptions must be set and have at least a value of " + + "{ConfigurationManager.MinMinInRegionRetryTimeForWritesInMs} ms"); + } + + if (value.MaxInRegionRetryCount < ConfigurationManager.MinMaxRetriesInLocalRegionWhenRemoteRegionPreferred) + { + throw new ArgumentException($" Argument 'MaxInRegionRetryCount' in the SessionRetryOptions must have at least a value of " + + "{ConfigurationManager.MinMaxRetriesInLocalRegionWhenRemoteRegionPreferred}"); + } + } + + this.sessionRetryOptions = value; + } + } /// /// Enable partition key level failover /// @@ -919,7 +953,7 @@ internal virtual ConnectionPolicy GetConnectionPolicy(int clientId) this.ValidateDirectTCPSettings(); this.ValidateLimitToEndpointSettings(); this.ValidatePartitionLevelFailoverSettings(); - this.ValidateAvailabilityStrategy(); + this.ValidateAvailabilityStrategy(); ConnectionPolicy connectionPolicy = new ConnectionPolicy() { @@ -929,7 +963,8 @@ internal virtual ConnectionPolicy GetConnectionPolicy(int clientId) ConnectionProtocol = this.ConnectionProtocol, UserAgentContainer = this.CreateUserAgentContainerWithFeatures(clientId), UseMultipleWriteLocations = true, - IdleTcpConnectionTimeout = this.IdleTcpConnectionTimeout, + IdleTcpConnectionTimeout = this.IdleTcpConnectionTimeout, + SessionRetryOptions = this.SessionRetryOptions, OpenTcpConnectionTimeout = this.OpenTcpConnectionTimeout, MaxRequestsPerTcpConnection = this.MaxRequestsPerTcpConnection, MaxTcpConnectionsPerEndpoint = this.MaxTcpConnectionsPerEndpoint, @@ -940,7 +975,7 @@ internal virtual ConnectionPolicy GetConnectionPolicy(int clientId) EnableAdvancedReplicaSelectionForTcp = this.EnableAdvancedReplicaSelectionForTcp, HttpClientFactory = this.httpClientFactory, ServerCertificateCustomValidationCallback = this.ServerCertificateCustomValidationCallback, - CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions() + CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions() }; if (this.CosmosClientTelemetryOptions != null) @@ -1105,7 +1140,8 @@ private void ValidateAvailabilityStrategy() { throw new ArgumentException($"{nameof(this.ApplicationPreferredRegions)} or {nameof(this.ApplicationRegion)} must be set to use {nameof(this.AvailabilityStrategy)}"); } - } + } + private void ValidateDirectTCPSettings() { diff --git a/Microsoft.Azure.Cosmos/src/DocumentClient.cs b/Microsoft.Azure.Cosmos/src/DocumentClient.cs index 946fa4cb08..4b1e99c4ad 100644 --- a/Microsoft.Azure.Cosmos/src/DocumentClient.cs +++ b/Microsoft.Azure.Cosmos/src/DocumentClient.cs @@ -6723,7 +6723,8 @@ private void CreateStoreModel(bool subscribeRntbdStatus) !this.enableRntbdChannel, this.UseMultipleWriteLocations && (this.accountServiceConfiguration.DefaultConsistencyLevel != Documents.ConsistencyLevel.Strong), true, - enableReplicaValidation: this.isReplicaAddressValidationEnabled); + enableReplicaValidation: this.isReplicaAddressValidationEnabled, + this.ConnectionPolicy.SessionRetryOptions); if (subscribeRntbdStatus) { diff --git a/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs b/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs index 3182e17192..43ee42d8df 100644 --- a/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs +++ b/Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs @@ -580,7 +580,20 @@ public CosmosClientBuilder WithSerializerOptions(CosmosSerializationOptions cosm { this.clientOptions.SerializerOptions = cosmosSerializerOptions; return this; - } + } + + /// + /// provides SessionTokenMismatchRetryPolicy optimization through customer supplied region switch hints + /// + /// customer supplied region switch hints + /// The object + public CosmosClientBuilder WithSessionRetryOptions(SessionRetryOptions sessionRetryOptions) + { + this.clientOptions.SessionRetryOptions = sessionRetryOptions; + return this; + } + + /// /// Set a custom JSON serializer. diff --git a/Microsoft.Azure.Cosmos/src/RMResources.Designer.cs b/Microsoft.Azure.Cosmos/src/RMResources.Designer.cs index e79ff1bcbb..bad6de32dc 100644 --- a/Microsoft.Azure.Cosmos/src/RMResources.Designer.cs +++ b/Microsoft.Azure.Cosmos/src/RMResources.Designer.cs @@ -46,7 +46,7 @@ internal RMResources() if (object.ReferenceEquals(resourceMan, null)) { #if COSMOSCLIENT - global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Microsoft.Azure.Documents.RMResources", typeof(RMResources).Assembly); + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Microsoft.Azure.Cosmos.RMResources", typeof(RMResources).Assembly); #else global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Microsoft.Azure.Documents.RMResources", typeof(RMResources).GetAssembly()); #endif diff --git a/Microsoft.Azure.Cosmos/src/RequestOptions/RequestOptions.cs b/Microsoft.Azure.Cosmos/src/RequestOptions/RequestOptions.cs index 7555c6b78d..ee6adfde2a 100644 --- a/Microsoft.Azure.Cosmos/src/RequestOptions/RequestOptions.cs +++ b/Microsoft.Azure.Cosmos/src/RequestOptions/RequestOptions.cs @@ -82,7 +82,7 @@ public class RequestOptions internal #endif AvailabilityStrategy AvailabilityStrategy { get; set; } - + /// /// Gets or sets the boolean to use effective partition key routing in the cosmos db request. /// diff --git a/Microsoft.Azure.Cosmos/src/SessionRetryOptions.cs b/Microsoft.Azure.Cosmos/src/SessionRetryOptions.cs new file mode 100644 index 0000000000..2b36db8bdc --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/SessionRetryOptions.cs @@ -0,0 +1,33 @@ +namespace Microsoft.Azure.Cosmos +{ + using System; + using Microsoft.Azure.Documents; + + /// + /// Implementation of ISessionRetryOptions interface, do not want clients to subclass. + /// + public sealed class SessionRetryOptions : ISessionRetryOptions + { + /// + /// Sets the minimum retry time for 404/1002 retries within each region for read and write operations. + /// The minimum value is 100ms - this minimum is enforced to provide a way for the local region to catch-up on replication lag. The default value is 500ms - as a recommendation ensure that this value is higher than the steady-state + /// replication latency between the regions you chose + /// + public TimeSpan MinInRegionRetryTime { get; set; } = ConfigurationManager.GetMinRetryTimeInLocalRegionWhenRemoteRegionPreferred(); + + /// + /// Sets the maximum number of retries within each region for read and write operations. The minimum value is 1 - the backoff time for the last in-region retry will ensure that the total retry time within the + /// region is at least the min. in-region retry time. + /// + public int MaxInRegionRetryCount { get; set; } = ConfigurationManager.GetMaxRetriesInLocalRegionWhenRemoteRegionPreferred(); + + + /// + /// hints which guide SDK-internal retry policies on how early to switch retries to a different region. + /// + public Boolean RemoteRegionPreferred { get; set; } = false; + + + + } +} diff --git a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs index bffb8b93f9..3dd50d15b4 100644 --- a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs +++ b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs @@ -35,7 +35,25 @@ internal static class ConfigurationManager /// /// Environment variable name to enable distributed query gateway mode. /// - internal static readonly string DistributedQueryGatewayModeEnabled = "AZURE_COSMOS_DISTRIBUTED_QUERY_GATEWAY_ENABLED"; + internal static readonly string DistributedQueryGatewayModeEnabled = "AZURE_COSMOS_DISTRIBUTED_QUERY_GATEWAY_ENABLED"; + + + /// + /// intent is If a client specify a value, we will force it to be atleast 100ms, otherwise default is going to be 500ms + /// + internal static readonly string MinInRegionRetryTimeForWritesInMs = "AZURE_COSMOS_SESSION_TOKEN_MISMATCH_IN_REGION_RETRY_TIME_IN_MILLISECONDS"; + internal static readonly int DefaultMinInRegionRetryTimeForWritesInMs = 500; + internal static readonly int MinMinInRegionRetryTimeForWritesInMs = 100; + + + /// + /// intent is If a client specify a value, we will force it to be atleast 1, otherwise default is going to be 1(right now both the values are 1 but we have the provision to change them in future). + /// + internal static readonly string MaxRetriesInLocalRegionWhenRemoteRegionPreferred = "AZURE_COSMOS_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED"; + internal static readonly int DefaultMaxRetriesInLocalRegionWhenRemoteRegionPreferred = 1; + internal static readonly int MinMaxRetriesInLocalRegionWhenRemoteRegionPreferred = 1; + + public static T GetEnvironmentVariable(string variable, T defaultValue) { @@ -124,6 +142,34 @@ public static bool IsDistributedQueryGatewayModeEnabled( .GetEnvironmentVariable( variable: DistributedQueryGatewayModeEnabled, defaultValue: defaultValue); - } + } + + + public static int GetMaxRetriesInLocalRegionWhenRemoteRegionPreferred() + { + return Math.Max( + ConfigurationManager + .GetEnvironmentVariable( + variable: MaxRetriesInLocalRegionWhenRemoteRegionPreferred, + defaultValue: DefaultMaxRetriesInLocalRegionWhenRemoteRegionPreferred), + MinMaxRetriesInLocalRegionWhenRemoteRegionPreferred); + + + } + + public static TimeSpan GetMinRetryTimeInLocalRegionWhenRemoteRegionPreferred() + { + + return TimeSpan.FromMilliseconds(Math.Max( + ConfigurationManager + .GetEnvironmentVariable( + variable: MinInRegionRetryTimeForWritesInMs, + defaultValue: DefaultMinInRegionRetryTimeForWritesInMs), + MinMinInRegionRetryTimeForWritesInMs)); + + + } + + } } diff --git a/Microsoft.Azure.Cosmos/src/direct/ConsistencyReader.cs b/Microsoft.Azure.Cosmos/src/direct/ConsistencyReader.cs index 746bc7e743..7bf475e4bf 100644 --- a/Microsoft.Azure.Cosmos/src/direct/ConsistencyReader.cs +++ b/Microsoft.Azure.Cosmos/src/direct/ConsistencyReader.cs @@ -9,6 +9,7 @@ namespace Microsoft.Azure.Documents using System.Net; using System.Threading; using System.Threading.Tasks; + using Microsoft.Azure.Cosmos; using Microsoft.Azure.Cosmos.Core.Trace; using Microsoft.Azure.Documents.Collections; @@ -138,6 +139,7 @@ internal sealed class ConsistencyReader private readonly IAuthorizationTokenProvider authorizationTokenProvider; private readonly StoreReader storeReader; private readonly QuorumReader quorumReader; + private readonly ISessionRetryOptions sessionRetryOptions; public ConsistencyReader( AddressSelector addressSelector, @@ -145,11 +147,13 @@ public ConsistencyReader( TransportClient transportClient, IServiceConfigurationReader serviceConfigReader, IAuthorizationTokenProvider authorizationTokenProvider, - bool enableReplicaValidation) + bool enableReplicaValidation, + ISessionRetryOptions sessionRetryOptions = null) { this.addressSelector = addressSelector; this.serviceConfigReader = serviceConfigReader; this.authorizationTokenProvider = authorizationTokenProvider; + this.sessionRetryOptions = sessionRetryOptions; this.storeReader = new StoreReader(transportClient, addressSelector, new AddressEnumerator(), sessionContainer, enableReplicaValidation); this.quorumReader = new QuorumReader(transportClient, addressSelector, this.storeReader, serviceConfigReader, authorizationTokenProvider); } @@ -233,7 +237,8 @@ public Task ReadAsync( { return BackoffRetryUtility.ExecuteAsync( callbackMethod: () => this.ReadSessionAsync(entity, desiredReadMode), - retryPolicy: new SessionTokenMismatchRetryPolicy(), + retryPolicy: new SessionTokenMismatchRetryPolicy( + sessionRetryOptions: this.sessionRetryOptions), cancellationToken: cancellationToken); } else diff --git a/Microsoft.Azure.Cosmos/src/direct/ConsistencyWriter.cs b/Microsoft.Azure.Cosmos/src/direct/ConsistencyWriter.cs index 180ad3fa84..d27cd959d6 100644 --- a/Microsoft.Azure.Cosmos/src/direct/ConsistencyWriter.cs +++ b/Microsoft.Azure.Cosmos/src/direct/ConsistencyWriter.cs @@ -11,6 +11,7 @@ namespace Microsoft.Azure.Documents using System.Linq; using System.Threading; using System.Threading.Tasks; + using Microsoft.Azure.Cosmos; using Microsoft.Azure.Cosmos.Core.Trace; /* @@ -57,6 +58,7 @@ internal sealed class ConsistencyWriter private readonly IServiceConfigurationReader serviceConfigReader; private readonly IAuthorizationTokenProvider authorizationTokenProvider; private readonly bool useMultipleWriteLocations; + private readonly ISessionRetryOptions sessionRetryOptions; public ConsistencyWriter( AddressSelector addressSelector, @@ -65,7 +67,8 @@ public ConsistencyWriter( IServiceConfigurationReader serviceConfigReader, IAuthorizationTokenProvider authorizationTokenProvider, bool useMultipleWriteLocations, - bool enableReplicaValidation) + bool enableReplicaValidation, + ISessionRetryOptions sessionRetryOptions = null) { this.transportClient = transportClient; this.addressSelector = addressSelector; @@ -73,6 +76,7 @@ public ConsistencyWriter( this.serviceConfigReader = serviceConfigReader; this.authorizationTokenProvider = authorizationTokenProvider; this.useMultipleWriteLocations = useMultipleWriteLocations; + this.sessionRetryOptions = sessionRetryOptions; this.storeReader = new StoreReader( transportClient, addressSelector, @@ -130,7 +134,8 @@ public async Task WriteAsync( { return await BackoffRetryUtility.ExecuteAsync( callbackMethod: () => this.WritePrivateAsync(entity, timeout, forceRefresh), - retryPolicy: new SessionTokenMismatchRetryPolicy(), + retryPolicy: new SessionTokenMismatchRetryPolicy( + sessionRetryOptions: this.sessionRetryOptions), cancellationToken: cancellationToken); } finally diff --git a/Microsoft.Azure.Cosmos/src/direct/ISessionRetryOptions.cs b/Microsoft.Azure.Cosmos/src/direct/ISessionRetryOptions.cs new file mode 100644 index 0000000000..17481dc0b4 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/direct/ISessionRetryOptions.cs @@ -0,0 +1,33 @@ +namespace Microsoft.Azure.Documents +{ + using System; + + internal interface ISessionRetryOptions + { + + /// + /// Sets the minimum retry time for 404/1002 retries within each region for read and write operations. + /// The minimum value is 100ms - this minimum is enforced to provide a way for the local region to catch-up on replication lag. The default value is 500ms - as a recommendation ensure that this value is higher than the steady-state + /// replication latency between the regions you chose + /// + TimeSpan MinInRegionRetryTime { get; } + + /// + /// Sets the maximum number of retries within each region for read and write operations. The minimum value is 1 - the backoff time for the last in-region retry will ensure that the total retry time within the + /// region is at least the min. in-region retry time. + /// + int MaxInRegionRetryCount { get; } + + + /// + /// hints which guide SDK-internal retry policies on how early to switch retries to a different region. + /// + Boolean RemoteRegionPreferred { get;} + + + + } +} + + + diff --git a/Microsoft.Azure.Cosmos/src/direct/IStoreClientFactory.cs b/Microsoft.Azure.Cosmos/src/direct/IStoreClientFactory.cs index dd3d02fd01..8a39665736 100644 --- a/Microsoft.Azure.Cosmos/src/direct/IStoreClientFactory.cs +++ b/Microsoft.Azure.Cosmos/src/direct/IStoreClientFactory.cs @@ -5,6 +5,7 @@ namespace Microsoft.Azure.Documents { using System; + using Microsoft.Azure.Cosmos; internal interface IStoreClientFactory: IDisposable { @@ -18,6 +19,8 @@ StoreClient CreateStoreClient( bool useFallbackClient = true, bool useMultipleWriteLocations = false, bool detectClientConnectivityIssues = false, - bool enableReplicaValidation = false); + bool enableReplicaValidation = false, + ISessionRetryOptions sessionRetryOptions = null + ); } } diff --git a/Microsoft.Azure.Cosmos/src/direct/ReplicatedResourceClient.cs b/Microsoft.Azure.Cosmos/src/direct/ReplicatedResourceClient.cs index 7e19472e83..01c9ee7f71 100644 --- a/Microsoft.Azure.Cosmos/src/direct/ReplicatedResourceClient.cs +++ b/Microsoft.Azure.Cosmos/src/direct/ReplicatedResourceClient.cs @@ -7,6 +7,7 @@ namespace Microsoft.Azure.Documents using System.Globalization; using System.Threading; using System.Threading.Tasks; + using Microsoft.Azure.Cosmos; using Microsoft.Azure.Cosmos.Core.Trace; using Microsoft.Azure.Documents.Client; @@ -70,7 +71,8 @@ public ReplicatedResourceClient( bool detectClientConnectivityIssues, bool disableRetryWithRetryPolicy, bool enableReplicaValidation, - RetryWithConfiguration retryWithConfiguration = null) + RetryWithConfiguration retryWithConfiguration = null, + ISessionRetryOptions sessionRetryOptions = null) { this.addressResolver = addressResolver; this.addressSelector = new AddressSelector(addressResolver, protocol); @@ -90,7 +92,8 @@ public ReplicatedResourceClient( transportClient, serviceConfigReader, authorizationTokenProvider, - enableReplicaValidation); + enableReplicaValidation, + sessionRetryOptions); this.consistencyWriter = new ConsistencyWriter( this.addressSelector, sessionContainer, @@ -98,7 +101,8 @@ public ReplicatedResourceClient( serviceConfigReader, authorizationTokenProvider, useMultipleWriteLocations, - enableReplicaValidation); + enableReplicaValidation, + sessionRetryOptions); this.enableReadRequestsFallback = enableReadRequestsFallback; this.useMultipleWriteLocations = useMultipleWriteLocations; this.detectClientConnectivityIssues = detectClientConnectivityIssues; diff --git a/Microsoft.Azure.Cosmos/src/direct/SessionTokenMismatchRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/direct/SessionTokenMismatchRetryPolicy.cs index 3316c10791..cda1e203c9 100644 --- a/Microsoft.Azure.Cosmos/src/direct/SessionTokenMismatchRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/direct/SessionTokenMismatchRetryPolicy.cs @@ -8,6 +8,8 @@ namespace Microsoft.Azure.Documents using System.Net; using System.Threading; using System.Threading.Tasks; + using HdrHistogram.Utilities; + using Microsoft.Azure.Cosmos; using Microsoft.Azure.Cosmos.Core.Trace; internal sealed class SessionTokenMismatchRetryPolicy : IRetryPolicy @@ -18,7 +20,10 @@ internal sealed class SessionTokenMismatchRetryPolicy : IRetryPolicy private const int defaultWaitTimeInMilliSeconds = 5000; private const int defaultInitialBackoffTimeInMilliseconds = 5; private const int defaultMaximumBackoffTimeInMilliseconds = 500; - private const int backoffMultiplier = 2; + private const int backoffMultiplier = 5; // before it was very aggressive + private readonly ISessionRetryOptions sessionRetryOptions; + private readonly DateTimeOffset startTime = DateTime.UtcNow; + private static readonly Lazy sessionRetryInitialBackoffConfig; private static readonly Lazy sessionRetryMaximumBackoffConfig; @@ -69,12 +74,14 @@ static SessionTokenMismatchRetryPolicy() }); } - public SessionTokenMismatchRetryPolicy(int waitTimeInMilliSeconds = defaultWaitTimeInMilliSeconds) + public SessionTokenMismatchRetryPolicy(int waitTimeInMilliSeconds = defaultWaitTimeInMilliSeconds, + ISessionRetryOptions sessionRetryOptions = null) { this.durationTimer.Start(); this.retryCount = 0; this.waitTimeInMilliSeconds = waitTimeInMilliSeconds; this.currentBackoffInMilliSeconds = null; + this.sessionRetryOptions = sessionRetryOptions; } public Task ShouldRetryAsync(Exception exception, CancellationToken cancellationToken) @@ -108,6 +115,13 @@ private ShouldRetryResult ShouldRetryInternalAsync( return ShouldRetryResult.NoRetry(); } + if (!this.shouldRetryLocally()) + { + DefaultTrace.TraceInformation("SessionTokenMismatchRetryPolicy not retrying because it a retry attempt for the current region and " + + "fallback to a different region is preferred "); + return ShouldRetryResult.NoRetry(); + } + TimeSpan backoffTime = TimeSpan.Zero; // Don't penalize first retry with delay @@ -130,6 +144,23 @@ private ShouldRetryResult ShouldRetryInternalAsync( } this.retryCount++; + + // For remote region preference ensure that the last retry is long enough (even when exceeding max backoff time) + // to consume the entire minRetryTimeInLocalRegion + if( this.sessionRetryOptions.RemoteRegionPreferred && + this.retryCount >= (this.sessionRetryOptions.MaxInRegionRetryCount - 1)) + { + + long elapsed = DateTimeOffset.Now.ToUnixTimeMilliseconds() - this.startTime.ToUnixTimeMilliseconds(); + TimeSpan remainingMinRetryTimeInLocalRegion = TimeSpan.FromMilliseconds(this.sessionRetryOptions.MinInRegionRetryTime.TotalMilliseconds - elapsed); + + if(remainingMinRetryTimeInLocalRegion.CompareTo(backoffTime) > 0) + { + backoffTime = remainingMinRetryTimeInLocalRegion; + } + + } + DefaultTrace.TraceInformation("SessionTokenMismatchRetryPolicy will retry. Retry count = {0}. Backoff time = {1} ms", this.retryCount, backoffTime.TotalMilliseconds); return ShouldRetryResult.RetryAfter(backoffTime); @@ -139,5 +170,24 @@ private ShouldRetryResult ShouldRetryInternalAsync( return ShouldRetryResult.NoRetry(); } + + private Boolean shouldRetryLocally() + { + if(! this.sessionRetryOptions.RemoteRegionPreferred) + { + return true; + } + + // SessionTokenMismatchRetryPolicy is invoked after 1 attempt on a region + // sessionTokenMismatchRetryAttempts increments only after shouldRetry triggers + // another attempt on the same region + // hence to curb the retry attempts on a region, + // compare sessionTokenMismatchRetryAttempts with max retry attempts allowed on the region - 1 + return this.retryCount <= (this.sessionRetryOptions.MaxInRegionRetryCount - 1); + + } + + + } } diff --git a/Microsoft.Azure.Cosmos/src/direct/StoreClient.cs b/Microsoft.Azure.Cosmos/src/direct/StoreClient.cs index 65ec7963fc..838d3cba00 100644 --- a/Microsoft.Azure.Cosmos/src/direct/StoreClient.cs +++ b/Microsoft.Azure.Cosmos/src/direct/StoreClient.cs @@ -8,6 +8,7 @@ namespace Microsoft.Azure.Documents using System.Net; using System.Threading; using System.Threading.Tasks; + using Microsoft.Azure.Cosmos; using Microsoft.Azure.Cosmos.Rntbd; using Microsoft.Azure.Documents.Client; using Microsoft.Azure.Documents.Collections; @@ -42,7 +43,8 @@ public StoreClient( bool detectClientConnectivityIssues = false, bool disableRetryWithRetryPolicy = false, bool enableReplicaValidation = false, - RetryWithConfiguration retryWithConfiguration = null) + RetryWithConfiguration retryWithConfiguration = null, + ISessionRetryOptions sessionRetryOptions = null) { this.transportClient = transportClient; this.serviceConfigurationReader = serviceConfigurationReader; @@ -70,7 +72,9 @@ public StoreClient( detectClientConnectivityIssues: detectClientConnectivityIssues, disableRetryWithRetryPolicy: disableRetryWithRetryPolicy, retryWithConfiguration: retryWithConfiguration, - enableReplicaValidation: enableReplicaValidation); + enableReplicaValidation: enableReplicaValidation, + sessionRetryOptions: sessionRetryOptions + ); } internal JsonSerializerSettings SerializerSettings { get; set; } diff --git a/Microsoft.Azure.Cosmos/src/direct/StoreClientFactory.cs b/Microsoft.Azure.Cosmos/src/direct/StoreClientFactory.cs index 3dbe550fa8..ecf522d6cc 100644 --- a/Microsoft.Azure.Cosmos/src/direct/StoreClientFactory.cs +++ b/Microsoft.Azure.Cosmos/src/direct/StoreClientFactory.cs @@ -8,6 +8,7 @@ namespace Microsoft.Azure.Documents using System.Diagnostics; using System.Net.Security; using System.Threading.Tasks; + using Microsoft.Azure.Cosmos; using Microsoft.Azure.Cosmos.Core.Trace; using Microsoft.Azure.Documents.Client; using Microsoft.Azure.Documents.FaultInjection; @@ -313,7 +314,9 @@ public StoreClient CreateStoreClient( bool useFallbackClient = true, bool useMultipleWriteLocations = false, bool detectClientConnectivityIssues = false, - bool enableReplicaValidation = false) + bool enableReplicaValidation = false, + ISessionRetryOptions sessionRetryOptions = null + ) { this.ThrowIfDisposed(); if (useFallbackClient && this.fallbackTransportClient != null) @@ -332,7 +335,8 @@ public StoreClient CreateStoreClient( detectClientConnectivityIssues: detectClientConnectivityIssues, disableRetryWithRetryPolicy: this.disableRetryWithRetryPolicy, retryWithConfiguration: this.retryWithConfiguration, - enableReplicaValidation: enableReplicaValidation); + enableReplicaValidation: enableReplicaValidation, + sessionRetryOptions: sessionRetryOptions); } return new StoreClient( @@ -348,7 +352,8 @@ public StoreClient CreateStoreClient( detectClientConnectivityIssues: detectClientConnectivityIssues, disableRetryWithRetryPolicy: this.disableRetryWithRetryPolicy, retryWithConfiguration: this.retryWithConfiguration, - enableReplicaValidation: enableReplicaValidation); + enableReplicaValidation: enableReplicaValidation, + sessionRetryOptions: sessionRetryOptions); } #region IDisposable @@ -382,7 +387,6 @@ private void ThrowIfDisposed() } } #endregion - private static void ValidatePortPoolReuseThreshold(ref int rntbdPortPoolReuseThreshold) { const int minRntbdPortPoolReuseThreshold = 32; diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/ClientTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/ClientTests.cs index 0600c7be85..b2e090b0c8 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/ClientTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/ClientTests.cs @@ -415,6 +415,7 @@ public async Task TestEtagOnUpsertOperationForDirectTCPClient() await this.TestEtagOnUpsertOperation(false, Protocol.Tcp); } + internal async Task TestEtagOnUpsertOperation(bool useGateway, Protocol protocol = Protocol.Tcp) { using (DocumentClient client = TestCommon.CreateClient(false, Protocol.Tcp)) @@ -936,7 +937,7 @@ public async Task MultiRegionAccountTest() AccountProperties properties = await cosmosClient.ReadAccountAsync(); Assert.IsNotNull(properties); } - + public static IReadOnlyList GetActiveConnections() { string testPid = Process.GetCurrentProcess().Id.ToString(); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosAvailabilityStrategyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosAvailabilityStrategyTests.cs index 0bcf55481c..893cacec6a 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosAvailabilityStrategyTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosAvailabilityStrategyTests.cs @@ -25,9 +25,6 @@ public class CosmosAvailabilityStrategyTests private const string centralUS = "Central US"; private const string northCentralUS = "North Central US"; private const string eastUs = "East US"; - private const string dbName = "availabilityStrategyTestDb"; - private const string containerName = "availabilityStrategyTestContainer"; - private const string changeFeedContainerName = "availabilityStrategyTestChangeFeedContainer"; private CosmosClient client; private Database database; @@ -208,8 +205,8 @@ public async Task AvailabilityStrategyNoTriggerTest() connectionString: this.connectionString, clientOptions: faultInjector.GetFaultInjectionClientOptions(clientOptions))) { - Database database = faultInjectionClient.GetDatabase(CosmosAvailabilityStrategyTests.dbName); - Container container = database.GetContainer(CosmosAvailabilityStrategyTests.containerName); + Database database = faultInjectionClient.GetDatabase(MultiRegionSetupHelpers.dbName); + Container container = database.GetContainer(MultiRegionSetupHelpers.containerName); responseDelay.Enable(); ItemResponse ir = await container.ReadItemAsync("testId", new PartitionKey("pk")); @@ -265,8 +262,8 @@ public async Task AvailabilityStrategyRequestOptionsTriggerTest() connectionString: this.connectionString, clientOptions: faultInjector.GetFaultInjectionClientOptions(clientOptions))) { - Database database = faultInjectionClient.GetDatabase(CosmosAvailabilityStrategyTests.dbName); - Container container = database.GetContainer(CosmosAvailabilityStrategyTests.containerName); + Database database = faultInjectionClient.GetDatabase(MultiRegionSetupHelpers.dbName); + Container container = database.GetContainer(MultiRegionSetupHelpers.containerName); responseDelay.Enable(); @@ -327,8 +324,8 @@ public async Task AvailabilityStrategyDisableOverideTest() connectionString: this.connectionString, clientOptions: faultInjector.GetFaultInjectionClientOptions(clientOptions))) { - Database database = faultInjectionClient.GetDatabase(CosmosAvailabilityStrategyTests.dbName); - Container container = database.GetContainer(CosmosAvailabilityStrategyTests.containerName); + Database database = faultInjectionClient.GetDatabase(MultiRegionSetupHelpers.dbName); + Container container = database.GetContainer(MultiRegionSetupHelpers.containerName); responseDelay.Enable(); ItemRequestOptions requestOptions = new ItemRequestOptions @@ -426,8 +423,8 @@ public async Task AvailabilityStrategyAllFaultsTests(string operation, string co connectionString: this.connectionString, clientOptions: faultInjector.GetFaultInjectionClientOptions(clientOptions))) { - Database database = faultInjectionClient.GetDatabase(CosmosAvailabilityStrategyTests.dbName); - Container container = database.GetContainer(CosmosAvailabilityStrategyTests.containerName); + Database database = faultInjectionClient.GetDatabase(MultiRegionSetupHelpers.dbName); + Container container = database.GetContainer(MultiRegionSetupHelpers.containerName); CosmosTraceDiagnostics traceDiagnostic; object hedgeContext; @@ -521,7 +518,7 @@ public async Task AvailabilityStrategyAllFaultsTests(string operation, string co break; case "ChangeFeed": - Container leaseContainer = database.GetContainer(CosmosAvailabilityStrategyTests.changeFeedContainerName); + Container leaseContainer = database.GetContainer(MultiRegionSetupHelpers.changeFeedContainerName); ChangeFeedProcessor changeFeedProcessor = container.GetChangeFeedProcessorBuilder( processorName: "AvialabilityStrategyTest", onChangesDelegate: HandleChangesAsync) @@ -607,8 +604,8 @@ public async Task AvailabilityStrategyStepTests(string operation, string condito connectionString: this.connectionString, clientOptions: faultInjector.GetFaultInjectionClientOptions(clientOptions))) { - Database database = faultInjectionClient.GetDatabase(CosmosAvailabilityStrategyTests.dbName); - Container container = database.GetContainer(CosmosAvailabilityStrategyTests.containerName); + Database database = faultInjectionClient.GetDatabase(MultiRegionSetupHelpers.dbName); + Container container = database.GetContainer(MultiRegionSetupHelpers.containerName); CosmosTraceDiagnostics traceDiagnostic; object hedgeContext; @@ -702,7 +699,7 @@ public async Task AvailabilityStrategyStepTests(string operation, string condito break; case "ChangeFeed": - Container leaseContainer = database.GetContainer(CosmosAvailabilityStrategyTests.changeFeedContainerName); + Container leaseContainer = database.GetContainer(MultiRegionSetupHelpers.changeFeedContainerName); ChangeFeedProcessor changeFeedProcessor = container.GetChangeFeedProcessorBuilder( processorName: "AvialabilityStrategyTest", onChangesDelegate: HandleChangesStepAsync) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosMultiRegionDiagnosticsTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosMultiRegionDiagnosticsTests.cs index 848af2adee..ada0e74b8b 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosMultiRegionDiagnosticsTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosMultiRegionDiagnosticsTests.cs @@ -14,8 +14,6 @@ [TestClass] public class CosmosMultiRegionDiagnosticsTests { - private const string dbName = "availabilityStrategyTestDb"; - private const string containerName = "availabilityStrategyTestContainer"; CosmosClient client; Database database; @@ -30,7 +28,7 @@ public async Task TestInitialize() this.client = new CosmosClient(this.connectionString); DatabaseResponse db = await this.client.CreateDatabaseIfNotExistsAsync( - id: CosmosMultiRegionDiagnosticsTests.dbName, + id: MultiRegionSetupHelpers.dbName, throughput: 400); this.database = db.Database; @@ -49,7 +47,7 @@ public void TestCleanup() [TestCategory("MultiRegion")] public async Task ExlcudeRegionDiagnosticsTest() { - this.container = this.database.GetContainer(CosmosMultiRegionDiagnosticsTests.containerName); + this.container = this.database.GetContainer(MultiRegionSetupHelpers.containerName); ItemResponse itemResponse = await this.container.ReadItemAsync( "testId", new Cosmos.PartitionKey("pk"), new ItemRequestOptions() @@ -69,7 +67,7 @@ public async Task ExlcudeRegionDiagnosticsTest() [TestCategory("MultiRegion")] public async Task ExcludeRegionWithReadManyDiagnosticsTest() { - this.container = this.database.GetContainer(CosmosMultiRegionDiagnosticsTests.containerName); + this.container = this.database.GetContainer(MultiRegionSetupHelpers.containerName); FeedResponse feedResonse = await this.container.ReadManyItemsAsync( new List<(string, PartitionKey)>() @@ -125,8 +123,8 @@ public async Task HedgeNestingDiagnosticsTest() connectionString: this.connectionString, clientOptions: faultInjector.GetFaultInjectionClientOptions(clientOptions))) { - Database database = faultInjectionClient.GetDatabase(CosmosMultiRegionDiagnosticsTests.dbName); - Container container = database.GetContainer(CosmosMultiRegionDiagnosticsTests.containerName); + Database database = faultInjectionClient.GetDatabase(MultiRegionSetupHelpers.dbName); + Container container = database.GetContainer(MultiRegionSetupHelpers.containerName); responseDelay.Enable(); diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SessionRetryOptionsTest.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SessionRetryOptionsTest.cs new file mode 100644 index 0000000000..6ead151367 --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/SessionRetryOptionsTest.cs @@ -0,0 +1,493 @@ +namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests +{ + using System; + using System.Collections.Generic; + using System.Data; + using System.Diagnostics; + using System.Linq; + using System.Net; + using System.Text.Json.Serialization; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos; + using Microsoft.Azure.Cosmos.FaultInjection; + using Microsoft.Azure.Cosmos.Spatial; + using Microsoft.VisualStudio.TestTools.UnitTesting; + using Container = Container; + + [TestClass] + public class SessionRetryOptionsTest + { + private string connectionString; + private IDictionary writeRegionMap; + + // to run code before running each test + [TestInitialize] + public async Task TestInitAsync() + { + this.connectionString = ConfigurationManager.GetEnvironmentVariable("COSMOSDB_MULTI_REGION", null); + if (string.IsNullOrEmpty(this.connectionString)) + { + Assert.Fail("Set environment variable COSMOSDB_MULTI_REGION to run the tests"); + } + + CosmosClient client = new CosmosClient(this.connectionString); + await MultiRegionSetupHelpers.GetOrCreateMultiRegionDatabaseAndContainers(client); + this.writeRegionMap = client.DocumentClient.GlobalEndpointManager.GetAvailableWriteEndpointsByLocation(); + Assert.IsTrue(this.writeRegionMap.Count() >= 2); + + } + + [TestMethod] + public void SessionRetryOptionsDefaultValuesTest() + { + CosmosClientOptions clientOptions = new CosmosClientOptions() + { + SessionRetryOptions = new SessionRetryOptions() + { + RemoteRegionPreferred = true + }, + }; + + + Assert.IsTrue(clientOptions.SessionRetryOptions.MinInRegionRetryTime == ConfigurationManager.GetMinRetryTimeInLocalRegionWhenRemoteRegionPreferred()); + Assert.IsTrue(clientOptions.SessionRetryOptions.MaxInRegionRetryCount == ConfigurationManager.GetMaxRetriesInLocalRegionWhenRemoteRegionPreferred()); + + } + + [TestMethod] + public void SessionRetryOptionsCustomValuesTest() + { + CosmosClientOptions clientOptions = new CosmosClientOptions() + { + SessionRetryOptions = new SessionRetryOptions() + { + RemoteRegionPreferred = true, + MinInRegionRetryTime = TimeSpan.FromSeconds(1), + MaxInRegionRetryCount = 3 + + }, + }; + + Assert.IsTrue(clientOptions.SessionRetryOptions.MinInRegionRetryTime == TimeSpan.FromSeconds(1)); + Assert.IsTrue(clientOptions.SessionRetryOptions.MaxInRegionRetryCount == 3); + + } + + [TestMethod] + public void SessionRetryOptionsMinMaxRetriesCountEnforcedTest() + { + + ArgumentException argumentException = Assert.ThrowsException(() => + new CosmosClientOptions() + { + SessionRetryOptions = new SessionRetryOptions() + { + RemoteRegionPreferred = true, + MaxInRegionRetryCount = 0 + + }, + } + ); + Assert.IsNotNull(argumentException); + + } + + + [TestMethod] + public void SessionRetryOptionsMinMinRetryTimeEnforcedTest() + { + + ArgumentException argumentException = Assert.ThrowsException(() => + new CosmosClientOptions() + { + SessionRetryOptions = new SessionRetryOptions() + { + RemoteRegionPreferred = true, + MinInRegionRetryTime = TimeSpan.FromMilliseconds(99) + + }, + } + ); + Assert.IsNotNull(argumentException); + + } + + + [TestMethod] + [DataRow(FaultInjectionOperationType.ReadItem, 2, true , DisplayName = "Validate Read Item operation with remote region preferred.")] + [DataRow(FaultInjectionOperationType.QueryItem, 1, true, DisplayName = "Validate Query Item operation with remote region preferred.")] + [DataRow(FaultInjectionOperationType.ReadItem, 2, false, DisplayName = "Validate Read Item operation with local region preferred.")] + [DataRow(FaultInjectionOperationType.QueryItem, 2, false, DisplayName = "Validate Query Item operation with local region preferred.")] + [TestCategory("MultiMaster")] + public async Task ReadOperationWithReadSessionUnavailableTest(FaultInjectionOperationType faultInjectionOperationType, + int sessionTokenMismatchRetryAttempts , Boolean remoteRegionPreferred) + { + string[] preferredRegions = this.writeRegionMap.Keys.ToArray(); + // if I go to first region for reading an item, I should get a 404/2002 response for 10 minutes + FaultInjectionRule badSessionTokenRule = new FaultInjectionRuleBuilder( + id: "badSessionTokenRule", + condition: + new FaultInjectionConditionBuilder() + .WithOperationType(faultInjectionOperationType) + .WithRegion(preferredRegions[0]) + .Build(), + result: + FaultInjectionResultBuilder.GetResultBuilder(FaultInjectionServerErrorType.ReadSessionNotAvailable) + .Build()) + .WithDuration(TimeSpan.FromMinutes(10)) + .Build(); + + List rules = new List() { badSessionTokenRule }; + FaultInjector faultInjector = new FaultInjector(rules); + Assert.IsNotNull(faultInjector); + + + CosmosClientOptions clientOptions = new CosmosClientOptions() + { + + SessionRetryOptions = new SessionRetryOptions() + { + RemoteRegionPreferred = remoteRegionPreferred, + MinInRegionRetryTime = TimeSpan.FromMilliseconds(100), + MaxInRegionRetryCount = sessionTokenMismatchRetryAttempts + }, + ConsistencyLevel = ConsistencyLevel.Session, + ApplicationPreferredRegions = preferredRegions, + ConnectionMode = ConnectionMode.Direct, + }; + + using (CosmosClient faultInjectionClient = new CosmosClient( + connectionString: this.connectionString, + clientOptions: faultInjector.GetFaultInjectionClientOptions(clientOptions))) + { + Database database = faultInjectionClient.GetDatabase(MultiRegionSetupHelpers.dbName); + Container container = await database.CreateContainerIfNotExistsAsync("sessionRetryPolicy", "/id"); + string GUID = Guid.NewGuid().ToString(); + dynamic testObject = new + { + id = GUID, + name = "customer one", + address = new + { + line1 = "45 new street", + city = "mckinney", + postalCode = "98989", + } + + }; + + ItemResponse response = await container.CreateItemAsync(testObject); + Assert.IsNotNull(response); + + OperationExecutionResult executionResult = await this.PerformDocumentOperation(faultInjectionOperationType, container, testObject); + this.ValidateOperationExecutionResult(executionResult, remoteRegionPreferred); + + // For a non-write operation, the request can go to multiple replicas (upto 4 replicas) + // Check if the SessionTokenMismatchRetryPolicy retries on the bad / lagging region + // for sessionTokenMismatchRetryAttempts by tracking the badSessionTokenRule hit count + long hitCount = badSessionTokenRule.GetHitCount(); + + if (remoteRegionPreferred) + { + Assert.IsTrue(hitCount >= sessionTokenMismatchRetryAttempts && hitCount <= (1 + sessionTokenMismatchRetryAttempts) * 4); + } + + } + } + + [TestMethod] + [DataRow(FaultInjectionOperationType.CreateItem, 2, true, DisplayName = "Validate Write Item operation with remote region preferred.")] + [DataRow(FaultInjectionOperationType.ReplaceItem, 1, true, DisplayName = "Validate Replace Item operation with remote region preferred.")] + [DataRow(FaultInjectionOperationType.DeleteItem, 2, true, DisplayName = "Validate Delete Item operation with remote region preferred.")] + [DataRow(FaultInjectionOperationType.UpsertItem, 3, true, DisplayName = "Validate Upsert Item operation with remote region preferred.")] + [DataRow(FaultInjectionOperationType.PatchItem, 1, true, DisplayName = "Validate Patch Item operation with remote region preferred.")] + [DataRow(FaultInjectionOperationType.CreateItem, 3, false, DisplayName = "Validate Write Item operation with local region preferred.")] + [DataRow(FaultInjectionOperationType.ReplaceItem, 1, false, DisplayName = "Validate Replace Item operation with local region preferred.")] + [DataRow(FaultInjectionOperationType.DeleteItem, 2, false, DisplayName = "Validate Delete Item operation with local region preferred.")] + [DataRow(FaultInjectionOperationType.UpsertItem, 1, false, DisplayName = "Validate Upsert Item operation with local region preferred.")] + [DataRow(FaultInjectionOperationType.PatchItem, 1, false, DisplayName = "Validate Patch Item operation with remote region preferred.")] + [TestCategory("MultiMaster")] + public async Task WriteOperationWithReadSessionUnavailableTest(FaultInjectionOperationType faultInjectionOperationType, + int sessionTokenMismatchRetryAttempts, Boolean remoteRegionPreferred) + { + + string[] preferredRegions = this.writeRegionMap.Keys.ToArray(); + + FaultInjectionRule badSessionTokenRule = new FaultInjectionRuleBuilder( + id: "badSessionTokenRule", + condition: + new FaultInjectionConditionBuilder() + .WithOperationType(faultInjectionOperationType) + .WithRegion(preferredRegions[0]) + .Build(), + result: + FaultInjectionResultBuilder.GetResultBuilder(FaultInjectionServerErrorType.ReadSessionNotAvailable) + .Build()) + .WithDuration(TimeSpan.FromMinutes(10)) + .Build(); + + List rules = new List() { badSessionTokenRule }; + FaultInjector faultInjector = new FaultInjector(rules); + + CosmosClientOptions clientOptions = new CosmosClientOptions() + { + SessionRetryOptions = new SessionRetryOptions() + { + RemoteRegionPreferred = remoteRegionPreferred, + MinInRegionRetryTime = TimeSpan.FromMilliseconds(100), + MaxInRegionRetryCount = sessionTokenMismatchRetryAttempts + }, + ConsistencyLevel = ConsistencyLevel.Session, + ApplicationPreferredRegions = preferredRegions, + ConnectionMode = ConnectionMode.Direct, + }; + + using (CosmosClient faultInjectionClient = new CosmosClient( + connectionString: this.connectionString, + clientOptions: faultInjector.GetFaultInjectionClientOptions(clientOptions))) + { + Database database = faultInjectionClient.GetDatabase(MultiRegionSetupHelpers.dbName); + Container container = await database.CreateContainerIfNotExistsAsync("sessionRetryPolicy", "/id"); + string GUID = Guid.NewGuid().ToString(); + dynamic testObject = new + { + id = GUID, + name = "customer one", + address = new + { + line1 = "45 new street", + city = "mckinney", + postalCode = "98989", + } + + }; + + OperationExecutionResult executionResult = await this.PerformDocumentOperation(faultInjectionOperationType, container, testObject); + this.ValidateOperationExecutionResult(executionResult, remoteRegionPreferred); + + // For a write operation, the request can just go to the primary replica + // Check if the SessionTokenMismatchRetryPolicy retries on the bad / lagging region + // for sessionTokenMismatchRetryAttempts by tracking the badSessionTokenRule hit count + long hitCount = badSessionTokenRule.GetHitCount(); + if (remoteRegionPreferred) + { + // higher hit count is possible while in MinRetryWaitTimeWithinRegion + Assert.IsTrue(hitCount >= sessionTokenMismatchRetryAttempts); + } + + } + } + + + + private void ValidateOperationExecutionResult(OperationExecutionResult operationExecutionResult, Boolean remoteRegionPreferred) + { + int sessionTokenMismatchDefaultWaitTime = 5000; + + FaultInjectionOperationType executionOpType = operationExecutionResult.OperationType; + HttpStatusCode statusCode = operationExecutionResult.StatusCode; + + int executionDuration = operationExecutionResult.Duration; + Console.WriteLine($" status code is {statusCode}"); + Console.WriteLine($" execution duration is {executionDuration}"); + + if (executionOpType == FaultInjectionOperationType.CreateItem) + { + Assert.IsTrue(statusCode == HttpStatusCode.Created); + } + else if (executionOpType == FaultInjectionOperationType.DeleteItem) + { + Assert.IsTrue(statusCode == HttpStatusCode.NoContent); + + } + else if (executionOpType == FaultInjectionOperationType.UpsertItem) + { + Assert.IsTrue(statusCode == HttpStatusCode.OK || statusCode == HttpStatusCode.Created); + + } + else + { + Assert.IsTrue(statusCode == HttpStatusCode.OK); + } + + if (remoteRegionPreferred) + { + Assert.IsTrue(executionDuration < sessionTokenMismatchDefaultWaitTime); + } + else + { + Assert.IsTrue(executionDuration > sessionTokenMismatchDefaultWaitTime); + } + + } + + + private async Task PerformDocumentOperation(FaultInjectionOperationType operationType, Container container, + dynamic testObject) + { + + Stopwatch durationTimer = new Stopwatch(); + if (operationType == FaultInjectionOperationType.ReadItem) + { + durationTimer.Start(); + ItemResponse itemResponse = await container.ReadItemAsync(testObject.id, + new PartitionKey(testObject.id)); + durationTimer.Stop(); + int timeElapsed = Convert.ToInt32(durationTimer.Elapsed.TotalMilliseconds); + + return new OperationExecutionResult( + itemResponse.Diagnostics, + timeElapsed, + itemResponse.StatusCode, + operationType); + + } + + if (operationType == FaultInjectionOperationType.CreateItem) + { + durationTimer.Start(); + ItemResponse itemResponse = await container.CreateItemAsync(testObject); + + durationTimer.Stop(); + int timeElapsed = Convert.ToInt32(durationTimer.Elapsed.TotalMilliseconds); + + return new OperationExecutionResult( + itemResponse.Diagnostics, + timeElapsed, + itemResponse.StatusCode, + operationType); + + } + + if (operationType == FaultInjectionOperationType.ReplaceItem) + { + + await container.CreateItemAsync(testObject); + durationTimer.Start(); + + ItemResponse itemResponse = await container.ReplaceItemAsync(testObject, testObject.id, new PartitionKey(testObject.id)); + + durationTimer.Stop(); + int timeElapsed = Convert.ToInt32(durationTimer.Elapsed.TotalMilliseconds); + + return new OperationExecutionResult( + itemResponse.Diagnostics, + timeElapsed, + itemResponse.StatusCode, + operationType); + + } + + + if (operationType == FaultInjectionOperationType.UpsertItem) + { + + durationTimer.Start(); + ItemResponse itemResponse = await container.UpsertItemAsync(testObject, new PartitionKey(testObject.id)); + + durationTimer.Stop(); + int timeElapsed = Convert.ToInt32(durationTimer.Elapsed.TotalMilliseconds); + + return new OperationExecutionResult( + itemResponse.Diagnostics, + timeElapsed, + itemResponse.StatusCode, + operationType); + + } + + if (operationType == FaultInjectionOperationType.DeleteItem) + { + + await container.CreateItemAsync(testObject); + + durationTimer.Start(); + ItemResponse itemResponse = await container.DeleteItemAsync(testObject.id, new PartitionKey(testObject.id)); + + durationTimer.Stop(); + int timeElapsed = Convert.ToInt32(durationTimer.Elapsed.TotalMilliseconds); + + return new OperationExecutionResult( + itemResponse.Diagnostics, + timeElapsed, + itemResponse.StatusCode, + operationType); + + } + + if (operationType == FaultInjectionOperationType.QueryItem) + { + durationTimer.Start(); + String query = $"SELECT * from c where c.id = \"{testObject.id}\""; + FeedIterator feed = container.GetItemQueryIterator(query); + Assert.IsTrue(feed.HasMoreResults); + FeedResponse feedResponse = null; + while (feed.HasMoreResults) + { + feedResponse = await feed.ReadNextAsync(); + Assert.IsNotNull(feedResponse); + Console.WriteLine($" feed response count is {feedResponse.Count}"); + Assert.IsTrue(feedResponse.Count == 1); + } + + durationTimer.Stop(); + int timeElapsed = Convert.ToInt32(durationTimer.Elapsed.TotalMilliseconds); + + return new OperationExecutionResult( + feedResponse.Diagnostics, + timeElapsed, + feedResponse.StatusCode, + operationType); + + } + + if (operationType == FaultInjectionOperationType.PatchItem) + { + await container.CreateItemAsync(testObject); + durationTimer.Start(); + + ItemResponse itemResponse = await container.PatchItemAsync(testObject.id, new PartitionKey(testObject.id), + patchOperations: new[] + { + PatchOperation.Replace("/name", "Customer Two") + }); + + durationTimer.Stop(); + int timeElapsed = Convert.ToInt32(durationTimer.Elapsed.TotalMilliseconds); + + + + return new OperationExecutionResult( + itemResponse.Diagnostics, + timeElapsed, + itemResponse.StatusCode, + operationType); + + } + + + + return null; + } + + } + + internal class OperationExecutionResult + { + public CosmosDiagnostics Diagnostics { get; set; } + public int Duration { get; set; } + public HttpStatusCode StatusCode { get; set; } + public FaultInjectionOperationType OperationType { get; set; } + + public OperationExecutionResult(CosmosDiagnostics diagnostics, int duration, HttpStatusCode statusCode, FaultInjectionOperationType operationType) + { + this.Diagnostics = diagnostics; + this.Duration = duration; + this.StatusCode = statusCode; + this.OperationType = operationType; + } + } + + + +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Utils/MultiRegionSetupHelpers.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Utils/MultiRegionSetupHelpers.cs index 971611f5ce..5ceddc4277 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Utils/MultiRegionSetupHelpers.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Utils/MultiRegionSetupHelpers.cs @@ -11,9 +11,9 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests public class MultiRegionSetupHelpers { - private const string dbName = "availabilityStrategyTestDb"; - private const string containerName = "availabilityStrategyTestContainer"; - private const string changeFeedContainerName = "availabilityStrategyTestChangeFeedContainer"; + public const string dbName = "availabilityStrategyTestDb"; + public const string containerName = "availabilityStrategyTestContainer"; + public const string changeFeedContainerName = "availabilityStrategyTestChangeFeedContainer"; public static async Task<(Database, Container, Container)> GetOrCreateMultiRegionDatabaseAndContainers(CosmosClient client) {