[RUBY-3496] Fix legacy read pool retry error (#2878)

Co-authored-by: Dmitry Rybakov <[email protected]> Co-authored-by: Dmitry Rybakov <[email protected]>
mongodb · Jul 4, 2024 · 3c5dc93 · 3c5dc93
1 parent de60b9e
commit 3c5dc93
Show file tree

Hide file tree

Showing 4 changed files with 75 additions and 38 deletions.
diff --git a/lib/mongo/retryable/base_worker.rb b/lib/mongo/retryable/base_worker.rb
@@ -49,7 +49,8 @@ def initialize(retryable)
 
       private
 
-      # Indicate which exception classes that are generally retryable. 
+      # Indicate which exception classes that are generally retryable
+      # when using modern retries mechanism.
       #
       # @return [ Array<Mongo:Error> ] Array of exception classes that are
       #   considered retryable.
@@ -58,18 +59,42 @@ def retryable_exceptions
           Error::ConnectionPerished,
           Error::ServerNotUsable,
           Error::SocketError,
-          Error::SocketTimeoutError
+          Error::SocketTimeoutError,
         ].freeze
       end
 
+      # Indicate which exception classes that are generally retryable
+      # when using legacy retries mechanism.
+      #
+      # @return [ Array<Mongo:Error> ] Array of exception classes that are
+      #   considered retryable.
+      def legacy_retryable_exceptions
+        [
+          Error::ConnectionPerished,
+          Error::ServerNotUsable,
+          Error::SocketError,
+          Error::SocketTimeoutError,
+          Error::PoolClearedError,
+          Error::PoolPausedError,
+        ].freeze
+      end
+
+
       # Tests to see if the given exception instance is of a type that can
-      # be retried.
+      # be retried with modern retry mechanism.
       #
       # @return [ true | false ] true if the exception is retryable.
       def is_retryable_exception?(e)
         retryable_exceptions.any? { |klass| klass === e }
       end
 
+      # Tests to see if the given exception instance is of a type that can
+      # be retried with legacy retry mechanism.
+      #
+      # @return [ true | false ] true if the exception is retryable.
+      def is_legacy_retryable_exception?(e)
+        legacy_retryable_exceptions.any? { |klass| klass === e }
+      end
       # Logs the given deprecation warning the first time it is called for a
       # given key; after that, it does nothing when given the same key.
       def deprecation_warning(key, warning)

diff --git a/lib/mongo/retryable/read_worker.rb b/lib/mongo/retryable/read_worker.rb
@@ -198,7 +198,7 @@ def modern_read_with_retry(session, server_selector, &block)
         raise e if !is_retryable_exception?(e) && !e.write_retryable?
         retry_read(e, session, server_selector, failed_server: server, &block)
       end
-  
+
       # Attempts to do a "legacy" read with retry. The operation will be
       # attempted multiple times, up to the client's `max_read_retries`
       # setting.
@@ -213,17 +213,18 @@ def modern_read_with_retry(session, server_selector, &block)
       def legacy_read_with_retry(session, server_selector, &block)
         attempt = attempt ? attempt + 1 : 1
         yield select_server(cluster, server_selector, session)
-      rescue *retryable_exceptions, Error::OperationFailure, Error::PoolError => e
+      rescue *legacy_retryable_exceptions, Error::OperationFailure => e
         e.add_notes('legacy retry', "attempt #{attempt}")
-
-        if is_retryable_exception?(e)
+
+        if is_legacy_retryable_exception?(e)
+
           raise e if attempt > client.max_read_retries || session&.in_transaction?
         elsif e.retryable? && !session&.in_transaction?
           raise e if attempt > client.max_read_retries
         else
           raise e
         end
-        
+
         log_retry(e, message: 'Legacy read retry')
         sleep(client.read_retry_interval) unless is_retryable_exception?(e)
         retry
@@ -261,7 +262,7 @@ def read_without_retry(session, server_selector, &block)
       # @param [ Mongo::Server ] failed_server The server on which the original
       #   operation failed.
       # @param [ Proc ] block The block to execute.
-      # 
+      #
       # @return [ Result ] The result of the operation.
       def retry_read(original_error, session, server_selector, failed_server: nil, &block)
         begin
@@ -270,9 +271,9 @@ def retry_read(original_error, session, server_selector, failed_server: nil, &bl
           original_error.add_note("later retry failed: #{e.class}: #{e}")
           raise original_error
         end
-  
+
         log_retry(original_error, message: 'Read retry')
-  
+
         begin
           yield server, true
         rescue *retryable_exceptions => e

diff --git a/lib/mongo/retryable/write_worker.rb b/lib/mongo/retryable/write_worker.rb
@@ -104,7 +104,7 @@ def nro_write_with_retry(write_concern, context:, &block)
         session = context.session
         server = select_server(cluster, ServerSelector.primary, session)
         options = session&.client&.options || {}
-        
+
         if options[:retry_writes]
           begin
             server.with_connection(connection_global_id: context.connection_global_id) do |connection|
@@ -219,7 +219,7 @@ def legacy_write_with_retry(server = nil, context:)
       def modern_write_with_retry(session, server, context, &block)
         txn_num = nil
         connection_succeeded = false
-        
+
         server.with_connection(connection_global_id: context.connection_global_id) do |connection|
           connection_succeeded = true
 
@@ -264,7 +264,7 @@ def retry_write(original_error, txn_num, context:, failed_server: nil, &block)
         # a socket error or a not master error should have marked the respective
         # server unknown). Here we just need to wait for server selection.
         server = select_server(cluster, ServerSelector.primary, session, failed_server)
-        
+
         unless server.retry_writes?
           # Do not need to add "modern retry" here, it should already be on
           # the first exception.
@@ -278,7 +278,7 @@ def retry_write(original_error, txn_num, context:, failed_server: nil, &block)
           # special marker class to bypass the ordinarily applicable rescues.
           raise Error::RaiseOriginalError
         end
-        
+
         log_retry(original_error, message: 'Write retry')
         server.with_connection(connection_global_id: context.connection_global_id) do |connection|
           yield(connection, txn_num, context)

diff --git a/spec/integration/retryable_reads_errors_spec.rb b/spec/integration/retryable_reads_errors_spec.rb
@@ -74,31 +74,42 @@
       client.subscribe(Mongo::Monitoring::CONNECTION_POOL, subscriber)
     end
 
-    it "retries on PoolClearedError" do
-      # After the first find fails, the pool is paused and retry is triggered.
-      # Now, a race is started between the second find acquiring a connection,
-      # and the first retrying the read. Now, retry reads cause the cluster to
-      # be rescanned and the pool to be unpaused, allowing the second checkout
-      # to succeed (when it should fail). Therefore we want the second find's
-      # check out to win the race. This gives the check out a little head start.
-      allow_any_instance_of(Mongo::Server::ConnectionPool).to receive(:ready).and_wrap_original do |m, *args, &block|
-        ::Utils.wait_for_condition(5) do
-          # check_out_results should contain:
-          # - find1 connection check out successful
-          # - pool cleared
-          # - find2 connection check out failed
-          # We wait here for the third event to happen before we ready the pool.
-          cmap_events.select do |e|
-            event_types.include?(e.class)
-          end.length >= 3
+    shared_examples_for 'retries on PoolClearedError' do
+      it "retries on PoolClearedError" do
+        # After the first find fails, the pool is paused and retry is triggered.
+        # Now, a race is started between the second find acquiring a connection,
+        # and the first retrying the read. Now, retry reads cause the cluster to
+        # be rescanned and the pool to be unpaused, allowing the second checkout
+        # to succeed (when it should fail). Therefore we want the second find's
+        # check out to win the race. This gives the check out a little head start.
+        allow_any_instance_of(Mongo::Server::ConnectionPool).to receive(:ready).and_wrap_original do |m, *args, &block|
+          ::Utils.wait_for_condition(5) do
+            # check_out_results should contain:
+            # - find1 connection check out successful
+            # - pool cleared
+            # - find2 connection check out failed
+            # We wait here for the third event to happen before we ready the pool.
+            cmap_events.select do |e|
+              event_types.include?(e.class)
+            end.length >= 3
+          end
+          m.call(*args, &block)
         end
-        m.call(*args, &block)
+        threads.map(&:join)
+        expect(check_out_results[0]).to be_a(Mongo::Monitoring::Event::Cmap::ConnectionCheckedOut)
+        expect(check_out_results[1]).to be_a(Mongo::Monitoring::Event::Cmap::PoolCleared)
+        expect(check_out_results[2]).to be_a(Mongo::Monitoring::Event::Cmap::ConnectionCheckOutFailed)
+        expect(find_events.length).to eq(3)
       end
-      threads.map(&:join)
-      expect(check_out_results[0]).to be_a(Mongo::Monitoring::Event::Cmap::ConnectionCheckedOut)
-      expect(check_out_results[1]).to be_a(Mongo::Monitoring::Event::Cmap::PoolCleared)
-      expect(check_out_results[2]).to be_a(Mongo::Monitoring::Event::Cmap::ConnectionCheckOutFailed)
-      expect(find_events.length).to eq(3)
+    end
+
+    it_behaves_like 'retries on PoolClearedError'
+
+    context 'legacy read retries' do
+
+      let(:client) { authorized_client.with(options.merge(retry_reads: false, max_read_retries: 1)) }
+
+      it_behaves_like 'retries on PoolClearedError'
     end
 
     after do