diff --git a/lib/rack/utf8_sanitizer.rb b/lib/rack/utf8_sanitizer.rb index e36185d..bcf22e4 100644 --- a/lib/rack/utf8_sanitizer.rb +++ b/lib/rack/utf8_sanitizer.rb @@ -8,6 +8,7 @@ module Rack class UTF8Sanitizer StringIO = ::StringIO NULL_BYTE_REGEX = /\x00/.freeze + NULL_BYTE_STRING_REGEX = Regexp.new('\\\u0000').freeze class NullByteInString < StandardError; end @@ -40,7 +41,7 @@ def call(env) invalid: :replace, undef: :replace) if sanitize_null_bytes - input = input.gsub(NULL_BYTE_REGEX, "") + input = input.gsub(NULL_BYTE_REGEX, "").gsub(NULL_BYTE_STRING_REGEX, '') end input end, @@ -48,7 +49,7 @@ def call(env) input. force_encoding(Encoding::ASCII_8BIT). encode!(Encoding::UTF_8) - if sanitize_null_bytes && NULL_BYTE_REGEX.match?(input) + if sanitize_null_bytes && (NULL_BYTE_REGEX.match?(input) || NULL_BYTE_STRING_REGEX.match?(input)) raise NullByteInString end input @@ -262,7 +263,8 @@ def sanitize_string(input) if input.is_a? String input = input.dup.force_encoding(Encoding::UTF_8) - if input.valid_encoding? && !(@sanitize_null_bytes && input =~ NULL_BYTE_REGEX) + if input.valid_encoding? && + !(@sanitize_null_bytes && (NULL_BYTE_REGEX.match?(input) || NULL_BYTE_STRING_REGEX.match?(input))) input else @strategy.call(input, sanitize_null_bytes: @sanitize_null_bytes) diff --git a/test/test_utf8_sanitizer.rb b/test/test_utf8_sanitizer.rb index e3f4f87..0490fdd 100644 --- a/test/test_utf8_sanitizer.rb +++ b/test/test_utf8_sanitizer.rb @@ -395,6 +395,18 @@ def read end end + it "optionally sanitizes null bytes plain string with the replace strategy" do + @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true) + input = "foo=bla\xED&quux=bar" + '\u0000' + @rack_input = StringIO.new input + + sanitize_form_data do |sanitized_input| + sanitized_input.encoding.should == Encoding::UTF_8 + sanitized_input.should.be.valid_encoding + sanitized_input.should == "foo=bla%EF%BF%BD&quux=bar" + end + end + it "optionally sanitizes encoded null bytes with the replace strategy" do @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitize_null_bytes: true) input = "foo=bla%ED&quux=bar%00"