Skip to content

Commit

Permalink
Merge pull request #3392 from alphagov/2938-silently-handle-invalid-b…
Browse files Browse the repository at this point in the history
…yte-sequence-in-utf-8-errors-l-2

Handle incorrectly UTF-8 encoded query and cookie url
  • Loading branch information
unoduetre authored Oct 31, 2024
2 parents 1888a5f + 99f3f8e commit 61dcb7a
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 1 deletion.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ gem "govuk_personalisation"
gem "govuk_publishing_components"
gem "htmlentities"
gem "plek"
gem "rack-utf8_sanitizer"
gem "rails-controller-testing"
gem "rails-i18n"
gem "rails_translation_manager"
Expand Down
3 changes: 3 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,8 @@ GEM
rack (>= 3.0.0)
rack-test (2.1.0)
rack (>= 1.3)
rack-utf8_sanitizer (1.9.1)
rack (>= 1.0, < 4.0)
rackup (2.1.0)
rack (>= 3)
webrick (~> 1.8)
Expand Down Expand Up @@ -670,6 +672,7 @@ DEPENDENCIES
mocha
plek
pry-byebug
rack-utf8_sanitizer
rails (= 7.2.1.2)
rails-controller-testing
rails-i18n
Expand Down
13 changes: 12 additions & 1 deletion config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
require "action_view/railtie"
# require "action_cable/engine"
require "rails/test_unit/railtie"
require_relative "../lib/sanitiser/strategy"

# Require the gems listed in Gemfile, including any gems
# you've limited to :test, :development, or :production.
Expand All @@ -26,7 +27,7 @@ class Application < Rails::Application
# Please, add to the `ignore` list any other `lib` subdirectories that do
# not contain `.rb` files, or that should not be reloaded or eager loaded.
# Common ones are `templates`, `generators`, or `middleware`, for example.
config.autoload_lib(ignore: %w[assets tasks])
config.autoload_lib(ignore: %w[assets tasks sanitiser])

# Settings in config/environments/* take precedence over those specified here.
# Application configuration can go into files in config/initializers
Expand Down Expand Up @@ -128,5 +129,15 @@ class Application < Rails::Application

# Do not swallow errors in after_commit/after_rollback callbacks.
# config.active_record.raise_in_transactional_callbacks = true

# Protect from "invalid byte sequence in UTF-8" errors,
# when a query or a cookie is a string with incorrect UTF-8 encoding.
config.middleware.insert_before(
0,
Rack::UTF8Sanitizer,
sanitizable_content_types: [],
only: %w[QUERY_STRING],
strategy: Sanitiser::Strategy,
)
end
end
18 changes: 18 additions & 0 deletions lib/sanitiser/strategy.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
module Sanitiser
class Strategy
class SanitisingError < StandardError; end

class << self
def call(input, sanitize_null_bytes: false)
input
.force_encoding(Encoding::ASCII_8BIT)
.encode!(Encoding::UTF_8)
if sanitize_null_bytes && Rack::UTF8Sanitizer::NULL_BYTE_REGEX.match?(input)
raise NullByteInString
end
rescue StandardError
raise SanitisingError, "Non-UTF-8 (or null) character in the query or in the cookie"
end
end
end
end
84 changes: 84 additions & 0 deletions test/integration/sanitiser_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
require "test_helper"

class SanitiserTest < ActiveSupport::TestCase
include Rack::Test::Methods

def app
Rails.application
end

test "with query being correct percent-encoded UTF-8 string does not raise exception" do
get "/?%41"
assert last_response.successful?
end

test "with query being incorrect percent-encoded UTF-8 string raises SanitisingError" do
assert_raises(Sanitiser::Strategy::SanitisingError) do
get "/?%AD"
end
end

test "with cookie key being correct UTF-8 does not raise exception" do
header("Cookie", "\x41=value")
get "/"
assert last_response.successful?
end

test "with cookie key being incorrect UTF-8 raises exception" do
header("Cookie", "\xAD=value")
assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do
get "/"
end
end

test "with cookie value being correct UTF-8 does not raise exception" do
header("Cookie", "key=\x41")
get "/"
assert last_response.successful?
end

test "with cookie value being incorrect UTF-8 raises exception" do
header("Cookie", "key=\xAD")
assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do
get "/"
end
end

test "with cookie path being correct UTF-8 does not raise exception" do
header("Cookie", "key=value; Path=/\x41")
get "/"
assert last_response.successful?
end

test "with cookie path being incorrect UTF-8 raises exception" do
header("Cookie", "key=value; Path=/\xAD")
assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do
get "/"
end
end

test "with cookie path being correct percent-encoded UTF-8 does not raise exception" do
header("Cookie", "key=value; Path=/%41")
get "/"
assert last_response.successful?
end

test "with cookie path being incorrect percent-encoded UTF-8 raises SanitisingError" do
header("Cookie", "key=value; Path=/%AD")
assert_raises(Sanitiser::Strategy::SanitisingError) do
get "/"
end
end

test "with referrer headerbeing correct percent-encoded UTF-8 does not raise exception" do
header("Referer", "http://example.com/?%41")
get "/"
assert last_response.successful?
end

test "with referrer header being incorrect percent-encoded UTF-8 does not raise exception" do
header("Referer", "http://example.com/?%AD")
get "/"
assert last_response.successful?
end
end

0 comments on commit 61dcb7a

Please sign in to comment.