✨ Add nameprep stringprep profile

This isn't used by or needed for Net::IMAP, but adding a new profile is trivial with the new generic stringprep code. Nameprep is used by IDNA, and not useful without punycode. But, with the addition of punycode, Net::IMAP could also support IDNA hostnames. On the other hand, IDNA and punycode should probably be added to `uri`, not `net-imap`. Perhaps `stringprep` should be extracted to its own gem?
ruby · Aug 28, 2023 · ab05234 · ab05234
1 parent fb9d9ee
commit ab05234
Show file tree

Hide file tree

Showing 4 changed files with 268 additions and 1 deletion.
diff --git a/lib/net/imap/stringprep.rb b/lib/net/imap/stringprep.rb
@@ -9,6 +9,7 @@ class IMAP < Protocol
     # codepoint table defined in the RFC-3454 appendices is matched by a Regexp
     # defined in this module.
     module StringPrep
+      autoload :NamePrep, File.expand_path("stringprep/nameprep", __dir__)
       autoload :SASLprep, File.expand_path("stringprep/saslprep", __dir__)
       autoload :Tables,   File.expand_path("stringprep/tables",   __dir__)
       autoload :Trace,    File.expand_path("stringprep/trace",    __dir__)

diff --git a/lib/net/imap/stringprep/nameprep.rb b/lib/net/imap/stringprep/nameprep.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module Net
+  class IMAP
+    module StringPrep
+
+      # Defined in RFC3491[https://tools.ietf.org/html/rfc3491], the +nameprep+
+      # profile of "Stringprep" is:
+      # >>>
+      #   used by the IDNA protocol for preparing domain names; it is not
+      #   designed for any other purpose.  It is explicitly not designed for
+      #   processing arbitrary free text and SHOULD NOT be used for that
+      #   purpose.
+      #
+      #   ...
+      #
+      #   This profile specifies prohibiting using the following tables...:
+      #
+      #   - C.1.2 (Non-ASCII space characters)
+      #   - C.2.2 (Non-ASCII control characters)
+      #   - C.3 (Private use characters)
+      #   - C.4 (Non-character code points)
+      #   - C.5 (Surrogate codes)
+      #   - C.6 (Inappropriate for plain text)
+      #   - C.7 (Inappropriate for canonical representation)
+      #   - C.8 (Change display properties or are deprecated)
+      #   - C.9 (Tagging characters)
+      #
+      #   IMPORTANT NOTE: This profile MUST be used with the IDNA protocol.
+      #   The IDNA protocol has additional prohibitions that are checked
+      #   outside of this profile.
+      module NamePrep
+
+        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §10
+        STRINGPREP_PROFILE = "nameprep"
+
+        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §2
+        UNASSIGNED_TABLE = "A.1"
+
+        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §3
+        MAPPING_TABLES = %w[B.1 B.2].freeze
+
+        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §4
+        NORMALIZATION = :nfkc
+
+        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §5
+        PROHIBITED_TABLES = %w[C.1.2 C.2.2 C.3 C.4 C.5 C.6 C.7 C.8 C.9].freeze
+
+        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §6
+        CHECK_BIDI = true
+
+        module_function
+
+        def nameprep(string, **opts)
+          StringPrep.stringprep(
+            string,
+            unassigned:    UNASSIGNED_TABLE,
+            maps:          MAPPING_TABLES,
+            prohibited:    PROHIBITED_TABLES,
+            normalization: NORMALIZATION,
+            bidi:          CHECK_BIDI,
+            profile:       STRINGPREP_PROFILE,
+            **opts,
+          )
+        end
+      end
+
+    end
+  end
+end
diff --git a/test/net/imap/test_stringprep_nameprep.rb b/test/net/imap/test_stringprep_nameprep.rb
@@ -0,0 +1,197 @@
+# frozen_string_literal: true
+
+require "net/imap"
+require "test/unit"
+
+class StringPrepNamePrepTest < Test::Unit::TestCase
+  include Net::IMAP::StringPrep
+  include Net::IMAP::StringPrep::NamePrep
+
+  # The following test cases were taken from
+  # https://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.txt
+  # ...mostly from Appendix A.
+
+  # Hash[name, [input, expected, stored = false]], where +expected+ is one of:
+  # * a String: the expected prepared output,
+  # * an exception Class: the error +nameprep+ is expected to raise, or
+  # * [exception Class, message pattern]: the error and a pattern for its
+  #   message.
+  # +stored+ is passed to +nameprep+ as the <tt>stored:</tt> option.
+  NAMEPREP_TEST_VECTORS = {
+    "Map to nothing" => [
+      "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B" \
+      "bar\xE2\x80\x8B\xE2\x81\xA0" \
+      "baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF",
+      "foobarbaz"
+    ],
+    "Case folding ASCII U+0043 U+0041 U+0046 U+0045" => [
+      "CAFE", "cafe"
+    ],
+    "Case folding 8bit U+00DF (german sharp s)" => [
+      "\xC3\x9F", "ss"
+    ],
+    "Case folding U+0130 (turkish capital I with dot)" => [
+      "\xC4\xB0", "i\xcc\x87"
+    ],
+    "Case folding multibyte U+0143 U+037A" => [
+      "\xC5\x83\xCD\xBA", "\xC5\x84 \xCE\xB9"
+    ],
+    "Case folding U+2121 U+33C6 U+1D7BB" => [
+      "\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB",
+      "telc\xE2\x88\x95""kg\xCF\x83"
+    ],
+    "Normalization of U+006a U+030c U+00A0 U+00AA" => [
+      "\x6A\xCC\x8C\xC2\xA0\xC2\xAA", "\xC7\xB0 a"
+    ],
+    "Case folding U+1FB7 and normalization" => [
+      "\xE1\xBE\xB7", "\xE1\xBE\xB6\xCE\xB9"
+    ],
+    "Incorrect UTF-8 encoding of U+00DF" => [
+      # n.b. this example isn't found in Appendix A, but is in §7.
+      "\xC3\xdf", [ArgumentError, /invalid byte sequence in UTF-8/]
+    ],
+    "Incorrect UTF-8 encoding of U+01F0" => [
+      # n.b. Appendix A doesn't indicate an error for this, but §7 does.
+      "\xC7\xF0", [ArgumentError, /invalid byte sequence in UTF-8/]
+    ],
+    "Self-reverting case folding U+0390 and normalization" => [
+      "\xCE\x90", "\xCE\x90"
+    ],
+    "Self-reverting case folding U+03B0 and normalization" => [
+      "\xCE\xB0", "\xCE\xB0"
+    ],
+    "Self-reverting case folding U+1E96 and normalization" => [
+      "\xE1\xBA\x96", "\xE1\xBA\x96"
+    ],
+    "Self-reverting case folding U+1F56 and normalization" => [
+      "\xE1\xBD\x96", "\xE1\xBD\x96"
+    ],
+    "ASCII space character U+0020" => [
+      "\x20", "\x20"
+    ],
+    "Non-ASCII 8bit space character U+00A0" => [
+      "\xC2\xA0", "\x20"
+    ],
+    "Non-ASCII multibyte space character U+1680" => [
+      "\xE1\x9A\x80", ProhibitedCodepoint
+    ],
+    "Non-ASCII multibyte space character U+2000" => [
+      "\xE2\x80\x80", "\x20"
+    ],
+    "Zero Width Space U+200b" => [
+      "\xE2\x80\x8b", ""
+    ],
+    "Non-ASCII multibyte space character U+3000" => [
+      "\xE3\x80\x80", "\x20"
+    ],
+    "ASCII control characters U+0010 U+007F" => [
+      "\x10\x7F", "\x10\x7F"
+    ],
+    "Non-ASCII 8bit control character U+0085" => [
+      "\xC2\x85", ProhibitedCodepoint
+    ],
+    "Non-ASCII multibyte control character U+180E" => [
+      "\xE1\xA0\x8E", ProhibitedCodepoint
+    ],
+    "Zero Width No-Break Space U+FEFF" => [
+      "\xEF\xBB\xBF", ""
+    ],
+    "Non-ASCII control character U+1D175" => [
+      "\xF0\x9D\x85\xB5", ProhibitedCodepoint
+    ],
+    "Plane 0 private use character U+F123" => [
+      "\xEF\x84\xA3", ProhibitedCodepoint
+    ],
+    "Plane 15 private use character U+F1234" => [
+      "\xF3\xB1\x88\xB4", ProhibitedCodepoint
+    ],
+    "Plane 16 private use character U+10F234" => [
+      "\xF4\x8F\x88\xB4", ProhibitedCodepoint
+    ],
+    "Non-character code point U+8FFFE" => [
+      "\xF2\x8F\xBF\xBE", ProhibitedCodepoint
+    ],
+    "Non-character code point U+10FFFF" => [
+      "\xF4\x8F\xBF\xBF", ProhibitedCodepoint
+    ],
+    "Surrogate code U+DF42" => [
+      "\xED\xBD\x82", [ArgumentError, /invalid byte sequence in UTF-8/]
+    ],
+    "Non-plain text character U+FFFD" => [
+      "\xEF\xBF\xBD", ProhibitedCodepoint
+    ],
+    "Ideographic description character U+2FF5" => [
+      "\xE2\xBF\xB5", ProhibitedCodepoint
+    ],
+    "Display property character U+0341" => [
+      "\xCD\x81", "\xCC\x81"
+    ],
+    "Left-to-right mark U+200E" => [
+      "\xE2\x80\x8E", ProhibitedCodepoint
+    ],
+    "Deprecated U+202A" => [
+      "\xE2\x80\xAA", ProhibitedCodepoint
+    ],
+    "Language tagging character U+E0001" => [
+      "\xF3\xA0\x80\x81", ProhibitedCodepoint
+    ],
+    "Language tagging character U+E0042" => [
+      "\xF3\xA0\x81\x82", ProhibitedCodepoint
+    ],
+    "Bidi: RandALCat character U+05BE and LCat characters" => [
+      "foo\xD6\xBE""bar",
+      [BidiStringError, /string with RandALCat.* must not contain LCat/]
+    ],
+    "Bidi: RandALCat character U+FD50 and LCat characters" => [
+      "foo\xEF\xB5\x90""bar",
+      [BidiStringError, /string with RandALCat.* must not contain LCat/]
+    ],
+    "Bidi: RandALCat character U+FB38 and LCat characters" => [
+      # n.b. the input bytes EF B9 B6 encode U+FE76, not U+FB38.  The test
+      # name is left unchanged (presumably inherited from the source vectors).
+      "foo\xEF\xB9\xB6""bar", "foo \xd9\x8e""bar"
+    ],
+    "Bidi: RandALCat without trailing RandALCat U+0627 U+0031" => [
+      "\xD8\xA7\x31",
+      [BidiStringError,
+       /string with RandALCat.* must start and end with RandALCat/]
+    ],
+    "Bidi: RandALCat character U+0627 U+0031 U+0628" => [
+      "\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8"
+    ],
+    "Unassigned code point U+E0002" => [
+      "\xF3\xA0\x80\x82",
+      [ProhibitedCodepoint, /contains.* unassigned code points.*Unicode 3.2/i],
+      true
+    ],
+    "Larger test (shrinking)" => [
+      "X\xC2\xAD\xC3\x9F\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2" \
+      "\xaa\xce\xb0\xe2\x80\x80",
+      "xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 "
+    ],
+    "Larger test (expanding)" => [
+      "X\xC3\x9F\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
+      "xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88" \
+      "\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82\xa2\xe3\x83\x91" \
+      "\xe3\x83\xbc\xe3\x83\x88"
+    ],
+  }.freeze
+
+  NAMEPREP_TEST_VECTORS.each do |comment, (input, output, stored)|
+    stored ||= false
+    ex, message = output
+    case output
+    when String
+      test comment do
+        assert_equal output, nameprep(input, stored: stored), comment
+      end
+    when Class
+      if message # in Class => ex, (String | Regexp) => message
+        test comment do
+          assert_raise_with_message(ex, message, comment) {
+            nameprep(input, stored: stored)
+          }
+        end
+      else # in Class => ex
+        test comment do
+          assert_raise(ex, comment) { nameprep(input, stored: stored) }
+        end
+      end
+    end
+  end
+
+end
diff --git a/test/net/imap/test_stringprep_profiles.rb b/test/net/imap/test_stringprep_profiles.rb
@@ -1,7 +1,6 @@
 # frozen_string_literal: true
 
 require "net/imap"
-require "net/imap/sasl/stringprep"
 require "test/unit"
 
 class StringPrepProfilesTest < Test::Unit::TestCase