✨ Add BINARY FETCH support [🚧 docs]

Lex and parse LITERAL8, string8, and nstring8. Add section_binary and section_part. The BINARY extension isn't _fully_ supported; that requires updates to the APPEND command. But this should be sufficient for IMAP4rev2, which only requires the FETCH part of the extension.
ruby · Oct 26, 2023 · 164c099 · 164c099
1 parent 6dda581
commit 164c099
Show file tree

Hide file tree

Showing 3 changed files with 121 additions and 35 deletions.
diff --git a/lib/net/imap/response_parser.rb b/lib/net/imap/response_parser.rb
@@ -54,6 +54,7 @@ def parse(str)
       T_STAR     = :STAR         # atom special; list wildcard
       T_PERCENT  = :PERCENT      # atom special; list wildcard
       T_LITERAL  = :LITERAL      # starts with atom special
+      T_LITERAL8 = :LITERAL8     # starts with atom char "~"
       T_CRLF     = :CRLF         # atom special; text special; quoted special
       T_TEXT     = :TEXT         # any char except CRLF
       T_EOF      = :EOF          # end of response string
@@ -231,6 +232,16 @@ module RFC3629
         #                        ; sent from server to the client.
         LITERAL              = /\{(\d+)\}\r\n/n
 
+        # RFC3516 (BINARY):
+        #   literal8         =   "~{" number "}" CRLF *OCTET
+        #                        ; <number> represents the number of OCTETs
+        #                        ; in the response string.
+        # RFC9051:
+        #   literal8         =  "~{" number64 "}" CRLF *OCTET
+        #                        ; <number64> represents the number of OCTETs
+        #                        ; in the response string.
+        LITERAL8             = /~\{(\d+)\}\r\n/n
+
         module_function
 
         def unescape_quoted!(quoted)
@@ -250,27 +261,28 @@ def unescape_quoted(quoted)
       # the default, used in most places
       BEG_REGEXP = /\G(?:\
 (?# 1:  SPACE   )( )|\
-(?# 2:  ATOM prefixed with a compatible subtype)\
+(?# 2:  LITERAL8)#{Patterns::LITERAL8}|\
+(?# 3:  ATOM prefixed with a compatible subtype)\
 ((?:\
-(?# 3:  NIL     )(NIL)|\
-(?# 4:  NUMBER  )(\d+)|\
-(?# 5:  PLUS    )(\+))\
-(?# 6:  ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
+(?# 4:  NIL     )(NIL)|\
+(?# 5:  NUMBER  )(\d+)|\
+(?# 6:  PLUS    )(\+))\
+(?# 7:  ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
 (?# This enables greedy alternation without lookahead, in linear time.)\
 )|\
 (?# Also need to check for ATOM without a subtype prefix.)\
-(?# 7:  ATOM    )(#{Patterns::ATOMISH})|\
-(?# 8:  QUOTED  )#{Patterns::QUOTED_rev2}|\
-(?# 9: LPAR    )(\()|\
-(?# 10: RPAR    )(\))|\
-(?# 11: BSLASH  )(\\)|\
-(?# 12: STAR    )(\*)|\
-(?# 13: LBRA    )(\[)|\
-(?# 14: RBRA    )(\])|\
-(?# 15: LITERAL )#{Patterns::LITERAL}|\
-(?# 16: PERCENT )(%)|\
-(?# 17: CRLF    )(\r\n)|\
-(?# 18: EOF     )(\z))/ni
+(?# 8:  ATOM    )(#{Patterns::ATOMISH})|\
+(?# 9:  QUOTED  )#{Patterns::QUOTED_rev2}|\
+(?# 10: LPAR    )(\()|\
+(?# 11: RPAR    )(\))|\
+(?# 12: BSLASH  )(\\)|\
+(?# 13: STAR    )(\*)|\
+(?# 14: LBRA    )(\[)|\
+(?# 15: RBRA    )(\])|\
+(?# 16: LITERAL )#{Patterns::LITERAL}|\
+(?# 17: PERCENT )(%)|\
+(?# 18: CRLF    )(\r\n)|\
+(?# 19: EOF     )(\z))/ni
 
       # envelope, body(structure), namespaces
       DATA_REGEXP = /\G(?:\
@@ -311,6 +323,9 @@ def unescape_quoted(quoted)
       #   string          = quoted / literal
       def_token_matchers :string,  T_QUOTED, T_LITERAL
 
+      # used by nstring8 = nstring / literal8
+      def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8
+
       # use where string represents "LABEL" values
       def_token_matchers :case_insensitive__string,
                          T_QUOTED, T_LITERAL,
@@ -404,6 +419,10 @@ def nstring
         NIL? ? nil : string
       end
 
+      def nstring8 # nstring8 = nstring / literal8
+        NIL? ? nil : string8 # nil when the NIL? matcher succeeds; otherwise the string8 value
+      end
+
       def nquoted
         NIL? ? nil : quoted
       end
@@ -680,6 +699,8 @@ def msg_att(n)
             when "ENVELOPE"             then envelope
             when "INTERNALDATE"         then date_time
             when "RFC822.SIZE"          then number64
+            when /\ABINARY\[/ni         then nstring8           # BINARY, IMAP4rev2
+            when /\ABINARY\.SIZE\[/ni   then number             # BINARY, IMAP4rev2
             when "RFC822"               then nstring            # not in rev2
             when "RFC822.HEADER"        then nstring            # not in rev2
             when "RFC822.TEXT"          then nstring            # not in rev2
@@ -702,11 +723,18 @@ def msg_att__label
           lbra? and rbra
         when "BODY"
           peek_lbra? and name << section and
-            peek_str?("<") and name << atom # partial
+            peek_str?("<") and name << gt__number__lt # partial
+        when "BINARY", "BINARY.SIZE"
+          name << section_binary
+          # see https://www.rfc-editor.org/errata/eid7246 and the note above
+          peek_str?("<") and name << gt__number__lt # partial
         end
         name
       end
 
+      # this represents the partial origin octet ("<" number ">") for BODY or BINARY
+      alias gt__number__lt atom
+
       def envelope
         @lex_state = EXPR_DATA
         token = lookahead
@@ -1010,6 +1038,13 @@ def section
         str << rbra
       end
 
+      # section-binary  = "[" [section-part] "]"
+      def section_binary # returns the bracketed section text as a single string
+        str = +lbra # unary plus yields an unfrozen string that can be appended to
+        str << section_part unless peek_rbra? # section-part is optional: skipped when peek_rbra? is true
+        str << rbra
+      end
+
       # section-spec    = section-msgtext / (section-part ["." section-text])
       # section-msgtext = "HEADER" /
       #                   "HEADER.FIELDS" [".NOT"] SP header-list /
@@ -1040,6 +1075,11 @@ def header_list
         str << rpar
       end
 
+      # section-part    = nz-number *("." nz-number)
+      #                     ; body part reference.
+      #                     ; Allows for accessing nested body parts.
+      alias section_part atom
+
       # RFC3501 & RFC9051:
       # header-fld-name = astring
       #
@@ -1710,42 +1750,47 @@ def next_token
             @pos = $~.end(0)
             if $1
               return Token.new(T_SPACE, $+)
-            elsif $2 && $6
+            elsif $2
+              len = $+.to_i
+              val = @str[@pos, len]
+              @pos += len
+              return Token.new(T_LITERAL8, val)
+            elsif $3 && $7
               # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
-              return Token.new(T_ATOM, $2)
-            elsif $3
-              return Token.new(T_NIL, $+)
+              return Token.new(T_ATOM, $3)
             elsif $4
-              return Token.new(T_NUMBER, $+)
+              return Token.new(T_NIL, $+)
             elsif $5
+              return Token.new(T_NUMBER, $+)
+            elsif $6
               return Token.new(T_PLUS, $+)
-            elsif $7
+            elsif $8
               # match ATOM, without a NUMBER, NIL, or PLUS prefix
               return Token.new(T_ATOM, $+)
-            elsif $8
-              return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
             elsif $9
-              return Token.new(T_LPAR, $+)
+              return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
             elsif $10
-              return Token.new(T_RPAR, $+)
+              return Token.new(T_LPAR, $+)
             elsif $11
-              return Token.new(T_BSLASH, $+)
+              return Token.new(T_RPAR, $+)
             elsif $12
-              return Token.new(T_STAR, $+)
+              return Token.new(T_BSLASH, $+)
             elsif $13
-              return Token.new(T_LBRA, $+)
+              return Token.new(T_STAR, $+)
             elsif $14
-              return Token.new(T_RBRA, $+)
+              return Token.new(T_LBRA, $+)
             elsif $15
+              return Token.new(T_RBRA, $+)
+            elsif $16
               len = $+.to_i
               val = @str[@pos, len]
               @pos += len
               return Token.new(T_LITERAL, val)
-            elsif $16
-              return Token.new(T_PERCENT, $+)
             elsif $17
-              return Token.new(T_CRLF, $+)
+              return Token.new(T_PERCENT, $+)
             elsif $18
+              return Token.new(T_CRLF, $+)
+            elsif $19
               return Token.new(T_EOF, $+)
             else
               parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")

diff --git a/test/net/imap/fixtures/response_parser/ruby.png b/test/net/imap/fixtures/response_parser/ruby.png
diff --git a/test/net/imap/test_imap_response_parser.rb b/test/net/imap/test_imap_response_parser.rb
@@ -99,4 +99,45 @@ def teardown
   # response data, should still use normal tests, below
   ############################################################################
 
+  # Strangely, there are no example responses for BINARY[section] in either
+  # RFC3516 or RFC9051!  The closest I found was RFC5259, and those examples
+  # aren't FETCH responses.
+  def test_fetch_binary_and_binary_size
+    debug, Net::IMAP.debug = Net::IMAP.debug, true # save the current flag; the ensure clause restores it
+    png      = File.binread(File.join(TEST_FIXTURE_PATH, "ruby.png"))
+    size     = png.bytesize
+    parser   = Net::IMAP::ResponseParser.new
+    # BINARY[section-part] sent as a literal8: the value must come back byte-for-byte, as a binary string
+    response = "* 1 FETCH (UID 5 BINARY[3.2] ~{%d}\r\n%s)\r\n".b % [size, png]
+    parsed   = parser.parse response
+    assert_equal png,              parsed.data.attr["BINARY[3.2]"]
+    assert_equal png.bytesize,     parsed.data.attr["BINARY[3.2]"].bytesize
+    assert_equal Encoding::BINARY, parsed.data.attr["BINARY[3.2]"].encoding
+    # BINARY.SIZE[section] parses as a number; a partial BINARY fetch keeps its "<0>" suffix in the attr key
+    partial  = png[0, 32]
+    response = "* 1 FETCH (BINARY.SIZE[5] %d BINARY[5]<0> ~{32}\r\n%s)\r\n".b %
+      [png.bytesize, partial]
+    parsed   = parser.parse response
+    assert_equal png.bytesize, parsed.data.attr["BINARY.SIZE[5]"]
+    assert_equal 32,           parsed.data.attr["BINARY[5]<0>"].bytesize
+    assert_equal partial,      parsed.data.attr["BINARY[5]<0>"]
+    # a BINARY value may arrive in any nstring8 form: literal8, literal, quoted, or NIL
+    literal8 = "\x00 to \xff\r\n".b * 8 # includes a NUL octet; the other two samples start at \x01
+    literal  = "\x01 to \xff\r\n".b * 8
+    quoted   = "\x01 to \x7f\b\t".b * 8
+    response = "* 1 FETCH (" \
+               "BINARY[1] ~{%d}\r\n%s " \
+               "BINARY[2] {%d}\r\n%s " \
+               "BINARY[3] \"%s\" " \
+               "BINARY[4] NIL)\r\n".b %
+               [literal8.bytesize, literal8, literal.bytesize, literal, quoted]
+    parsed   = parser.parse response
+    assert_equal literal8, parsed.data.attr["BINARY[1]"]
+    assert_equal literal,  parsed.data.attr["BINARY[2]"]
+    assert_equal quoted,   parsed.data.attr["BINARY[3]"]
+    assert_nil             parsed.data.attr["BINARY[4]"]
+  ensure
+    Net::IMAP.debug = debug # put back the debug flag saved on entry
+  end
+
 end