diff --git a/lib/net/imap/response_parser.rb b/lib/net/imap/response_parser.rb index 08a0e9200..18f45dd19 100644 --- a/lib/net/imap/response_parser.rb +++ b/lib/net/imap/response_parser.rb @@ -54,6 +54,7 @@ def parse(str) T_STAR = :STAR # atom special; list wildcard T_PERCENT = :PERCENT # atom special; list wildcard T_LITERAL = :LITERAL # starts with atom special + T_LITERAL8 = :LITERAL8 # starts with atom char "~" T_CRLF = :CRLF # atom special; text special; quoted special T_TEXT = :TEXT # any char except CRLF T_EOF = :EOF # end of response string @@ -231,6 +232,16 @@ module RFC3629 # ; sent from server to the client. LITERAL = /\{(\d+)\}\r\n/n + # RFC3516 (BINARY): + # literal8 = "~{" number "}" CRLF *OCTET + # ; represents the number of OCTETs + # ; in the response string. + # RFC9051: + # literal8 = "~{" number64 "}" CRLF *OCTET + # ; represents the number of OCTETs + # ; in the response string. + LITERAL8 = /~\{(\d+)\}\r\n/n + module_function def unescape_quoted!(quoted) @@ -250,27 +261,28 @@ def unescape_quoted(quoted) # the default, used in most places BEG_REGEXP = /\G(?:\ (?# 1: SPACE )( )|\ -(?# 2: ATOM prefixed with a compatible subtype)\ +(?# 2: LITERAL8)#{Patterns::LITERAL8}|\ +(?# 3: ATOM prefixed with a compatible subtype)\ ((?:\ -(?# 3: NIL )(NIL)|\ -(?# 4: NUMBER )(\d+)|\ -(?# 5: PLUS )(\+))\ -(?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\ +(?# 4: NIL )(NIL)|\ +(?# 5: NUMBER )(\d+)|\ +(?# 6: PLUS )(\+))\ +(?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\ (?# This enables greedy alternation without lookahead, in linear time.)\ )|\ (?# Also need to check for ATOM without a subtype prefix.)\ -(?# 7: ATOM )(#{Patterns::ATOMISH})|\ -(?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\ -(?# 9: LPAR )(\()|\ -(?# 10: RPAR )(\))|\ -(?# 11: BSLASH )(\\)|\ -(?# 12: STAR )(\*)|\ -(?# 13: LBRA )(\[)|\ -(?# 14: RBRA )(\])|\ -(?# 15: LITERAL )#{Patterns::LITERAL}|\ -(?# 16: PERCENT )(%)|\ -(?# 17: CRLF )(\r\n)|\ -(?# 18: EOF )(\z))/ni +(?# 8: ATOM 
)(#{Patterns::ATOMISH})|\ +(?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\ +(?# 10: LPAR )(\()|\ +(?# 11: RPAR )(\))|\ +(?# 12: BSLASH )(\\)|\ +(?# 13: STAR )(\*)|\ +(?# 14: LBRA )(\[)|\ +(?# 15: RBRA )(\])|\ +(?# 16: LITERAL )#{Patterns::LITERAL}|\ +(?# 17: PERCENT )(%)|\ +(?# 18: CRLF )(\r\n)|\ +(?# 19: EOF )(\z))/ni # envelope, body(structure), namespaces DATA_REGEXP = /\G(?:\ @@ -311,6 +323,9 @@ def unescape_quoted(quoted) # string = quoted / literal def_token_matchers :string, T_QUOTED, T_LITERAL + # used by nstring8 = nstring / literal8 + def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8 + # use where string represents "LABEL" values def_token_matchers :case_insensitive__string, T_QUOTED, T_LITERAL, @@ -404,6 +419,10 @@ def nstring NIL? ? nil : string end + def nstring8 + NIL? ? nil : string8 + end + def nquoted NIL? ? nil : quoted end @@ -680,6 +699,8 @@ def msg_att(n) when "ENVELOPE" then envelope when "INTERNALDATE" then date_time when "RFC822.SIZE" then number64 + when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2 + when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2 when "RFC822" then nstring # not in rev2 when "RFC822.HEADER" then nstring # not in rev2 when "RFC822.TEXT" then nstring # not in rev2 @@ -702,11 +723,18 @@ def msg_att__label lbra? and rbra when "BODY" peek_lbra? and name << section and - peek_str?("<") and name << atom # partial + peek_str?("<") and name << gt__number__lt # partial + when "BINARY", "BINARY.SIZE" + name << section_binary + # see https://www.rfc-editor.org/errata/eid7246 and the note above + peek_str?("<") and name << gt__number__lt # partial end name end + # this represents the partial size for BODY or BINARY + alias gt__number__lt atom + def envelope @lex_state = EXPR_DATA token = lookahead @@ -1010,6 +1038,13 @@ def section str << rbra end + # section-binary = "[" [section-part] "]" + def section_binary + str = +lbra + str << section_part unless peek_rbra? 
+ str << rbra + end + # section-spec = section-msgtext / (section-part ["." section-text]) # section-msgtext = "HEADER" / # "HEADER.FIELDS" [".NOT"] SP header-list / @@ -1040,6 +1075,11 @@ def header_list str << rpar end + # section-part = nz-number *("." nz-number) + # ; body part reference. + # ; Allows for accessing nested body parts. + alias section_part atom + # RFC3501 & RFC9051: # header-fld-name = astring # @@ -1710,42 +1750,47 @@ def next_token @pos = $~.end(0) if $1 return Token.new(T_SPACE, $+) - elsif $2 && $6 + elsif $2 + len = $+.to_i + val = @str[@pos, len] + @pos += len + return Token.new(T_LITERAL8, val) + elsif $3 && $7 # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS. - return Token.new(T_ATOM, $2) - elsif $3 - return Token.new(T_NIL, $+) + return Token.new(T_ATOM, $3) elsif $4 - return Token.new(T_NUMBER, $+) + return Token.new(T_NIL, $+) elsif $5 + return Token.new(T_NUMBER, $+) + elsif $6 return Token.new(T_PLUS, $+) - elsif $7 + elsif $8 # match ATOM, without a NUMBER, NIL, or PLUS prefix return Token.new(T_ATOM, $+) - elsif $8 - return Token.new(T_QUOTED, Patterns.unescape_quoted($+)) elsif $9 - return Token.new(T_LPAR, $+) + return Token.new(T_QUOTED, Patterns.unescape_quoted($+)) elsif $10 - return Token.new(T_RPAR, $+) + return Token.new(T_LPAR, $+) elsif $11 - return Token.new(T_BSLASH, $+) + return Token.new(T_RPAR, $+) elsif $12 - return Token.new(T_STAR, $+) + return Token.new(T_BSLASH, $+) elsif $13 - return Token.new(T_LBRA, $+) + return Token.new(T_STAR, $+) elsif $14 - return Token.new(T_RBRA, $+) + return Token.new(T_LBRA, $+) elsif $15 + return Token.new(T_RBRA, $+) + elsif $16 len = $+.to_i val = @str[@pos, len] @pos += len return Token.new(T_LITERAL, val) - elsif $16 - return Token.new(T_PERCENT, $+) elsif $17 - return Token.new(T_CRLF, $+) + return Token.new(T_PERCENT, $+) elsif $18 + return Token.new(T_CRLF, $+) + elsif $19 return Token.new(T_EOF, $+) else parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid") diff 
--git a/test/net/imap/fixtures/response_parser/ruby.png b/test/net/imap/fixtures/response_parser/ruby.png new file mode 100644 index 000000000..a63d57dfd Binary files /dev/null and b/test/net/imap/fixtures/response_parser/ruby.png differ diff --git a/test/net/imap/test_imap_response_parser.rb b/test/net/imap/test_imap_response_parser.rb index 1e42ee3e4..be7125e44 100644 --- a/test/net/imap/test_imap_response_parser.rb +++ b/test/net/imap/test_imap_response_parser.rb @@ -99,4 +99,45 @@ def teardown # response data, should still use normal tests, below ############################################################################ + # Strangely, there are no example responses for BINARY[section] in either + # RFC3516 or RFC9051! The closest I found was RFC5259, and those examples + # aren't FETCH responses. + def test_fetch_binary_and_binary_size + debug, Net::IMAP.debug = Net::IMAP.debug, true + png = File.binread(File.join(TEST_FIXTURE_PATH, "ruby.png")) + size = png.bytesize + parser = Net::IMAP::ResponseParser.new + # with literal8 + response = "* 1 FETCH (UID 5 BINARY[3.2] ~{%d}\r\n%s)\r\n".b % [size, png] + parsed = parser.parse response + assert_equal png, parsed.data.attr["BINARY[3.2]"] + assert_equal png.bytesize, parsed.data.attr["BINARY[3.2]"].bytesize + assert_equal Encoding::BINARY, parsed.data.attr["BINARY[3.2]"].encoding + # binary.size and partial + partial = png[0, 32] + response = "* 1 FETCH (BINARY.SIZE[5] %d BINARY[5]<0> ~{32}\r\n%s)\r\n".b % + [png.bytesize, partial] + parsed = parser.parse response + assert_equal png.bytesize, parsed.data.attr["BINARY.SIZE[5]"] + assert_equal 32, parsed.data.attr["BINARY[5]<0>"].bytesize + assert_equal partial, parsed.data.attr["BINARY[5]<0>"] + # test every type of value + literal8 = "\x00 to \xff\r\n".b * 8 + literal = "\x01 to \xff\r\n".b * 8 + quoted = "\x01 to \x7f\b\t".b * 8 + response = "* 1 FETCH (" \ + "BINARY[1] ~{%d}\r\n%s " \ + "BINARY[2] {%d}\r\n%s " \ + "BINARY[3] \"%s\" " \ + "BINARY[4] NIL)\r\n".b %
+ [literal8.bytesize, literal8, literal.bytesize, literal, quoted] + parsed = parser.parse response + assert_equal literal8, parsed.data.attr["BINARY[1]"] + assert_equal literal, parsed.data.attr["BINARY[2]"] + assert_equal quoted, parsed.data.attr["BINARY[3]"] + assert_nil parsed.data.attr["BINARY[4]"] + ensure + Net::IMAP.debug = debug + end + end