Skip to content

Commit

Permalink
✨ Add BINARY FETCH support [🚧 docs]
Browse files Browse the repository at this point in the history
Lex and parse LITERAL8, string8, nstring8.
Add section_binary and section_part

The BINARY extension isn't _fully_ supported; that requires updates to
the APPEND command.  But this should be sufficient for IMAP4rev2, which
only requires the FETCH part of the extension.
  • Loading branch information
nevans committed Oct 26, 2023
1 parent 6dda581 commit 164c099
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 35 deletions.
115 changes: 80 additions & 35 deletions lib/net/imap/response_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def parse(str)
T_STAR = :STAR # atom special; list wildcard
T_PERCENT = :PERCENT # atom special; list wildcard
T_LITERAL = :LITERAL # starts with atom special
T_LITERAL8 = :LITERAL8 # starts with atom char "~"
T_CRLF = :CRLF # atom special; text special; quoted special
T_TEXT = :TEXT # any char except CRLF
T_EOF = :EOF # end of response string
Expand Down Expand Up @@ -231,6 +232,16 @@ module RFC3629
# ; sent from server to the client.
LITERAL = /\{(\d+)\}\r\n/n

# RFC3516 (BINARY):
# literal8 = "~{" number "}" CRLF *OCTET
# ; <number> represents the number of OCTETs
# ; in the response string.
# RFC9051:
# literal8 = "~{" number64 "}" CRLF *OCTET
# ; <number64> represents the number of OCTETs
# ; in the response string.
LITERAL8 = /~\{(\d+)\}\r\n/n

module_function

def unescape_quoted!(quoted)
Expand All @@ -250,27 +261,28 @@ def unescape_quoted(quoted)
# the default, used in most places
BEG_REGEXP = /\G(?:\
(?# 1: SPACE )( )|\
(?# 2: ATOM prefixed with a compatible subtype)\
(?# 2: LITERAL8)#{Patterns::LITERAL8}|\
(?# 3: ATOM prefixed with a compatible subtype)\
((?:\
(?# 3: NIL )(NIL)|\
(?# 4: NUMBER )(\d+)|\
(?# 5: PLUS )(\+))\
(?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
(?# 4: NIL )(NIL)|\
(?# 5: NUMBER )(\d+)|\
(?# 6: PLUS )(\+))\
(?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
(?# This enables greedy alternation without lookahead, in linear time.)\
)|\
(?# Also need to check for ATOM without a subtype prefix.)\
(?# 7: ATOM )(#{Patterns::ATOMISH})|\
(?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\
(?# 9: LPAR )(\()|\
(?# 10: RPAR )(\))|\
(?# 11: BSLASH )(\\)|\
(?# 12: STAR )(\*)|\
(?# 13: LBRA )(\[)|\
(?# 14: RBRA )(\])|\
(?# 15: LITERAL )#{Patterns::LITERAL}|\
(?# 16: PERCENT )(%)|\
(?# 17: CRLF )(\r\n)|\
(?# 18: EOF )(\z))/ni
(?# 8: ATOM )(#{Patterns::ATOMISH})|\
(?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\
(?# 10: LPAR )(\()|\
(?# 11: RPAR )(\))|\
(?# 12: BSLASH )(\\)|\
(?# 13: STAR )(\*)|\
(?# 14: LBRA )(\[)|\
(?# 15: RBRA )(\])|\
(?# 16: LITERAL )#{Patterns::LITERAL}|\
(?# 17: PERCENT )(%)|\
(?# 18: CRLF )(\r\n)|\
(?# 19: EOF )(\z))/ni

# envelope, body(structure), namespaces
DATA_REGEXP = /\G(?:\
Expand Down Expand Up @@ -311,6 +323,9 @@ def unescape_quoted(quoted)
# string = quoted / literal
def_token_matchers :string, T_QUOTED, T_LITERAL

# used by nstring8 = nstring / literal8
def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8

# use where string represents "LABEL" values
def_token_matchers :case_insensitive__string,
T_QUOTED, T_LITERAL,
Expand Down Expand Up @@ -404,6 +419,10 @@ def nstring
NIL? ? nil : string
end

# nstring8 = nstring / literal8
#
# Returns nil when the NIL? matcher succeeds; otherwise returns whatever
# string8 yields (quoted, literal, or literal8 token value).
def nstring8
  return nil if NIL?

  string8
end

# Returns nil when the NIL? matcher succeeds; otherwise returns the
# result of quoted.
def nquoted
  quoted unless NIL?
end
Expand Down Expand Up @@ -680,6 +699,8 @@ def msg_att(n)
when "ENVELOPE" then envelope
when "INTERNALDATE" then date_time
when "RFC822.SIZE" then number64
when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2
when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2
when "RFC822" then nstring # not in rev2
when "RFC822.HEADER" then nstring # not in rev2
when "RFC822.TEXT" then nstring # not in rev2
Expand All @@ -702,11 +723,18 @@ def msg_att__label
lbra? and rbra
when "BODY"
peek_lbra? and name << section and
peek_str?("<") and name << atom # partial
peek_str?("<") and name << gt__number__lt # partial
when "BINARY", "BINARY.SIZE"
name << section_binary
# see https://www.rfc-editor.org/errata/eid7246 and the note above
peek_str?("<") and name << gt__number__lt # partial
end
name
end

# this represents the partial origin octet ("<" number ">") for BODY or BINARY
alias gt__number__lt atom

def envelope
@lex_state = EXPR_DATA
token = lookahead
Expand Down Expand Up @@ -1010,6 +1038,13 @@ def section
str << rbra
end

# section-binary = "[" [section-part] "]"
# Builds the bracketed section text for a BINARY / BINARY.SIZE label:
# the result of lbra, then section_part unless the next token is already
# the closing bracket, then the result of rbra. Returns the assembled
# string.
def section_binary
  text = +lbra
  if peek_rbra?
    text << rbra
  else
    text << section_part << rbra
  end
end

# section-spec = section-msgtext / (section-part ["." section-text])
# section-msgtext = "HEADER" /
# "HEADER.FIELDS" [".NOT"] SP header-list /
Expand Down Expand Up @@ -1040,6 +1075,11 @@ def header_list
str << rpar
end

# section-part = nz-number *("." nz-number)
# ; body part reference.
# ; Allows for accessing nested body parts.
alias section_part atom

# RFC3501 & RFC9051:
# header-fld-name = astring
#
Expand Down Expand Up @@ -1710,42 +1750,47 @@ def next_token
@pos = $~.end(0)
if $1
return Token.new(T_SPACE, $+)
elsif $2 && $6
elsif $2
len = $+.to_i
val = @str[@pos, len]
@pos += len
return Token.new(T_LITERAL8, val)
elsif $3 && $7
# greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
return Token.new(T_ATOM, $2)
elsif $3
return Token.new(T_NIL, $+)
return Token.new(T_ATOM, $3)
elsif $4
return Token.new(T_NUMBER, $+)
return Token.new(T_NIL, $+)
elsif $5
return Token.new(T_NUMBER, $+)
elsif $6
return Token.new(T_PLUS, $+)
elsif $7
elsif $8
# match ATOM, without a NUMBER, NIL, or PLUS prefix
return Token.new(T_ATOM, $+)
elsif $8
return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
elsif $9
return Token.new(T_LPAR, $+)
return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
elsif $10
return Token.new(T_RPAR, $+)
return Token.new(T_LPAR, $+)
elsif $11
return Token.new(T_BSLASH, $+)
return Token.new(T_RPAR, $+)
elsif $12
return Token.new(T_STAR, $+)
return Token.new(T_BSLASH, $+)
elsif $13
return Token.new(T_LBRA, $+)
return Token.new(T_STAR, $+)
elsif $14
return Token.new(T_RBRA, $+)
return Token.new(T_LBRA, $+)
elsif $15
return Token.new(T_RBRA, $+)
elsif $16
len = $+.to_i
val = @str[@pos, len]
@pos += len
return Token.new(T_LITERAL, val)
elsif $16
return Token.new(T_PERCENT, $+)
elsif $17
return Token.new(T_CRLF, $+)
return Token.new(T_PERCENT, $+)
elsif $18
return Token.new(T_CRLF, $+)
elsif $19
return Token.new(T_EOF, $+)
else
parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
Expand Down
Binary file added test/net/imap/fixtures/response_parser/ruby.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
41 changes: 41 additions & 0 deletions test/net/imap/test_imap_response_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,45 @@ def teardown
# response data, should still use normal tests, below
############################################################################

# Strangely, there are no example responses for BINARY[section] in either
# RFC3516 or RFC9051! The closest I found was RFC5259, and those examples
# aren't FETCH responses.
# Exercises parsing of BINARY[...] and BINARY.SIZE[...] FETCH attributes,
# using a PNG fixture as the binary payload.
def test_fetch_binary_and_binary_size
# Turn parser debugging on for this test; the previous value is saved so
# the ensure clause can restore it.
debug, Net::IMAP.debug = Net::IMAP.debug, true
png = File.binread(File.join(TEST_FIXTURE_PATH, "ruby.png"))
size = png.bytesize
parser = Net::IMAP::ResponseParser.new
# with literal8
# The response is built as a binary string (.b) so the raw PNG bytes can be
# interpolated directly after the "~{size}" literal8 prefix.
response = "* 1 FETCH (UID 5 BINARY[3.2] ~{%d}\r\n%s)\r\n".b % [size, png]
parsed = parser.parse response
# The attribute value must match the fixture in content, byte length, and
# encoding (BINARY).
assert_equal png, parsed.data.attr["BINARY[3.2]"]
assert_equal png.bytesize, parsed.data.attr["BINARY[3.2]"].bytesize
assert_equal Encoding::BINARY, parsed.data.attr["BINARY[3.2]"].encoding
# binary.size and partial
# Only the first 32 bytes of the fixture are sent, under a "<0>" suffix on
# the BINARY[5] label.
partial = png[0, 32]
response = "* 1 FETCH (BINARY.SIZE[5] %d BINARY[5]<0> ~{32}\r\n%s)\r\n".b %
[png.bytesize, partial]
parsed = parser.parse response
# BINARY.SIZE is expected to come back as an integer; the attr key for the
# partial keeps its "<0>" suffix.
assert_equal png.bytesize, parsed.data.attr["BINARY.SIZE[5]"]
assert_equal 32, parsed.data.attr["BINARY[5]<0>"].bytesize
assert_equal partial, parsed.data.attr["BINARY[5]<0>"]
# test every type of value
# Only the literal8 sample contains a NUL byte (\x00); the literal sample
# starts at \x01 and the quoted sample stays within \x01..\x7f with no CR/LF.
literal8 = "\x00 to \xff\r\n".b * 8
literal = "\x01 to \xff\r\n".b * 8
quoted = "\x01 to \x7f\b\t".b * 8
# One response carrying all four value forms: literal8, literal, quoted
# string, and NIL.
response = "* 1 FETCH (" \
"BINARY[1] ~{%d}\r\n%s " \
"BINARY[2] {%d}\r\n%s " \
"BINARY[3] \"%s\" " \
"BINARY[4] NIL)\r\n".b %
[literal8.bytesize, literal8, literal.bytesize, literal, quoted]
parsed = parser.parse response
assert_equal literal8, parsed.data.attr["BINARY[1]"]
assert_equal literal, parsed.data.attr["BINARY[2]"]
assert_equal quoted, parsed.data.attr["BINARY[3]"]
assert_nil parsed.data.attr["BINARY[4]"]
ensure
# Restore the debug flag saved at the top of the test.
Net::IMAP.debug = debug
end

end

0 comments on commit 164c099

Please sign in to comment.