From d84b39ab4c4199ca74d49818972c3f9979cc5e85 Mon Sep 17 00:00:00 2001 From: nick evans Date: Mon, 13 Feb 2023 09:59:12 -0500 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20BINARY=20FETCH=20support=20[?= =?UTF-8?q?=F0=9F=9A=A7=20docs]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lex and parse LITERAL8, string8, nstring8. Add section_binary and section_part The BINARY extension isn't _fully_ supported; that requires updates to the APPEND command. But this should be sufficient for IMAP4rev2, which only requires the FETCH part of the extension. --- lib/net/imap/response_parser.rb | 115 ++++++++++++------ .../imap/fixtures/response_parser/ruby.png | Bin 0 -> 5365 bytes test/net/imap/test_imap_response_parser.rb | 41 +++++++ 3 files changed, 121 insertions(+), 35 deletions(-) create mode 100644 test/net/imap/fixtures/response_parser/ruby.png diff --git a/lib/net/imap/response_parser.rb b/lib/net/imap/response_parser.rb index 08a0e9200..18f45dd19 100644 --- a/lib/net/imap/response_parser.rb +++ b/lib/net/imap/response_parser.rb @@ -54,6 +54,7 @@ def parse(str) T_STAR = :STAR # atom special; list wildcard T_PERCENT = :PERCENT # atom special; list wildcard T_LITERAL = :LITERAL # starts with atom special + T_LITERAL8 = :LITERAL8 # starts with atom char "~" T_CRLF = :CRLF # atom special; text special; quoted special T_TEXT = :TEXT # any char except CRLF T_EOF = :EOF # end of response string @@ -231,6 +232,16 @@ module RFC3629 # ; sent from server to the client. LITERAL = /\{(\d+)\}\r\n/n + # RFC3516 (BINARY): + # literal8 = "~{" number "}" CRLF *OCTET + # ; represents the number of OCTETs + # ; in the response string. + # RFC9051: + # literal8 = "~{" number64 "}" CRLF *OCTET + # ; represents the number of OCTETs + # ; in the response string. 
+ LITERAL8 = /~\{(\d+)\}\r\n/n + module_function def unescape_quoted!(quoted) @@ -250,27 +261,28 @@ def unescape_quoted(quoted) # the default, used in most places BEG_REGEXP = /\G(?:\ (?# 1: SPACE )( )|\ -(?# 2: ATOM prefixed with a compatible subtype)\ +(?# 2: LITERAL8)#{Patterns::LITERAL8}|\ +(?# 3: ATOM prefixed with a compatible subtype)\ ((?:\ -(?# 3: NIL )(NIL)|\ -(?# 4: NUMBER )(\d+)|\ -(?# 5: PLUS )(\+))\ -(?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\ +(?# 4: NIL )(NIL)|\ +(?# 5: NUMBER )(\d+)|\ +(?# 6: PLUS )(\+))\ +(?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\ (?# This enables greedy alternation without lookahead, in linear time.)\ )|\ (?# Also need to check for ATOM without a subtype prefix.)\ -(?# 7: ATOM )(#{Patterns::ATOMISH})|\ -(?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\ -(?# 9: LPAR )(\()|\ -(?# 10: RPAR )(\))|\ -(?# 11: BSLASH )(\\)|\ -(?# 12: STAR )(\*)|\ -(?# 13: LBRA )(\[)|\ -(?# 14: RBRA )(\])|\ -(?# 15: LITERAL )#{Patterns::LITERAL}|\ -(?# 16: PERCENT )(%)|\ -(?# 17: CRLF )(\r\n)|\ -(?# 18: EOF )(\z))/ni +(?# 8: ATOM )(#{Patterns::ATOMISH})|\ +(?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\ +(?# 10: LPAR )(\()|\ +(?# 11: RPAR )(\))|\ +(?# 12: BSLASH )(\\)|\ +(?# 13: STAR )(\*)|\ +(?# 14: LBRA )(\[)|\ +(?# 15: RBRA )(\])|\ +(?# 16: LITERAL )#{Patterns::LITERAL}|\ +(?# 17: PERCENT )(%)|\ +(?# 18: CRLF )(\r\n)|\ +(?# 19: EOF )(\z))/ni # envelope, body(structure), namespaces DATA_REGEXP = /\G(?:\ @@ -311,6 +323,9 @@ def unescape_quoted(quoted) # string = quoted / literal def_token_matchers :string, T_QUOTED, T_LITERAL + # used by nstring8 = nstring / literal8 + def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8 + # use where string represents "LABEL" values def_token_matchers :case_insensitive__string, T_QUOTED, T_LITERAL, @@ -404,6 +419,10 @@ def nstring NIL? ? nil : string end + def nstring8 + NIL? ? nil : string8 + end + def nquoted NIL? ? 
nil : quoted end @@ -680,6 +699,8 @@ def msg_att(n) when "ENVELOPE" then envelope when "INTERNALDATE" then date_time when "RFC822.SIZE" then number64 + when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2 + when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2 when "RFC822" then nstring # not in rev2 when "RFC822.HEADER" then nstring # not in rev2 when "RFC822.TEXT" then nstring # not in rev2 @@ -702,11 +723,18 @@ def msg_att__label lbra? and rbra when "BODY" peek_lbra? and name << section and - peek_str?("<") and name << atom # partial + peek_str?("<") and name << gt__number__lt # partial + when "BINARY", "BINARY.SIZE" + name << section_binary + # see https://www.rfc-editor.org/errata/eid7246 and the note above + peek_str?("<") and name << gt__number__lt # partial end name end + # this represents the partial size for BODY or BINARY + alias gt__number__lt atom + def envelope @lex_state = EXPR_DATA token = lookahead @@ -1010,6 +1038,13 @@ def section str << rbra end + # section-binary = "[" [section-part] "]" + def section_binary + str = +lbra + str << section_part unless peek_rbra? + str << rbra + end + # section-spec = section-msgtext / (section-part ["." section-text]) # section-msgtext = "HEADER" / # "HEADER.FIELDS" [".NOT"] SP header-list / @@ -1040,6 +1075,11 @@ def header_list str << rpar end + # section-part = nz-number *("." nz-number) + # ; body part reference. + # ; Allows for accessing nested body parts. + alias section_part atom + # RFC3501 & RFC9051: # header-fld-name = astring # @@ -1710,42 +1750,47 @@ def next_token @pos = $~.end(0) if $1 return Token.new(T_SPACE, $+) - elsif $2 && $6 + elsif $2 + len = $+.to_i + val = @str[@pos, len] + @pos += len + return Token.new(T_LITERAL8, val) + elsif $3 && $7 # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS. 
- return Token.new(T_ATOM, $2) - elsif $3 - return Token.new(T_NIL, $+) + return Token.new(T_ATOM, $3) elsif $4 - return Token.new(T_NUMBER, $+) + return Token.new(T_NIL, $+) elsif $5 + return Token.new(T_NUMBER, $+) + elsif $6 return Token.new(T_PLUS, $+) - elsif $7 + elsif $8 # match ATOM, without a NUMBER, NIL, or PLUS prefix return Token.new(T_ATOM, $+) - elsif $8 - return Token.new(T_QUOTED, Patterns.unescape_quoted($+)) elsif $9 - return Token.new(T_LPAR, $+) + return Token.new(T_QUOTED, Patterns.unescape_quoted($+)) elsif $10 - return Token.new(T_RPAR, $+) + return Token.new(T_LPAR, $+) elsif $11 - return Token.new(T_BSLASH, $+) + return Token.new(T_RPAR, $+) elsif $12 - return Token.new(T_STAR, $+) + return Token.new(T_BSLASH, $+) elsif $13 - return Token.new(T_LBRA, $+) + return Token.new(T_STAR, $+) elsif $14 - return Token.new(T_RBRA, $+) + return Token.new(T_LBRA, $+) elsif $15 + return Token.new(T_RBRA, $+) + elsif $16 len = $+.to_i val = @str[@pos, len] @pos += len return Token.new(T_LITERAL, val) - elsif $16 - return Token.new(T_PERCENT, $+) elsif $17 - return Token.new(T_CRLF, $+) + return Token.new(T_PERCENT, $+) elsif $18 + return Token.new(T_CRLF, $+) + elsif $19 return Token.new(T_EOF, $+) else parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid") diff --git a/test/net/imap/fixtures/response_parser/ruby.png b/test/net/imap/fixtures/response_parser/ruby.png new file mode 100644 index 0000000000000000000000000000000000000000..a63d57dfdd62d32c2c9000a4634df8daa2421cc6 GIT binary patch literal 5365 zcmVvebszBmIp6nn_kGOk-kIIm)$U4KZxDh)fE~%$CR%KSL6{1J1SDY|206y2%1&H% z6haloB{?PmhZu!`LWE>ql_G(ajS)DKZ1Ad9D0;za#cHM1t}X3jc4l^7_ubv!cTV=+ zxy|g1UMt&BKKgX(bl07l-*@J8_r15)sw#dwp(;LyC`433KZiiZfT+I!= zFa|7uBC3A0cI|t|{_TgQ+m%jRw`|iOhB%*{>1KI552^2kJhusitSD89oS?O^eESW5 zc<}4rxMt7S#&_=ekp369d{!=RSEWaX$HWFc?$Z({)azv`Op2qTe&lzrJNeE3>wxkU*cegxpoT~Z z5Csu1(1W1LNfeMR{K8v_6f*?eys!VVo26CbtBuok*iiq2PdDEx(DAR1rv7NdZfWp&lR=hVS)I#6SIa|9<+i%l_i3s~fX(qqT|> 
z10WzK0ICX6&Anzsc~Fkg*#Eum(qd_%_IIJ5-uGAcT=R~VW3!{OhSBN&9HU8LP@yLZ3!mj8Cg?|yjh(Z{P}30teGqQL6w2iYn}bW~06 zbZhqE{TJ;1`4>z-@;^Uy@2~wPfYD6^g(@)c>e+e%bU5X#{Yn8D^j?Yb4AF~T0-*EV zJN*2NoSq4bbEUGo;%z_s-P`%egMX8R%*Ih)4|qU80q8G+PohSzyKv<23#RY?z3aYr z!^abgq`uZ{BB-v{7sI`_4G;rLDk1?$F1a*+c<=HTJ^^NFw?enOoaTwMue@loREmxq z{M*p$p3#T1-GC6pa1ZD#FTb!^zwbA%J@Bbps%5w;NdRGuyHPI+Cg?^z6q*LNnM*r& zE`0IRohP0^oCL4pMRMOtJ?GiBBytx_JwCJ2ZZ@oQLId|8OMg)g_}sq$`aSQv`jK0| zGBySqlS-jHcRdCLQ?`idcfFAr^LO2e@~AeNA@d>k>cg2(5qLl1%!@bIA1{{{X6GZz zVoZRp(mhB3Vm?>;J@0?#fm^>aJ`NtZ2f)9JUL|;-{?D&!9@xL~#A37+(&y?RhS#)` z$ufx+PF*xwJG^D{$%Q5MDLWU6`+WcibWZg9f9E&$fA-50W1xss3hO!O6@aJ`84Our zNh>d&9RJzNAG!MV;*e!I00p!kqzcv5gj~sYczH0gY?=C$y z@5TlPLNQaJ?aM+kkhs9H-&5`g}B@B10ziNwU>Tp&ux z`#duuwMvD@o~7&a-+y55^`EFFkbznWX1$W0Pp<$C76?AouE(oy{$%BXT>uV$`>&Q9 zy6Ol4;-xQCDBxAq3lOxwJU9>uIOKWA@&G7tYAhok8J~c)&rHALH-4oDj8w@B3=JFv z22eopeEPLlDjE%3{LVLDaP4nl5F;1A?8w2Vw7J3{2QU#6Ku`%P0Z@Q};ZsMTOw?aM z06Az3tWL`_&>#8otq1S@!KNzt04A(PSmmN%otqwv#+**Sa_6OA*n=W=yzUK$zIJ=8 zfB}Onl!_k=pyDG5F`yVm@d89aBOt2RpMkCt{`$iow17qk#UCC8k&E8b^XXN=jMki1 zCabTx>pmj(v&U4#)#~+K7nTjA2UAuhDBuQq4;ma%(I8zNO)B~zM3pR8z}26r>kobY zb4UJWZn{h%6x|Sj``I&0v4Q8+lSTWK+8D2U=s;YnxA)yHX^)5`j>_!Eeqzhw<1+?; zY6cQQH2fv2{)Y{k+AS zZ;-45|=6(0#r1J87+;C#>PSelvnWx zKn^wp3qAWmwps>)5lwnN0QMQ_CvLy343x*?m2U1uz-00CK?dkq^)P6h(xg^-)BO*P z_{PzHeI?>DmqyTRs}fWds?BDloFrxPsj`6#`$|N}K^e#Z3{{|52u$TpaEffeO`d+4 zcJJF)uOg3Q0-Fxj5-+%N!{2Gr>v5nB^ zKvBG_MGmztD%YnLkImU)HkA4qgE}J+m@qg81R~?Llz9OZFi-D#K0iw*o~n%}ooINb z(L)*nZRq{_zW@PDAV$b_WdS>0{i-X!^PTpdFCF^*e`BY&a|Z1-po-ytgA_tCI$Ey8 zvjGNBkernj08s#BRj++|wVNA7D7YIh({%P^RH=++sZk(ur0DlP%~CH)a0Kw$xDRrn zH%J@W>fn<1zianRUpV>k-#hw8HzenupLadleYavbDUjg?w9!edHDi(FnX*w@b1N0s zDB!`~QmN-lUW%I!4ADlqdT%#Ra}?3?BGPPlUeVC37X*M+3`2@N7y#Nl1q;0PbDw$1 z`+ohg_rLqt=e}6lwJB|-kRG%j08|u3siIzjt&Un|ub{%JfY2bL$OI4~C;%;fvniD} z(oLxnZ`tNcWwMs$=Lk87h(H7kdS7%JT0HLZwzj)C@1hIexaYRXm%RMJD_(u-FZY$t z8_n7&L{_Cx6rsaX5hM_Gl4^Ci+p~b249Hf#EtL=gfZ=ej 
zkK&j>h)HA70GM@iQcTXkZ3&J-U|M_M3X~`7WbGPV+AW4wU;2t>x5H5kA~09yPEjud z+-mCq^g@ta8(jq8#Xob|FZ{;6l)Y%7MDeW~P!vQo+!QEl zHsi6H8L5VL2iD4Zw+As0Q&5F6=>X0|Mzu0I31E%h*>{&L%~xOXbJ@aNe)2dG2>~FQ zKZS0O$N?(dZnKFNu=8aXUw+*OU;fT_18Cf{ci%5w(QD_W%}Ma0svz3f>B0{Y5NWX& z8v+N(i3TvpAS)rDA2AC60YFg8Q7&va_n}m;ZKSvE{QC6uH%`9gn$utT6UkCHQPVs> zQYoIn!k6!ju6T7b%yXfM=A|O7o=WC|};!%_J@|Za`E+QBS z$a8@zk)oJ26s<1Wuc{{G*4x0$AQ`x2;Eo6mA{d51$#7(#hAptJ>+18o)z*o4dGB9u ze&S%X>m~Io-%@`ALc0kz9LHo#@w{#M#uoHj8z(<| z(W^1DJ^8d_?%3{<#_(d`SYHdOG+;9 z<^yp+KMR6r@R~$RGb=vNjkAD20U!*BgpEOf;H)V25CL5cPBH4K6m*uU!|D|4%?f~k z0Lh^dK`UqI<7;&G`I6VJ?r*nr-UbwTKwR(NAiUt|zl~r1ubZNO~4FL+M z4zfT!U;)M2!${ZZ*9rwPLREBorZEvS?a1h4ge8&)ptVs!u@}mqq!Lezn{s81-s7bB=Id|G@Q)?z0Z)*w&-Qo?AE zZ61zU41*{p`U$1uCZ@yVDa?i#7^i951*1?CH*?u_(=c zNC#A{<>hAXeXtBEx~|0*JYZ?4EW|~pix=dCdeE(bnMn;hqJBZJ#4qAFQ9crS7Kn(Y zc&ls(9SJHZ6?euVJsul%fJ%U|T0Hy|GP9D7BqsAgWEH3o$uw?t1BGm``@xphI*I`G z0D)zF%P}Z&K%iR!O(~OMWwK06WCnU7faoHVl!JLBYj;%=qGIHbsrrCqVm-aOZHJ4Y zUSLTIhcQ^{W{WLXCd~!I#)tloce^QDUN!$P7oA2ZzE07#nLz+Dz$vyUkAs3?Vw~+R z^c09*l$5I0-q%@aODMAe6hN-h^GG&yef*+}lgV0pX3-^(;`<^nGB5M(d~3vMkX0RP zXhS3j0kZCKtyCPUB9IceaKnRg=%~`Pni6vuK+UTAX?jCQft{sf#F@Q~rA7!NhMA}n zv@1`eyJ~gvl|Qw3;?5G0s20;n%(9$?5fFJFKpFzaNt#2t9gbOb-JlE3+DEYy0aCyy zwt{OgW_3sk@w4ffsHeI;DZRWjP^oKBt9pTO!JcR%pq53QV9F4tpG1w#jVfi^(SYw$i~nJrs|5>$WXo{l>rR zeV2aw+jrmkrMiJBl~hy-0D01GHL12cFh-dmqJ|XMa7Tv%1ThB25}AIj>W7zsrhup_ zG>D2wP+9v2QXWtcU=$W`80HwL0Oc5^3hEPMKe?&q%AL+edJ(^H%NK)l_k8x&Xeljk zuCR-tDk9yHq%>y|7fA@9p&JTPC8z>{;E4n*K{ZRoLlr9h900*EE5LvO2(SPzm=twkPizX+dEn7`>BzCS+u9Qbc%M%l&sZCezIG?s`$3}V)ulpaj?6~3$ z`#$;yGv7bZ(Y9llGJMIJij6v*UUNx+9LNSM9b`d24Adf616Pe)y=shgk#nU|G%}J* z)RL*G($;O!*qAdmN|LBjNyf(GdcCxHYcf4;tTV>IIT(W%5*F*@qPM;M%sTqbZ_Iq} z{?msKtsFhlSzfG`M@lXAb2B_zt&Wem+JxJ@seInHs8+YkHj3j)B_10~>h<#UmeTYV z<03YOorQJyk%h%G#m+ar>C6HE5u|B26RKb$YfYRm{!xU*a|bg@B?B1zql^C!SmWN* T<^cYc00000NkvXXu0mjfRn|2> literal 0 HcmV?d00001 diff --git a/test/net/imap/test_imap_response_parser.rb b/test/net/imap/test_imap_response_parser.rb index 
1e42ee3e4..be7125e44 100644 --- a/test/net/imap/test_imap_response_parser.rb +++ b/test/net/imap/test_imap_response_parser.rb @@ -99,4 +99,45 @@ def teardown # response data, should still use normal tests, below ############################################################################ + # Strangely, there are no example responses for BINARY[section] in either + # RFC3516 or RFC9051! The closest I found was RFC5259, and those examples + # aren't FETCH responses. + def test_fetch_binary_and_binary_size + debug, Net::IMAP.debug = Net::IMAP.debug, true + png = File.binread(File.join(TEST_FIXTURE_PATH, "ruby.png")) + size = png.bytesize + parser = Net::IMAP::ResponseParser.new + # with literal8 + response = "* 1 FETCH (UID 5 BINARY[3.2] ~{%d}\r\n%s)\r\n".b % [size, png] + parsed = parser.parse response + assert_equal png, parsed.data.attr["BINARY[3.2]"] + assert_equal png.bytesize, parsed.data.attr["BINARY[3.2]"].bytesize + assert_equal Encoding::BINARY, parsed.data.attr["BINARY[3.2]"].encoding + # binary.size and partial + partial = png[0, 32] + response = "* 1 FETCH (BINARY.SIZE[5] %d BINARY[5]<0> ~{32}\r\n%s)\r\n".b % + [png.bytesize, partial] + parsed = parser.parse response + assert_equal png.bytesize, parsed.data.attr["BINARY.SIZE[5]"] + assert_equal 32, parsed.data.attr["BINARY[5]<0>"].bytesize + assert_equal partial, parsed.data.attr["BINARY[5]<0>"] + # test every type of value + literal8 = "\x00 to \xff\r\n".b * 8 + literal = "\x01 to \xff\r\n".b * 8 + quoted = "\x01 to \x7f\b\t".b * 8 + response = "* 1 FETCH (" \ + "BINARY[1] ~{%d}\r\n%s " \ + "BINARY[2] {%d}\r\n%s " \ + "BINARY[3] \"%s\" " \ + "BINARY[4] NIL)\r\n".b % + [literal8.bytesize, literal8, literal.bytesize, literal, quoted] + parsed = parser.parse response + assert_equal literal8, parsed.data.attr["BINARY[1]"] + assert_equal literal, parsed.data.attr["BINARY[2]"] + assert_equal quoted, parsed.data.attr["BINARY[3]"] + assert_nil parsed.data.attr["BINARY[4]"] + ensure + Net::IMAP.debug = debug + end
+ end