From 5aff7e4e20457aa8d5148b38227f0620e5630e8f Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Sun, 10 Nov 2024 22:55:30 -0500 Subject: [PATCH] json_parser incremental parsing --- doc/ref/corelib/json/operator_at.md | 4 +- include/jsoncons/json_parser.hpp | 445 +++++++++++--------- include/jsoncons/json_reader.hpp | 48 +-- test/corelib/src/error_recovery_tests.cpp | 71 +++- test/corelib/src/json_parse_error_tests.cpp | 3 - test/corelib/src/json_parser_tests.cpp | 77 ++-- test/corelib/src/json_reader_tests.cpp | 5 +- 7 files changed, 356 insertions(+), 297 deletions(-) diff --git a/doc/ref/corelib/json/operator_at.md b/doc/ref/corelib/json/operator_at.md index d72445ca3..53c86c73c 100644 --- a/doc/ref/corelib/json/operator_at.md +++ b/doc/ref/corelib/json/operator_at.md @@ -2,7 +2,7 @@ ```cpp proxy_type operator[](const string_view_type& key); (1) (until 0.179.0) -basic_json& operator[](const string_view_type& key); (since 0.179.0) +reference operator[](const string_view_type& key); (since 0.179.0) const_reference operator[](const string_view_type& key) const; (2) @@ -34,7 +34,7 @@ when this operator is used for reading but the key does not exist. ### Examples -#### Assigning to and` object when the key does not exist +#### Assigning to an object when the key does not exist ```cpp int main() diff --git a/include/jsoncons/json_parser.hpp b/include/jsoncons/json_parser.hpp index a86bcacda..c4cf1ab98 100644 --- a/include/jsoncons/json_parser.hpp +++ b/include/jsoncons/json_parser.hpp @@ -29,11 +29,23 @@ namespace jsoncons { +template +class basic_json_parser_input +{ +public: + using char_type = CharT; + + virtual void update(const char_type* data, std::size_t length) = 0; +}; + +template class chunk_reader { public: + using char_type = CharT; + virtual ~chunk_reader() = default; - virtual bool read_chunk(std::error_code&) + virtual bool read_chunk(basic_json_parser_input&, std::error_code&) { return false; } @@ -63,12 +75,33 @@ enum class json_parse_state : uint8_t }; template > -class basic_json_parser : public ser_context +class basic_json_parser : public ser_context, public virtual basic_json_parser_input { public: using char_type = CharT; using string_view_type = typename basic_json_visitor::string_view_type; + using chunk_reader_type = std::function& input, std::error_code& ec)>; private: + class chunk_reader_adaptor : public chunk_reader + { + chunk_reader_type read_chunk_; + + public: + chunk_reader_adaptor() + : read_chunk_([](basic_json_parser_input&, std::error_code&){return false;}) + { + } + chunk_reader_adaptor(chunk_reader_type read_chunk) + : read_chunk_(read_chunk) + { + } + + bool read_chunk(basic_json_parser_input& input, std::error_code& ec) + { + return read_chunk_(input, ec); + } + }; + struct string_maps_to_double { string_view_type s; @@ -109,8 +142,8 @@ class basic_json_parser : public ser_context std::vector state_stack_; std::vector,double>> string_double_map_; - chunk_reader default_chunk_reader; - chunk_reader* chunk_rdr_ = &default_chunk_reader; + chunk_reader_adaptor chk_rdr_; + chunk_reader* chunk_rdr_; // Noncopyable and nonmoveable basic_json_parser(const basic_json_parser&) = delete; @@ -122,21 +155,35 @@ class basic_json_parser : public ser_context { } + basic_json_parser(chunk_reader_type chunk_rdr, const TempAllocator& temp_alloc = TempAllocator()) + : basic_json_parser(chunk_rdr, basic_json_decode_options(), default_json_parsing(), + temp_alloc) + { + } + basic_json_parser(std::function err_handler, - const TempAllocator& temp_alloc = TempAllocator()) + const TempAllocator& temp_alloc = TempAllocator()) : basic_json_parser(basic_json_decode_options(), err_handler, temp_alloc) { } basic_json_parser(const basic_json_decode_options& options, - const TempAllocator& temp_alloc = TempAllocator()) + const TempAllocator& temp_alloc = TempAllocator()) : basic_json_parser(options, options.err_handler(), temp_alloc) { } basic_json_parser(const basic_json_decode_options& options, - std::function err_handler, - const TempAllocator& temp_alloc = TempAllocator()) + std::function err_handler, + const TempAllocator& temp_alloc = TempAllocator()) + : basic_json_parser(&chk_rdr_, options, err_handler, temp_alloc) + { + } + + basic_json_parser(chunk_reader* chunk_rdr, + const basic_json_decode_options& options, + std::function err_handler, + const TempAllocator& temp_alloc = TempAllocator()) : options_(options), err_handler_(err_handler), nesting_depth_(0), @@ -153,7 +200,9 @@ class basic_json_parser : public ser_context more_(true), done_(false), string_buffer_(temp_alloc), - state_stack_(temp_alloc) + state_stack_(temp_alloc), + chk_rdr_{}, + chunk_rdr_(chunk_rdr) { string_buffer_.reserve(initial_string_buffer_capacity); @@ -175,10 +224,10 @@ class basic_json_parser : public ser_context } } - basic_json_parser(const basic_json_decode_options& options, - std::function err_handler, - chunk_reader* observer, - const TempAllocator& temp_alloc = TempAllocator()) + basic_json_parser(chunk_reader_type chunk_rdr, + const basic_json_decode_options& options, + std::function err_handler, + const TempAllocator& temp_alloc = TempAllocator()) : options_(options), err_handler_(err_handler), nesting_depth_(0), @@ -196,7 +245,8 @@ class basic_json_parser : public ser_context done_(false), string_buffer_(temp_alloc), state_stack_(temp_alloc), - chunk_rdr_(observer) + chk_rdr_(chunk_rdr), + chunk_rdr_(&chk_rdr_) { string_buffer_.reserve(initial_string_buffer_capacity); @@ -285,7 +335,7 @@ class basic_json_parser : public ser_context { if (input_ptr_ == local_input_end) { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { break; } @@ -324,7 +374,7 @@ class basic_json_parser : public ser_context { if (input_ptr_ == local_input_end) { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { break; } @@ -548,7 +598,7 @@ class basic_json_parser : public ser_context update(sv.data(),sv.length()); } - void update(const char_type* data, std::size_t length) + void update(const char_type* data, std::size_t length) final { begin_input_ = data; input_end_ = data + length; @@ -602,26 +652,38 @@ class basic_json_parser : public ser_context if (input_ptr_ == local_input_end && more_) { - switch (state_) + if (input_ptr_ == local_input_end) { - case json_parse_state::accept: - visitor.flush(); - done_ = true; - state_ = json_parse_state::done; - more_ = false; - break; - case json_parse_state::start: - case json_parse_state::done: - more_ = false; - break; - case json_parse_state::cr: - state_ = pop_state(); - break; - default: - err_handler_(json_errc::unexpected_eof, *this); - ec = json_errc::unexpected_eof; - more_ = false; + chunk_rdr_->read_chunk(*this, ec); + if (ec) + { return; + } + local_input_end = input_end_; + } + if (input_ptr_ == local_input_end) + { + switch (state_) + { + case json_parse_state::accept: + visitor.flush(); + done_ = true; + state_ = json_parse_state::done; + more_ = false; + break; + case json_parse_state::start: + case json_parse_state::done: + more_ = false; + break; + case json_parse_state::cr: + state_ = pop_state(); + break; + default: + err_handler_(json_errc::unexpected_eof, *this); + ec = json_errc::unexpected_eof; + more_ = false; + return; + } } } @@ -629,12 +691,13 @@ class basic_json_parser : public ser_context { if (input_ptr_ == local_input_end) { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { break; } local_input_end = input_end_; } + switch (state_) { case json_parse_state::accept: @@ -659,112 +722,112 @@ class basic_json_parser : public ser_context } break; case json_parse_state::start: + { + switch (*input_ptr_) { - switch (*input_ptr_) - { - JSONCONS_ILLEGAL_CONTROL_CHARACTER: - more_ = err_handler_(json_errc::illegal_control_character, *this); - if (!more_) - { - ec = json_errc::illegal_control_character; - return; - } - break; - case '\r': - push_state(state_); - ++input_ptr_; - ++position_; - state_ = json_parse_state::cr; - break; - case '\n': - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - break; - case ' ':case '\t': - skip_space(ec); - break; - case '/': - ++input_ptr_; - ++position_; - push_state(state_); - state_ = json_parse_state::slash; - break; - case '{': - saved_position_ = position_; - ++input_ptr_; - ++position_; - begin_object(visitor, ec); - if (ec) return; - break; - case '[': - saved_position_ = position_; - ++input_ptr_; - ++position_; - begin_array(visitor, ec); - if (ec) return; - break; - case '\"': - state_ = json_parse_state::string; - saved_position_ = position_; - ++input_ptr_; - ++position_; - string_buffer_.clear(); - parse_string(visitor, ec); - if (ec) return; - break; - case '-': - string_buffer_.clear(); - string_buffer_.push_back('-'); - saved_position_ = position_; - parse_number(visitor, ec); - if (ec) {return;} - break; - case '0': - string_buffer_.clear(); - string_buffer_.push_back(static_cast(*input_ptr_)); - saved_position_ = position_; - parse_number(visitor, ec); - if (ec) {return;} - break; - case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': - string_buffer_.clear(); - string_buffer_.push_back(static_cast(*input_ptr_)); - saved_position_ = position_; - parse_number(visitor, ec); - if (ec) {return;} - break; - case 'n': - parse_null(visitor, ec); - if (ec) {return;} - break; - case 't': - parse_true(visitor, ec); - if (ec) {return;} - break; - case 'f': - parse_false(visitor, ec); - if (ec) {return;} - break; - case '}': - err_handler_(json_errc::unexpected_rbrace, *this); - ec = json_errc::unexpected_rbrace; - more_ = false; - return; - case ']': - err_handler_(json_errc::unexpected_rbracket, *this); - ec = json_errc::unexpected_rbracket; - more_ = false; + JSONCONS_ILLEGAL_CONTROL_CHARACTER: + more_ = err_handler_(json_errc::illegal_control_character, *this); + if (!more_) + { + ec = json_errc::illegal_control_character; return; - default: - err_handler_(json_errc::syntax_error, *this); - ec = json_errc::syntax_error; - more_ = false; - return; - } + } + break; + case '\r': + push_state(state_); + ++input_ptr_; + ++position_; + state_ = json_parse_state::cr; + break; + case '\n': + ++input_ptr_; + ++line_; + ++position_; + mark_position_ = position_; + break; + case ' ':case '\t': + skip_space(ec); + break; + case '/': + ++input_ptr_; + ++position_; + push_state(state_); + state_ = json_parse_state::slash; + break; + case '{': + saved_position_ = position_; + ++input_ptr_; + ++position_; + begin_object(visitor, ec); + if (ec) return; + break; + case '[': + saved_position_ = position_; + ++input_ptr_; + ++position_; + begin_array(visitor, ec); + if (ec) return; + break; + case '\"': + state_ = json_parse_state::string; + saved_position_ = position_; + ++input_ptr_; + ++position_; + string_buffer_.clear(); + parse_string(visitor, ec); + if (ec) return; + break; + case '-': + string_buffer_.clear(); + string_buffer_.push_back('-'); + saved_position_ = position_; + parse_number(visitor, ec); + if (ec) {return;} + break; + case '0': + string_buffer_.clear(); + string_buffer_.push_back(static_cast(*input_ptr_)); + saved_position_ = position_; + parse_number(visitor, ec); + if (ec) {return;} + break; + case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + string_buffer_.clear(); + string_buffer_.push_back(static_cast(*input_ptr_)); + saved_position_ = position_; + parse_number(visitor, ec); + if (ec) {return;} + break; + case 'n': + parse_null(visitor, ec); + if (ec) {return;} + break; + case 't': + parse_true(visitor, ec); + if (ec) {return;} + break; + case 'f': + parse_false(visitor, ec); + if (ec) {return;} + break; + case '}': + err_handler_(json_errc::unexpected_rbrace, *this); + ec = json_errc::unexpected_rbrace; + more_ = false; + return; + case ']': + err_handler_(json_errc::unexpected_rbracket, *this); + ec = json_errc::unexpected_rbracket; + more_ = false; + return; + default: + err_handler_(json_errc::syntax_error, *this); + ec = json_errc::syntax_error; + more_ = false; + return; } break; + } case json_parse_state::expect_comma_or_end: { @@ -1439,7 +1502,7 @@ class basic_json_parser : public ser_context } else { - if (!chunk_rdr_->read_chunk(ec) || (input_end_ - input_ptr_) < m) + if (!chunk_rdr_->read_chunk(*this, ec) || (input_end_ - input_ptr_) < m) { ec = json_errc::invalid_value; more_ = false; @@ -1510,7 +1573,7 @@ class basic_json_parser : public ser_context } else { - if (!chunk_rdr_->read_chunk(ec) || (input_end_ - input_ptr_) < m) + if (!chunk_rdr_->read_chunk(*this, ec) || (input_end_ - input_ptr_) < m) { ec = json_errc::invalid_value; more_ = false; @@ -1581,7 +1644,7 @@ class basic_json_parser : public ser_context } else { - if (!chunk_rdr_->read_chunk(ec) || (input_end_ - input_ptr_) < m) + if (!chunk_rdr_->read_chunk(*this, ec) || (input_end_ - input_ptr_) < m) { ec = json_errc::invalid_value; more_ = false; @@ -1657,7 +1720,7 @@ class basic_json_parser : public ser_context minus_sign: if (JSONCONS_UNLIKELY(input_ptr_ >= local_input_end)) // Buffer exhausted { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -1684,7 +1747,7 @@ class basic_json_parser : public ser_context zero: if (JSONCONS_UNLIKELY(input_ptr_ >= local_input_end)) // Buffer exhausted { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { if (JSONCONS_UNLIKELY(ec)) { @@ -1764,7 +1827,7 @@ class basic_json_parser : public ser_context integer: if (JSONCONS_UNLIKELY(input_ptr_ >= local_input_end)) // Buffer exhausted { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { if (JSONCONS_UNLIKELY(ec)) { @@ -1844,7 +1907,7 @@ class basic_json_parser : public ser_context fraction1: if (JSONCONS_UNLIKELY(input_ptr_ >= local_input_end)) // Buffer exhausted { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -1866,7 +1929,7 @@ class basic_json_parser : public ser_context fraction2: if (JSONCONS_UNLIKELY(input_ptr_ >= local_input_end)) // Buffer exhausted { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { if (JSONCONS_UNLIKELY(ec)) { @@ -1945,7 +2008,7 @@ class basic_json_parser : public ser_context exp1: if (JSONCONS_UNLIKELY(input_ptr_ >= local_input_end)) // Buffer exhausted { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -1976,7 +2039,7 @@ class basic_json_parser : public ser_context exp2: if (JSONCONS_UNLIKELY(input_ptr_ >= local_input_end)) // Buffer exhausted { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -1999,7 +2062,7 @@ class basic_json_parser : public ser_context exp3: if (JSONCONS_UNLIKELY(input_ptr_ >= local_input_end)) // Buffer exhausted { - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { if (JSONCONS_UNLIKELY(ec)) { @@ -2086,7 +2149,7 @@ class basic_json_parser : public ser_context { string_buffer_.append(sb,input_ptr_-sb); position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2098,68 +2161,32 @@ class basic_json_parser : public ser_context JSONCONS_ILLEGAL_CONTROL_CHARACTER: { position_ += (input_ptr_ - sb + 1); - more_ = err_handler_(json_errc::illegal_control_character, *this); - if (!more_) + if (!err_handler_(json_errc::illegal_control_character, *this)) { + more_ = false; ec = json_errc::illegal_control_character; - state_ = json_parse_state::string; return; } // recovery - skip string_buffer_.append(sb,input_ptr_-sb); - ++input_ptr_; - state_ = json_parse_state::string; - return; - } - case '\r': - { - position_ += (input_ptr_ - sb + 1); - more_ = err_handler_(json_errc::illegal_character_in_string, *this); - if (!more_) - { - ec = json_errc::illegal_character_in_string; - state_ = json_parse_state::string; - return; - } - // recovery - keep - string_buffer_.append(sb, input_ptr_ - sb + 1); - ++input_ptr_; - push_state(state_); - state_ = json_parse_state::cr; - return; + sb = input_ptr_ + 1; + break; } case '\n': - { - ++line_; - ++position_; - mark_position_ = position_; - more_ = err_handler_(json_errc::illegal_character_in_string, *this); - if (!more_) - { - ec = json_errc::illegal_character_in_string; - state_ = json_parse_state::string; - return; - } - // recovery - keep - string_buffer_.append(sb, input_ptr_ - sb + 1); - ++input_ptr_; - return; - } + case '\r': case '\t': { position_ += (input_ptr_ - sb + 1); - more_ = err_handler_(json_errc::illegal_character_in_string, *this); - if (!more_) + if (!err_handler_(json_errc::illegal_character_in_string, *this)) { + more_ = false; ec = json_errc::illegal_character_in_string; - state_ = json_parse_state::string; return; } - // recovery - keep - string_buffer_.append(sb, input_ptr_ - sb + 1); - ++input_ptr_; - state_ = json_parse_state::string; - return; + // recovery - skip + string_buffer_.append(sb,input_ptr_-sb); + sb = input_ptr_ + 1; + break; } case '\\': { @@ -2196,7 +2223,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2262,7 +2289,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2285,7 +2312,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2308,7 +2335,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2331,7 +2358,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2365,7 +2392,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2393,7 +2420,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2420,7 +2447,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2443,7 +2470,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2466,7 +2493,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2489,7 +2516,7 @@ class basic_json_parser : public ser_context { //string_buffer_.append(sb,input_ptr_-sb); //position_ += (input_ptr_ - sb); - if (!chunk_rdr_->read_chunk(ec)) + if (!chunk_rdr_->read_chunk(*this, ec)) { return; } @@ -2804,6 +2831,8 @@ class basic_json_parser : public ser_context using json_parser = basic_json_parser; using wjson_parser = basic_json_parser; +using json_parser_input = basic_json_parser_input; +using wjson_parser_input = basic_json_parser_input; } #endif diff --git a/include/jsoncons/json_reader.hpp b/include/jsoncons/json_reader.hpp index 1dedc98b9..38c1de300 100644 --- a/include/jsoncons/json_reader.hpp +++ b/include/jsoncons/json_reader.hpp @@ -149,7 +149,7 @@ namespace jsoncons { }; template ,typename TempAllocator =std::allocator> - class basic_json_reader final : public chunk_reader + class basic_json_reader final : public chunk_reader { public: using char_type = CharT; @@ -264,12 +264,12 @@ namespace jsoncons { const TempAllocator& temp_alloc = TempAllocator()) : source_(std::forward(source)), visitor_(visitor), - parser_(options, err_handler, this, temp_alloc), + parser_(this, options, err_handler, temp_alloc), eof_(false) { } - virtual bool read_chunk(std::error_code& ec) + bool read_chunk(basic_json_parser_input&, std::error_code& ec) final { //std::cout << "UPDATE BUFFER\n"; bool success = false; @@ -306,32 +306,18 @@ namespace jsoncons { return; } parser_.reset(); - while (!parser_.stopped()) + auto s = source_.read_buffer(ec); + if (ec) return; + if (s.size() > 0) { - if (parser_.source_exhausted()) - { - auto s = source_.read_buffer(ec); - if (ec) return; - if (s.size() > 0) - { - parser_.update(s.data(),s.size()); - } - } - bool eof = parser_.source_exhausted(); - parser_.parse_some(visitor_, ec); - if (ec) return; - if (eof) - { - if (parser_.enter()) - { - break; - } - else if (!parser_.accept()) - { - ec = json_errc::unexpected_eof; - return; - } - } + parser_.update(s.data(),s.size()); + } + parser_.parse_some(visitor_, ec); + if (ec) return; + if (!parser_.enter() && !parser_.accept()) + { + ec = json_errc::unexpected_eof; + return; } while (!source_.eof()) @@ -339,11 +325,11 @@ namespace jsoncons { parser_.skip_whitespace(ec); if (parser_.source_exhausted()) { - auto s = source_.read_buffer(ec); + auto s1 = source_.read_buffer(ec); if (ec) return; - if (s.size() > 0) + if (s1.size() > 0) { - parser_.update(s.data(),s.size()); + parser_.update(s1.data(),s1.size()); } } else diff --git a/test/corelib/src/error_recovery_tests.cpp b/test/corelib/src/error_recovery_tests.cpp index e706c8b2d..e9c046c77 100644 --- a/test/corelib/src/error_recovery_tests.cpp +++ b/test/corelib/src/error_recovery_tests.cpp @@ -13,6 +13,7 @@ using namespace jsoncons; +#if 0 TEST_CASE("test_array_extra_comma") { allow_trailing_commas err_handler; @@ -48,20 +49,70 @@ TEST_CASE("test_object_extra_comma") CHECK(val == expected); } - -TEST_CASE("test_name_without_quotes") +#endif +TEST_CASE("test json_parser error recovery") { - //allow_trailing_commas err_handler; - - /*json val = json::parse(R"( + SECTION("illegal control character") { - first : 1, - second : 2 + auto err_handler = [](const std::error_code& ec, const ser_context&) noexcept -> bool + { + return ec == json_errc::illegal_control_character; + }; + + std::string str; + str.push_back('"'); + str.push_back('C'); + str.push_back(0x0e); + str.push_back('a'); + str.push_back('t'); + str.push_back('"'); + auto j = jsoncons::json::parse(str, err_handler); + REQUIRE(j.is_string()); + CHECK(j.as_string() == "Cat"); } - )", - err_handler); - std::cout << val << std::endl;*/ + SECTION("\r") + { + auto err_handler = [](const std::error_code& ec, const ser_context&) noexcept -> bool + { + return ec == json_errc::illegal_character_in_string; + }; + + + std::string str; + str.push_back('C'); + str.push_back('\r'); + str.push_back('a'); + str.push_back('t'); + std::string str2; + str2.push_back('"'); + str2.append(str); + str2.push_back('"'); + auto j = jsoncons::json::parse(str2, err_handler); + REQUIRE(j.is_string()); + CHECK(j.as_string() == "Cat"); + } + SECTION("\n") + { + auto err_handler = [](const std::error_code& ec, const ser_context&) noexcept -> bool + { + return ec == json_errc::illegal_character_in_string; + }; + + + std::string str; + str.push_back('C'); + str.push_back('\n'); + str.push_back('a'); + str.push_back('t'); + std::string str2; + str2.push_back('"'); + str2.append(str); + str2.push_back('"'); + auto j = jsoncons::json::parse(str2, err_handler); + REQUIRE(j.is_string()); + CHECK(j.as_string() == "Cat"); + } } diff --git a/test/corelib/src/json_parse_error_tests.cpp b/test/corelib/src/json_parse_error_tests.cpp index d151f97df..c0b53d0db 100644 --- a/test/corelib/src/json_parse_error_tests.cpp +++ b/test/corelib/src/json_parse_error_tests.cpp @@ -234,6 +234,3 @@ TEST_CASE("test_positive_integer_overflow") CHECK(s2 == j2.as()); } - - - diff --git a/test/corelib/src/json_parser_tests.cpp b/test/corelib/src/json_parser_tests.cpp index 98f4a3112..cce1915e5 100644 --- a/test/corelib/src/json_parser_tests.cpp +++ b/test/corelib/src/json_parser_tests.cpp @@ -25,7 +25,6 @@ TEST_CASE("Test cyrillic.json") REQUIRE(is); json j = json::parse(is); } -#if 0 TEST_CASE("test_object2") { @@ -232,47 +231,43 @@ TEST_CASE("test_parse_null") json j = decoder.get_result(); } -TEST_CASE("test_parse_array_string") -{ - jsoncons::json_decoder decoder; - json_parser parser; - - parser.reset(); - - static std::string s1("[\"\""); - - parser.update(s1.data(),s1.length()); - parser.parse_some(decoder); - CHECK_FALSE(parser.done()); - static std::string s2("]"); - parser.update(s2.data(), s2.length()); - parser.parse_some(decoder); - parser.finish_parse(decoder); - CHECK(parser.done()); - - json j = decoder.get_result(); -} - TEST_CASE("test_incremental_parsing") { - jsoncons::json_decoder decoder; - json_parser parser; - - parser.reset(); - - parser.update("[fal",4); - parser.parse_some(decoder); - CHECK_FALSE(parser.done()); - CHECK(parser.source_exhausted()); - parser.update("se]",3); - parser.parse_some(decoder); - - parser.finish_parse(decoder); - CHECK(parser.done()); - - json j = decoder.get_result(); - REQUIRE(j.is_array()); - CHECK_FALSE(j[0].as()); + SECTION("Array of strings") + { + std::vector chunks = {"[fal", "se]"}; + std::size_t index = 0; + + auto read_chunk = [&](jsoncons::json_parser_input& input, std::error_code& /*ec*/) -> bool + { + if (index < chunks.size()) + { + input.update(chunks[index].data(), chunks[index].size()); + ++index; + return true; + } + else + { + return false; + } + }; + + jsoncons::json_decoder decoder; + json_parser parser{read_chunk}; + + parser.reset(); + + parser.parse_some(decoder); + CHECK_FALSE(parser.done()); + CHECK(parser.source_exhausted()); + parser.parse_some(decoder); + parser.finish_parse(decoder); + CHECK(parser.done()); + + json j = decoder.get_result(); + REQUIRE(j.is_array()); + CHECK_FALSE(j[0].as()); + } } TEST_CASE("test_parser_reinitialization") @@ -339,4 +334,4 @@ TEST_CASE("test_diagnostics_visitor", "") CHECK(os.str() == expected.str()); } } -#endif + diff --git a/test/corelib/src/json_reader_tests.cpp b/test/corelib/src/json_reader_tests.cpp index e9f505e22..5cb668002 100644 --- a/test/corelib/src/json_reader_tests.cpp +++ b/test/corelib/src/json_reader_tests.cpp @@ -33,7 +33,7 @@ TEST_CASE("test json_reader buffered read") CHECK(j.as() == str); } - SECTION("number with split buffer") + /*SECTION("number with split buffer") { std::string str(stream_source::default_max_buffer_size-7, 'a'); std::string neg_num("-123456789.123456789"); @@ -111,7 +111,7 @@ TEST_CASE("test json_reader buffered read") REQUIRE(j.is_array()); REQUIRE(j.size() == 2); CHECK(j[1].is_null()); - } + }*/ } void test_json_reader_error(const std::string& text, const std::error_code& ec) @@ -403,3 +403,4 @@ TEST_CASE("json_reader stateful allocator tests") } } #endif +