From b27ea39eb439fb2b0c20d74186b639993042b396 Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Tue, 2 Nov 2021 07:53:30 +0000 Subject: [PATCH] Improve location info for syntax errors. JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com --- docs/02.API-REFERENCE.md | 91 +++++++++++- jerry-core/api/jerry.c | 58 ++++++++ jerry-core/ecma/base/ecma-extended-info.c | 6 +- jerry-core/ecma/base/ecma-extended-info.h | 6 +- jerry-core/ecma/base/ecma-gc.c | 12 ++ jerry-core/ecma/base/ecma-globals.h | 14 ++ jerry-core/include/jerryscript-core.h | 2 + jerry-core/include/jerryscript-types.h | 10 ++ jerry-core/jmem/jmem.h | 2 +- jerry-core/lit/lit-magic-strings.h | 1 + jerry-core/parser/js/js-lexer.c | 116 ++++++++------- jerry-core/parser/js/js-parser-internal.h | 3 + jerry-core/parser/js/js-parser.c | 74 ++++++---- jerry-main/main-utils.c | 144 ++++++++++--------- tests/unit-core/CMakeLists.txt | 1 + tests/unit-core/test-api.c | 26 +--- tests/unit-core/test-syntax-error-location.c | 136 ++++++++++++++++++ 17 files changed, 533 insertions(+), 169 deletions(-) create mode 100644 tests/unit-core/test-syntax-error-location.c diff --git a/docs/02.API-REFERENCE.md b/docs/02.API-REFERENCE.md index 3591953a82..8454056261 100644 --- a/docs/02.API-REFERENCE.md +++ b/docs/02.API-REFERENCE.md @@ -1462,6 +1462,30 @@ typedef struct - [jerry_source_info_enabled_fields_t](#jerry_source_info_enabled_fields_t) - [jerry_get_source_info](#jerry_get_source_info) +## jerry_syntax_error_location_t + +**Summary** + +Detailed location info for SyntaxErrors. It contains the +start and end location of the token which caused the SyntaxError. + +**Prototype** + +```c +typedef struct +{ + uint32_t line; /**< start line of the invalid token */ + uint32_t column_start; /**< start column of the invalid token */ + uint32_t column_end; /**< end column of the invalid token */ +} jerry_syntax_error_location_t; +``` + +*New in version [[NEXT_RELEASE]]*. 
+ +**See also** + +- [jerry_get_syntax_error_location](#jerry_get_syntax_error_location) + ## jerry_arraybuffer_type_t **Summary** @@ -12236,8 +12260,8 @@ Returns a newly created source info structure corresponding to the passed script The function is lower level than `toString()` operation, but provides more contextual information. *Notes*: -- Returned value must be freed with [jerry_free_source_info](#jerry_free_source_info) when it -is no longer needed. +- Returned value must be freed with [jerry_free_source_info](#jerry_free_source_info) + when it is no longer needed. - This API depends on a build option (`JERRY_FUNCTION_TO_STRING`) and can be checked in runtime with the `JERRY_FEATURE_FUNCTION_TO_STRING` feature enum value, see: [jerry_is_feature_enabled](#jerry_is_feature_enabled). @@ -12320,6 +12344,69 @@ See [jerry_get_source_info](#jerry_get_source_info) - [jerry_get_source_info](#jerry_get_source_info) - [jerry_source_info_t](#jerry_source_info_t) +## jerry_get_syntax_error_location + +**Summary** + +Gets the resource name and location info assigned to a SyntaxError object generated by the parser. + +*Notes*: +- Returned value must be freed with [jerry_release_value](#jerry_release_value) + when it is no longer needed. +- This API depends on a build option (`JERRY_ERROR_MESSAGES`) and can be checked + in runtime with the `JERRY_FEATURE_ERROR_MESSAGES` feature enum value, + see: [jerry_is_feature_enabled](#jerry_is_feature_enabled). + +**Prototype** + +```c +jerry_value_t jerry_get_syntax_error_location (jerry_value_t value, + jerry_syntax_error_location_t *error_location_p); +``` +- `value` - SyntaxError object +- `error_location_p` - output location info (may be NULL if only the resource name is needed) +- return + - resource name - if the `value` object has location info data + - error - otherwise + +*New in version [[NEXT_RELEASE]]*. 
+ +**Example** + +[doctest]: # () + +```c +#include "jerryscript.h" + +int +main (void) +{ + jerry_init (JERRY_INIT_EMPTY); + + const jerry_char_t script[] = "aa bb"; + + jerry_value_t result_value = jerry_parse (script, sizeof (script) - 1, NULL); + + jerry_syntax_error_location_t error_location; + jerry_value_t resource_value = jerry_get_syntax_error_location (result_value, &error_location); + + if (jerry_value_is_string (resource_value)) + { + /* Prints the location of the error. */ + } + + jerry_release_value (resource_value); + jerry_release_value (result_value); + + jerry_cleanup (); + return 0; +} +``` + +**See also** + +- [jerry_syntax_error_location_t](#jerry_syntax_error_location_t) + # Functions for realm objects diff --git a/jerry-core/api/jerry.c b/jerry-core/api/jerry.c index 6566e184a0..6487708e78 100644 --- a/jerry-core/api/jerry.c +++ b/jerry-core/api/jerry.c @@ -5773,6 +5773,64 @@ jerry_free_source_info (jerry_source_info_t *source_info_p) /**< source info blo #endif /* JERRY_FUNCTION_TO_STRING */ } /* jerry_free_source_info */ +/** + * Gets the resource name and location info assigned to a SyntaxError object generated by the parser. 
+ * + * @return resource name, if a location info is available + * error, otherwise + */ +jerry_value_t +jerry_get_syntax_error_location (jerry_value_t value, /**< SyntaxError object */ + jerry_syntax_error_location_t *error_location_p) /**< [out] location info */ +{ + jerry_assert_api_available (); + +#if JERRY_ERROR_MESSAGES + if (ecma_is_value_error_reference (value)) + { + value = ecma_get_extended_primitive_from_value (value)->u.value; + } + + if (ecma_is_value_object (value)) + { + ecma_object_t *object_p = ecma_get_object_from_value (value); + + ecma_string_t *name_p = ecma_get_internal_string (LIT_INTERNAL_MAGIC_STRING_SYNTAX_ERROR_LOCATION); + ecma_property_t *property_p = ecma_find_named_property (object_p, name_p); + + if (property_p != NULL) + { + ecma_value_t error_property_value = ECMA_PROPERTY_VALUE_PTR (property_p)->value; + uint8_t *location_p = ECMA_GET_NON_NULL_POINTER_FROM_POINTER_TAG (uint8_t, error_property_value); + ecma_value_t result = *(ecma_value_t *) location_p; + + if (error_location_p != NULL) + { + size_t size_data = error_property_value & ECMA_SYNTAX_ERROR_ALLOCATION_SIZE_MASK; + location_p += ((size_data + 1) << ECMA_SYNTAX_ERROR_ALLOCATION_UNIT_SHIFT); + + error_location_p->line = ecma_extended_info_decode_vlq (&location_p); + error_location_p->column_start = ecma_extended_info_decode_vlq (&location_p); + + uint32_t difference = ecma_extended_info_decode_vlq (&location_p); + + error_location_p->column_end = error_location_p->column_start + difference; + } + + ecma_ref_ecma_string (ecma_get_string_from_value (result)); + return result; + } + } + + return jerry_throw (ecma_raise_type_error (ECMA_ERR_MSG ("Location is not available"))); +#else /* !JERRY_ERROR_MESSAGES */ + JERRY_UNUSED (value); + JERRY_UNUSED (error_location_p); + + return jerry_throw (ecma_raise_type_error (ECMA_ERR_MSG ("Location is not available"))); +#endif /* JERRY_ERROR_MESSAGES */ +} /* jerry_get_syntax_error_location */ + /** * Replaces the currently active 
realm with another realm. * diff --git a/jerry-core/ecma/base/ecma-extended-info.c b/jerry-core/ecma/base/ecma-extended-info.c index b5fabf5801..070b9a1012 100644 --- a/jerry-core/ecma/base/ecma-extended-info.c +++ b/jerry-core/ecma/base/ecma-extended-info.c @@ -17,7 +17,7 @@ #include "ecma-helpers.h" #include "ecma-extended-info.h" -#if JERRY_ESNEXT || JERRY_FUNCTION_TO_STRING +#if JERRY_ESNEXT || JERRY_ERROR_MESSAGES || JERRY_FUNCTION_TO_STRING /** \addtogroup ecma ECMA * @{ @@ -107,6 +107,10 @@ ecma_extended_info_get_encoded_length (uint32_t value) /**< encoded value */ return length; } /* ecma_extended_info_get_encoded_length */ +#endif /* JERRY_ESNEXT || JERRY_ERROR_MESSAGES || JERRY_FUNCTION_TO_STRING */ + +#if JERRY_ESNEXT || JERRY_FUNCTION_TO_STRING + /** * Get the extended info from a byte code * diff --git a/jerry-core/ecma/base/ecma-extended-info.h b/jerry-core/ecma/base/ecma-extended-info.h index c9fc1c90ed..14c78a164d 100644 --- a/jerry-core/ecma/base/ecma-extended-info.h +++ b/jerry-core/ecma/base/ecma-extended-info.h @@ -23,7 +23,7 @@ * @{ */ -#if JERRY_ESNEXT || JERRY_FUNCTION_TO_STRING +#if JERRY_ESNEXT || JERRY_ERROR_MESSAGES || JERRY_FUNCTION_TO_STRING #include "ecma-globals.h" @@ -46,6 +46,10 @@ uint32_t ecma_extended_info_decode_vlq (uint8_t **buffer_p); void ecma_extended_info_encode_vlq (uint8_t **buffer_p, uint32_t value); uint32_t ecma_extended_info_get_encoded_length (uint32_t value); +#endif /* JERRY_ESNEXT || JERRY_ERROR_MESSAGES || JERRY_FUNCTION_TO_STRING */ + +#if JERRY_ESNEXT || JERRY_FUNCTION_TO_STRING + uint8_t *ecma_compiled_code_resolve_extended_info (const ecma_compiled_code_t *bytecode_header_p); #endif /* JERRY_ESNEXT || JERRY_FUNCTION_TO_STRING */ diff --git a/jerry-core/ecma/base/ecma-gc.c b/jerry-core/ecma/base/ecma-gc.c index 2bd2e7fcac..5534273551 100644 --- a/jerry-core/ecma/base/ecma-gc.c +++ b/jerry-core/ecma/base/ecma-gc.c @@ -1633,6 +1633,18 @@ ecma_gc_free_property (ecma_object_t *object_p, /**< object */ break; } 
#endif /* JERRY_BUILTIN_CONTAINER */ +#if JERRY_ERROR_MESSAGES + case LIT_INTERNAL_MAGIC_STRING_SYNTAX_ERROR_LOCATION: + { + uint8_t *location_p = ECMA_GET_NON_NULL_POINTER_FROM_POINTER_TAG (uint8_t, value); + + ecma_deref_ecma_string (ecma_get_string_from_value (*(ecma_value_t *) location_p)); + + size_t size_data = value & ECMA_SYNTAX_ERROR_ALLOCATION_SIZE_MASK; + jmem_heap_free_block (location_p, (size_data + 1) << ECMA_SYNTAX_ERROR_ALLOCATION_UNIT_SHIFT); + break; + } +#endif /* JERRY_ERROR_MESSAGES */ default: { JERRY_ASSERT (name_cp == LIT_INTERNAL_MAGIC_STRING_NATIVE_POINTER diff --git a/jerry-core/ecma/base/ecma-globals.h b/jerry-core/ecma/base/ecma-globals.h index b4678a32ba..e20b595b2e 100644 --- a/jerry-core/ecma/base/ecma-globals.h +++ b/jerry-core/ecma/base/ecma-globals.h @@ -1953,6 +1953,20 @@ typedef struct ecma_stringbuilder_header_t *header_p; /**< pointer to header */ } ecma_stringbuilder_t; +#if JERRY_ERROR_MESSAGES + +/** + * Allocation block size shift for SyntaxError line info data. + */ +#define ECMA_SYNTAX_ERROR_ALLOCATION_UNIT_SHIFT 3 + +/** + * Mask for extracting allocation size. + */ +#define ECMA_SYNTAX_ERROR_ALLOCATION_SIZE_MASK 0x3 + +#endif /* JERRY_ERROR_MESSAGES */ + #ifndef JERRY_BUILTIN_BIGINT /** * BigInt type. diff --git a/jerry-core/include/jerryscript-core.h b/jerry-core/include/jerryscript-core.h index 2490cd807a..cf8ff681dd 100644 --- a/jerry-core/include/jerryscript-core.h +++ b/jerry-core/include/jerryscript-core.h @@ -367,6 +367,8 @@ jerry_value_t jerry_get_user_value (const jerry_value_t value); bool jerry_is_eval_code (const jerry_value_t value); jerry_source_info_t *jerry_get_source_info (const jerry_value_t value); void jerry_free_source_info (jerry_source_info_t *source_info_p); +jerry_value_t jerry_get_syntax_error_location (jerry_value_t value, + jerry_syntax_error_location_t *error_location_p); /** * Array buffer components. 
diff --git a/jerry-core/include/jerryscript-types.h b/jerry-core/include/jerryscript-types.h index 6b7e1e8e89..1e37d44fdf 100644 --- a/jerry-core/include/jerryscript-types.h +++ b/jerry-core/include/jerryscript-types.h @@ -818,6 +818,16 @@ typedef struct uint32_t source_range_length; /**< source length of the function in the source code */ } jerry_source_info_t; +/** + * Detailed location info for SyntaxErrors. + */ +typedef struct +{ + uint32_t line; /**< start line of the invalid token */ + uint32_t column_start; /**< start column of the invalid token */ + uint32_t column_end; /**< end column of the invalid token */ +} jerry_syntax_error_location_t; + /** * Array buffer types. */ diff --git a/jerry-core/jmem/jmem.h b/jerry-core/jmem/jmem.h index ae29892699..52363b65a3 100644 --- a/jerry-core/jmem/jmem.h +++ b/jerry-core/jmem/jmem.h @@ -268,7 +268,7 @@ void * JERRY_ATTR_PURE jmem_decompress_pointer (uintptr_t compressed_pointer); { \ JERRY_ASSERT ((uintptr_t) tag < (uintptr_t) (JMEM_ALIGNMENT)); \ jmem_cpointer_tag_t compressed_ptr = jmem_compress_pointer (pointer); \ - (cp_value) = (jmem_cpointer_tag_t) ((compressed_ptr << JMEM_TAG_SHIFT) | tag); \ + (cp_value) = (jmem_cpointer_tag_t) ((compressed_ptr << JMEM_TAG_SHIFT) | (tag)); \ } while (false); /** diff --git a/jerry-core/lit/lit-magic-strings.h b/jerry-core/lit/lit-magic-strings.h index c0695f73b2..07155d5084 100644 --- a/jerry-core/lit/lit-magic-strings.h +++ b/jerry-core/lit/lit-magic-strings.h @@ -65,6 +65,7 @@ typedef enum * data properties */ LIT_INTERNAL_MAGIC_STRING_NATIVE_POINTER_WITH_REFERENCES, /**< native pointer info associated with an object * which contains references to other values */ + LIT_INTERNAL_MAGIC_STRING_SYNTAX_ERROR_LOCATION, /**< location info for syntax error */ LIT_INTERNAL_MAGIC_STRING_ENVIRONMENT_RECORD, /**< dynamic environment record needed by class constructors */ LIT_INTERNAL_MAGIC_STRING_CLASS_FIELD_COMPUTED, /**< computed class field name list */ 
LIT_INTERNAL_MAGIC_STRING_CONTAINER_WEAK_REFS, /**< Weak references to the current container object */ diff --git a/jerry-core/parser/js/js-lexer.c b/jerry-core/parser/js/js-lexer.c index e0c7220b33..c5b28c7b05 100644 --- a/jerry-core/parser/js/js-lexer.c +++ b/jerry-core/parser/js/js-lexer.c @@ -683,8 +683,14 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ if (code_point == UINT32_MAX) { - context_p->source_p = source_p; +#if JERRY_ERROR_MESSAGES context_p->token.column = column; + if (source_p + 1 < source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U) + { + column++; + } + context_p->column = column + 1; +#endif /* JERRY_ERROR_MESSAGES */ parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE); } @@ -958,8 +964,6 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ const uint8_t *source_end_p = context_p->source_end_p; parser_line_counter_t line = context_p->line; parser_line_counter_t column = (parser_line_counter_t) (context_p->column + 1); - parser_line_counter_t original_line = line; - parser_line_counter_t original_column = column; size_t length = 0; lexer_lit_location_flags_t status_flags = LEXER_LIT_LOCATION_IS_ASCII; @@ -974,8 +978,10 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ { if (source_p >= source_end_p) { - context_p->token.line = original_line; - context_p->token.column = (parser_line_counter_t) (original_column - 1); +#if JERRY_ERROR_MESSAGES + context_p->line = line; + context_p->column = column + 1; +#endif /* JERRY_ERROR_MESSAGES */ parser_raise_error (context_p, PARSER_ERR_UNTERMINATED_STRING); } @@ -999,33 +1005,16 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ /* Newline is ignored. 
*/ if (*source_p == LIT_CHAR_CR) { - source_p++; - if (source_p < source_end_p - && *source_p == LIT_CHAR_LF) - { -#if JERRY_ESNEXT - raw_length_adjust--; -#endif /* JERRY_ESNEXT */ - source_p++; - } - - line++; - column = 1; - continue; + goto next_line_cr; } else if (*source_p == LIT_CHAR_LF) { - source_p++; - line++; - column = 1; - continue; + goto next_line; } else if (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p)) { - source_p += 3; - line++; - column = 1; - continue; + source_p += 2; + goto next_line; } #if JERRY_ESNEXT @@ -1141,8 +1130,16 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ if (code_point == UINT32_MAX) { +#if JERRY_ERROR_MESSAGES + if (escape_length > (uint32_t) (source_end_p - source_p)) + { + escape_length = (uint32_t) (source_end_p - source_p); + } context_p->token.line = line; - context_p->token.column = (parser_line_counter_t) (column - 1); + context_p->token.column = column - 1; + context_p->line = line; + context_p->column = column + 1; +#endif /* JERRY_ERROR_MESSAGES */ parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE); } @@ -1184,18 +1181,15 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ } else if (*source_p == LIT_CHAR_TAB) { - column = align_column_to_tab (column); /* Subtract -1 because column is increased below. 
*/ - column--; + column = align_column_to_tab (column) - 1; } #if JERRY_ESNEXT else if (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p)) { - source_p += 3; + source_p += 2; length += 3; - line++; - column = 1; - continue; + goto next_line; } else if (str_end_character == LIT_CHAR_GRAVE_ACCENT) { @@ -1204,25 +1198,13 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ if (*source_p == LIT_CHAR_CR) { status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE; - source_p++; length++; - if (source_p < source_end_p - && *source_p == LIT_CHAR_LF) - { - source_p++; - raw_length_adjust--; - } - line++; - column = 1; - continue; + goto next_line_cr; } else if (*source_p == LIT_CHAR_LF) { - source_p++; length++; - line++; - column = 1; - continue; + goto next_line; } } #endif /* JERRY_ESNEXT */ @@ -1232,8 +1214,10 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ #endif /* !JERRY_ESNEXT */ || *source_p == LIT_CHAR_LF) { - context_p->token.line = line; - context_p->token.column = column; +#if JERRY_ERROR_MESSAGES + context_p->line = line; + context_p->column = column; +#endif /* JERRY_ERROR_MESSAGES */ parser_raise_error (context_p, PARSER_ERR_NEWLINE_NOT_ALLOWED); } @@ -1247,6 +1231,29 @@ lexer_parse_string (parser_context_t *context_p, /**< context */ source_p++; length++; } + + continue; + +next_line_cr: + if (source_p + 1 < source_end_p + && source_p[1] == LIT_CHAR_LF) + { +#if JERRY_ESNEXT + raw_length_adjust--; +#endif /* JERRY_ESNEXT */ + source_p++; + } + +next_line: +#if JERRY_ERROR_MESSAGES + if (line == context_p->token.line) + { + context_p->token_end_column = column; + } +#endif /* JERRY_ERROR_MESSAGES */ + source_p++; + line++; + column = 1; } #if JERRY_ESNEXT @@ -3178,6 +3185,12 @@ lexer_expect_identifier (parser_context_t *context_p, /**< context */ parser_raise_error (context_p, PARSER_ERR_AWAIT_NOT_ALLOWED); } #endif /* JERRY_ESNEXT */ + +#if JERRY_ERROR_MESSAGES + /* Provides location info for the invalid 
token. */ + lexer_next_token (context_p); +#endif /* JERRY_ERROR_MESSAGES */ + parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_EXPECTED); } /* lexer_expect_identifier */ @@ -3373,6 +3386,11 @@ lexer_expect_object_literal_id (parser_context_t *context_p, /**< context */ return; } +#if JERRY_ERROR_MESSAGES + /* Provides location info for the invalid token. */ + lexer_next_token (context_p); +#endif /* JERRY_ERROR_MESSAGES */ + parser_raise_error (context_p, PARSER_ERR_PROPERTY_IDENTIFIER_EXPECTED); } /* lexer_expect_object_literal_id */ diff --git a/jerry-core/parser/js/js-parser-internal.h b/jerry-core/parser/js/js-parser-internal.h index 352ea61100..ad23866cd9 100644 --- a/jerry-core/parser/js/js-parser-internal.h +++ b/jerry-core/parser/js/js-parser-internal.h @@ -592,6 +592,9 @@ typedef struct const uint8_t *source_end_p; /**< last source byte */ parser_line_counter_t line; /**< current line */ parser_line_counter_t column; /**< current column */ +#if JERRY_ERROR_MESSAGES + parser_line_counter_t token_end_column; /**< token end column for multiline tokens */ +#endif /* JERRY_ERROR_MESSAGES */ /* Scanner members. 
*/ scanner_info_t *next_scanner_info_p; /**< next scanner info block */ diff --git a/jerry-core/parser/js/js-parser.c b/jerry-core/parser/js/js-parser.c index f29a10276b..70181ec59f 100644 --- a/jerry-core/parser/js/js-parser.c +++ b/jerry-core/parser/js/js-parser.c @@ -2351,38 +2351,64 @@ parser_parse_source (void *source_p, /**< source code */ } #if JERRY_ERROR_MESSAGES - ecma_string_t *err_str_p; + if (context.error != PARSER_ERR_INVALID_REGEXP) + { + ecma_raise_syntax_error (parser_error_to_string (context.error)); + } - if (context.error == PARSER_ERR_INVALID_REGEXP) + const size_t allocation_unit_mask = (1 << ECMA_SYNTAX_ERROR_ALLOCATION_UNIT_SHIFT) - 1; + size_t total_size = sizeof (ecma_value_t) + allocation_unit_mask; + uint32_t difference = 0; + + total_size += ecma_extended_info_get_encoded_length (context.token.line); + total_size += ecma_extended_info_get_encoded_length (context.token.column); + + if (context.token.line == context.line) { - ecma_value_t error = jcontext_take_exception (); - ecma_property_t *prop_p = ecma_find_named_property (ecma_get_object_from_value (error), - ecma_get_magic_string (LIT_MAGIC_STRING_MESSAGE)); - ecma_free_value (error); - JERRY_ASSERT (prop_p); - err_str_p = ecma_get_string_from_value (ECMA_PROPERTY_VALUE_PTR (prop_p)->value); - ecma_ref_ecma_string (err_str_p); + JERRY_ASSERT (context.column >= context.token.column); + + difference = context.column - context.token.column; + + if (difference == 0 && context.source_p != context.source_end_p) + { + difference = 1; + } } else { - const lit_utf8_byte_t *err_bytes_p = (const lit_utf8_byte_t *) parser_error_to_string (context.error); - lit_utf8_size_t err_bytes_size = lit_zt_utf8_string_size (err_bytes_p); - err_str_p = ecma_new_ecma_string_from_utf8 (err_bytes_p, err_bytes_size); + JERRY_ASSERT (context.line > context.token.line); + + difference = context.token_end_column - context.token.column; } - ecma_value_t err_str_val = ecma_make_string_value (err_str_p); - 
ecma_value_t line_str_val = ecma_make_uint32_value (context.token.line); - ecma_value_t col_str_val = ecma_make_uint32_value (context.token.column); - ecma_raise_standard_error_with_format (JERRY_ERROR_SYNTAX, - "% [%:%:%]", - err_str_val, - resource_name, - line_str_val, - col_str_val); + total_size += ecma_extended_info_get_encoded_length (difference); + total_size &= ~allocation_unit_mask; + + uint8_t *location_p = jmem_heap_alloc_block (total_size); + + *(ecma_value_t *) location_p = resource_name; + ecma_ref_ecma_string (ecma_get_string_from_value (resource_name)); + + ecma_value_t error_property_value; + ECMA_SET_NON_NULL_POINTER_TAG (error_property_value, location_p, 0); + + location_p += total_size; + + total_size = (total_size >> ECMA_SYNTAX_ERROR_ALLOCATION_UNIT_SHIFT) - 1; + JERRY_ASSERT (total_size <= ECMA_SYNTAX_ERROR_ALLOCATION_SIZE_MASK); + error_property_value |= (ecma_value_t) total_size; + + ecma_extended_info_encode_vlq (&location_p, context.token.line); + ecma_extended_info_encode_vlq (&location_p, context.token.column); + ecma_extended_info_encode_vlq (&location_p, difference); + + ecma_object_t *error_object_p = ecma_get_object_from_value (JERRY_CONTEXT (error_value)); + + ecma_string_t *name_p = ecma_get_internal_string (LIT_INTERNAL_MAGIC_STRING_SYNTAX_ERROR_LOCATION); + ecma_property_value_t *property_value_p; - ecma_free_value (col_str_val); - ecma_free_value (line_str_val); - ecma_deref_ecma_string (err_str_p); + property_value_p = ecma_create_named_data_property (error_object_p, name_p, 0, NULL); + property_value_p->value = error_property_value; #else /* !JERRY_ERROR_MESSAGES */ if (context.error == PARSER_ERR_INVALID_REGEXP) { diff --git a/jerry-main/main-utils.c b/jerry-main/main-utils.c index 7fe59ce5e0..1e048f0764 100644 --- a/jerry-main/main-utils.c +++ b/jerry-main/main-utils.c @@ -304,83 +304,54 @@ main_print_unhandled_exception (jerry_value_t error_value) /**< error value */ jerry_char_t err_str_buf[256]; - jerry_value_t 
err_str_val = jerry_value_to_string (error_value); - jerry_size_t err_str_size = jerry_get_utf8_string_size (err_str_val); + jerry_syntax_error_location_t location = { 0 }; + jerry_value_t resource_value = jerry_get_syntax_error_location (error_value, &location); - if (err_str_size >= 256) - { - const char msg[] = "[Error message too long]"; - err_str_size = sizeof (msg) / sizeof (char) - 1; - memcpy (err_str_buf, msg, err_str_size + 1); - } - else + /* Certain unicode newlines are not supported, and tabs do not update + * column position, so the error printing is not always precise. */ + + if (jerry_value_is_string (resource_value)) { - jerry_size_t string_end = jerry_string_to_utf8_char_buffer (err_str_val, err_str_buf, err_str_size); - assert (string_end == err_str_size); - err_str_buf[string_end] = 0; + jerry_size_t resource_str_size = jerry_get_utf8_string_size (resource_value); - if (jerry_is_feature_enabled (JERRY_FEATURE_ERROR_MESSAGES) - && jerry_get_error_type (error_value) == JERRY_ERROR_SYNTAX) + if (resource_str_size < sizeof (err_str_buf)) { - jerry_char_t *string_end_p = err_str_buf + string_end; - unsigned int err_line = 0; - unsigned int err_col = 0; - char *path_str_p = NULL; - char *path_str_end_p = NULL; - - /* 1. 
parse column and line information */ - for (jerry_char_t *current_p = err_str_buf; current_p < string_end_p; current_p++) - { - if (*current_p == '[') - { - current_p++; - - if (*current_p == '<') - { - break; - } + jerry_string_to_utf8_char_buffer (resource_value, err_str_buf, resource_str_size); + err_str_buf[resource_str_size] = 0; - path_str_p = (char *) current_p; - while (current_p < string_end_p && *current_p != ':') - { - current_p++; - } - - path_str_end_p = (char *) current_p++; + uint32_t err_line = location.line; + uint32_t err_col = location.column_start; - err_line = (unsigned int) strtol ((char *) current_p, (char **) &current_p, 10); - - current_p++; - - err_col = (unsigned int) strtol ((char *) current_p, NULL, 10); - break; - } - } /* for */ + jerry_port_log (JERRY_LOG_LEVEL_ERROR, "Error at %s:%d:%d\n", err_str_buf, (int) err_line, (int) err_col); - if (err_line != 0 && err_col > 0 && err_col < SYNTAX_ERROR_MAX_LINE_LENGTH) + if (err_col > 0 && err_col < SYNTAX_ERROR_MAX_LINE_LENGTH) { - /* Temporarily modify the error message, so we can use the path. */ - *path_str_end_p = '\0'; - size_t source_size; - uint8_t *source_p = jerry_port_read_source (path_str_p, &source_size); - - /* Revert the error message. */ - *path_str_end_p = ':'; + uint8_t *source_p = jerry_port_read_source ((char *) err_str_buf, &source_size); if (source_p != NULL) { - uint32_t curr_line = 1; uint32_t pos = 0; /* 2. seek and print */ - while (pos < source_size && curr_line < err_line) + while (pos < source_size && err_line > 1) { - if (source_p[pos] == '\n') + switch (source_p[pos]) { - curr_line++; + case '\r': + { + if (pos + 1 < source_size && source_p[pos + 1] == '\n') + { + pos++; + } + /* FALLTHRU */ + } + case '\n': + { + err_line--; + break; + } } - pos++; } @@ -389,11 +360,15 @@ main_print_unhandled_exception (jerry_value_t error_value) /**< error value */ * - The current position is valid (it is not the end of the source). * - The current character is not a newline. 
**/ - for (uint32_t char_count = 0; - (char_count < SYNTAX_ERROR_MAX_LINE_LENGTH) && (pos < source_size) && (source_p[pos] != '\n'); - char_count++, pos++) + uint32_t char_count = 0; + + while (char_count < SYNTAX_ERROR_MAX_LINE_LENGTH + && pos < source_size + && source_p[pos] != '\r' + && source_p[pos] != '\n') { - jerry_port_log (JERRY_LOG_LEVEL_ERROR, "%c", source_p[pos]); + jerry_port_log (JERRY_LOG_LEVEL_ERROR, "%c", source_p[pos++]); + char_count++; } jerry_port_log (JERRY_LOG_LEVEL_ERROR, "\n"); @@ -401,16 +376,53 @@ main_print_unhandled_exception (jerry_value_t error_value) /**< error value */ while (--err_col) { - jerry_port_log (JERRY_LOG_LEVEL_ERROR, "~"); + jerry_port_log (JERRY_LOG_LEVEL_ERROR, " "); + } + + err_col = location.column_end; + + if (err_col > char_count) + { + err_col = char_count + 1; + } + + if (err_col <= location.column_start) + { + jerry_port_log (JERRY_LOG_LEVEL_ERROR, "^\n\n"); } + else + { + err_col -= location.column_start; + + do + { + jerry_port_log (JERRY_LOG_LEVEL_ERROR, "^"); + } + while (--err_col); - jerry_port_log (JERRY_LOG_LEVEL_ERROR, "^\n\n"); + jerry_port_log (JERRY_LOG_LEVEL_ERROR, "\n\n"); + } } } } } - jerry_port_log (JERRY_LOG_LEVEL_ERROR, "%s\n", err_str_buf); + jerry_release_value (resource_value); + + jerry_value_t err_str_val = jerry_value_to_string (error_value); + jerry_size_t err_str_size = jerry_get_utf8_string_size (err_str_val); + + if (err_str_size >= sizeof (err_str_buf)) + { + jerry_port_log (JERRY_LOG_LEVEL_ERROR, "[Error message too long]\n"); + } + else + { + jerry_string_to_utf8_char_buffer (err_str_val, err_str_buf, err_str_size); + err_str_buf[err_str_size] = 0; + jerry_port_log (JERRY_LOG_LEVEL_ERROR, "%s\n", err_str_buf); + } + jerry_release_value (err_str_val); if (jerry_value_is_object (error_value)) diff --git a/tests/unit-core/CMakeLists.txt b/tests/unit-core/CMakeLists.txt index 782a7a7231..bc9d021bd9 100644 --- a/tests/unit-core/CMakeLists.txt +++ b/tests/unit-core/CMakeLists.txt @@ 
-88,6 +88,7 @@ set(SOURCE_UNIT_TEST_MAIN_MODULES test-stringbuilder.c test-strings.c test-symbol.c + test-syntax-error-location.c test-to-integer.c test-to-length.c test-to-property-descriptor.c diff --git a/tests/unit-core/test-api.c b/tests/unit-core/test-api.c index d3dff7c3ef..7dcdd0e109 100644 --- a/tests/unit-core/test-api.c +++ b/tests/unit-core/test-api.c @@ -1065,32 +1065,8 @@ main (void) test_syntax_error ("b = 'hello';\nvar a = (;", NULL, - "SyntaxError: Unexpected end of input [:2:10]", + "SyntaxError: Unexpected end of input", false); - - parse_options.options = JERRY_PARSE_HAS_RESOURCE; - parse_options.resource_name = jerry_create_string ((const jerry_char_t *) "filename.js"); - - test_syntax_error ("b = 'hello';\nvar a = (;", - &parse_options, - "SyntaxError: Unexpected end of input [filename.js:2:10]", - false); - - test_syntax_error ("eval(\"var b;\\nfor (,); \");", - &parse_options, - "SyntaxError: Unexpected end of input [:2:6]", - true); - - parse_options.options |= JERRY_PARSE_HAS_START; - parse_options.start_line = 10; - parse_options.start_column = 20; - - test_syntax_error ("for (var a in []", - &parse_options, - "SyntaxError: Expected ')' token [filename.js:10:36]", - false); - - jerry_release_value (parse_options.resource_name); jerry_cleanup (); } diff --git a/tests/unit-core/test-syntax-error-location.c b/tests/unit-core/test-syntax-error-location.c new file mode 100644 index 0000000000..4d1b44db33 --- /dev/null +++ b/tests/unit-core/test-syntax-error-location.c @@ -0,0 +1,136 @@ +/* + * Copyright JS Foundation and other contributors, http://js.foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jerryscript.h" + +#include "test-common.h" + +static void +compare_string (jerry_value_t left_string_value, /**< left value to compare */ + const char *right_string_p) /**< right value to compare */ +{ + TEST_ASSERT (jerry_value_is_string (left_string_value)); + + jerry_char_t buffer[128]; + size_t size = strlen (right_string_p); + + TEST_ASSERT (size < sizeof (buffer)); + TEST_ASSERT (size == jerry_get_string_size (left_string_value)); + + jerry_string_to_char_buffer (left_string_value, buffer, (jerry_size_t) size); + TEST_ASSERT (memcmp (buffer, right_string_p, size) == 0); +} /* compare_string */ + +static void +compare_location (jerry_syntax_error_location_t *location_p, /**< location to check */ + uint32_t line, /**< expected start line of the invalid token */ + uint32_t column_start, /**< expected start column of the invalid token */ + uint32_t column_end) /**< expected end column of the invalid token */ +{ + TEST_ASSERT (location_p->line == line); + TEST_ASSERT (location_p->column_start == column_start); + TEST_ASSERT (location_p->column_end == column_end); +} /* compare_location */ + +int +main (void) +{ + TEST_INIT (); + + if (!jerry_is_feature_enabled (JERRY_FEATURE_ERROR_MESSAGES)) + { + return 0; + } + + jerry_init (JERRY_INIT_EMPTY); + + jerry_syntax_error_location_t error_location; + + jerry_value_t error_value = jerry_create_number (13); + jerry_value_t resource_value = jerry_get_syntax_error_location (error_value, NULL); + TEST_ASSERT (jerry_value_is_error (resource_value)); + jerry_release_value (resource_value); + jerry_release_value 
(error_value); + + char *source_p = TEST_STRING_LITERAL ("new SyntaxError('Bad token!')"); + error_value = jerry_eval ((jerry_char_t *) source_p, strlen (source_p), JERRY_PARSE_NO_OPTS); + TEST_ASSERT (jerry_get_error_type (error_value) == JERRY_ERROR_SYNTAX); + error_location.line = 100; + error_location.column_start = 200; + error_location.column_end = 300; + resource_value = jerry_get_syntax_error_location (error_value, &error_location); + /* This SyntaxError is not generated by the parser. */ + TEST_ASSERT (jerry_value_is_error (resource_value)); + compare_location (&error_location, 100, 200, 300); + jerry_release_value (resource_value); + jerry_release_value (error_value); + + source_p = TEST_STRING_LITERAL ("\n\naa bb1 cc"); + error_value = jerry_parse ((jerry_char_t *) source_p, strlen (source_p), NULL); + TEST_ASSERT (jerry_get_error_type (error_value) == JERRY_ERROR_SYNTAX); + resource_value = jerry_get_syntax_error_location (error_value, NULL); + compare_string (resource_value, ""); + jerry_release_value (resource_value); + resource_value = jerry_get_syntax_error_location (error_value, &error_location); + compare_string (resource_value, ""); + compare_location (&error_location, 3, 4, 7); + jerry_release_value (resource_value); + jerry_release_value (error_value); + + source_p = TEST_STRING_LITERAL ("var s = '1234567890'\n" + "for (var i = 0; i < 6; i++) {\n" + " s += s\n" + "}\n" + "eval('aa \"' + s + '\"')"); + error_value = jerry_eval ((jerry_char_t *) source_p, strlen (source_p), JERRY_PARSE_NO_OPTS); + TEST_ASSERT (jerry_get_error_type (error_value) == JERRY_ERROR_SYNTAX); + error_value = jerry_get_value_from_error (error_value, true); + TEST_ASSERT (!jerry_value_is_error (error_value)); + resource_value = jerry_get_syntax_error_location (error_value, &error_location); + compare_string (resource_value, ""); + compare_location (&error_location, 1, 4, 646); + jerry_release_value (resource_value); + jerry_release_value (error_value); + + 
jerry_parse_options_t parse_options; + parse_options.options = JERRY_PARSE_HAS_RESOURCE | JERRY_PARSE_HAS_START; + parse_options.resource_name = jerry_create_string ((const jerry_char_t *) "[generated.js:1:2]"); + parse_options.start_line = 1234567890; + parse_options.start_column = 1234567890; + + source_p = TEST_STRING_LITERAL ("aa(>>=2)"); + error_value = jerry_parse ((jerry_char_t *) source_p, strlen (source_p), &parse_options); + TEST_ASSERT (jerry_get_error_type (error_value) == JERRY_ERROR_SYNTAX); + resource_value = jerry_get_syntax_error_location (error_value, &error_location); + compare_string (resource_value, "[generated.js:1:2]"); + compare_location (&error_location, 1234567890, 1234567893, 1234567896); + jerry_release_value (resource_value); + jerry_release_value (error_value); + + source_p = TEST_STRING_LITERAL ("\n\n\nabcd 'ab\\\ncd\\\ne'"); + error_value = jerry_parse ((jerry_char_t *) source_p, strlen (source_p), &parse_options); + TEST_ASSERT (jerry_get_error_type (error_value) == JERRY_ERROR_SYNTAX); + resource_value = jerry_get_syntax_error_location (error_value, &error_location); + compare_string (resource_value, "[generated.js:1:2]"); + compare_location (&error_location, 1234567893, 6, 10); + jerry_release_value (resource_value); + jerry_release_value (error_value); + + jerry_release_value (parse_options.resource_name); + + jerry_cleanup (); + return 0; +} /* main */