Skip to content

Commit

Permalink
aws versions of isspace() isalpha() etc (#642)
Browse files Browse the repository at this point in the history
Versions of isspace() isalpha() etc for use with ASCII/UTF-8.
C's built-in functions are affected by user's locale.
  • Loading branch information
graebm authored Jun 4, 2020
1 parent e9dbef0 commit 94b62d2
Show file tree
Hide file tree
Showing 8 changed files with 259 additions and 48 deletions.
40 changes: 40 additions & 0 deletions include/aws/common/byte_buf.h
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,46 @@ AWS_COMMON_API bool aws_byte_buf_write_be64(struct aws_byte_buf *buf, uint64_t x
*/
AWS_COMMON_API bool aws_byte_buf_write_float_be64(struct aws_byte_buf *buf, double x);

/**
* Like isalnum(), but ignores C locale.
* Returns true if ch has the value of ASCII/UTF-8: 'a'-'z', 'A'-'Z', or '0'-'9'.
* Operates on a single byte: any byte >= 0x80 (e.g. part of a multi-byte
* UTF-8 sequence) yields false.
*/
AWS_COMMON_API bool aws_isalnum(uint8_t ch);

/**
* Like isalpha(), but ignores C locale.
* Returns true if ch has the value of ASCII/UTF-8: 'a'-'z' or 'A'-'Z'.
* Operates on a single byte: any byte >= 0x80 yields false.
*/
AWS_COMMON_API bool aws_isalpha(uint8_t ch);

/**
* Like isdigit().
* Returns true if ch has the value of ASCII/UTF-8: '0'-'9'.
*
* Note: C's built-in isdigit() is also supposed to ignore the C locale,
* but cppreference.com claims "some implementations (e.g. Microsoft in 1252 codepage)
* may classify additional single-byte characters as digits"
*/
AWS_COMMON_API bool aws_isdigit(uint8_t ch);

/**
* Like isxdigit().
* Returns true if ch has the value of ASCII/UTF-8: '0'-'9', 'a'-'f', or 'A'-'F'.
*
* Note: C's built-in isxdigit() is also supposed to ignore the C locale,
* but cppreference.com claims "some implementations (e.g. Microsoft in 1252 codepage)
* may classify additional single-byte characters as digits"
*/
AWS_COMMON_API bool aws_isxdigit(uint8_t ch);

/**
* Like isspace(), but ignores C locale.
* Returns true if ch has the value of ASCII/UTF-8: space (0x20), form feed (0x0C),
* line feed (0x0A), carriage return (0x0D), horizontal tab (0x09), or vertical tab (0x0B).
* Every other byte value, including all bytes >= 0x80, yields false.
*/
AWS_COMMON_API bool aws_isspace(uint8_t ch);

AWS_EXTERN_C_END

#endif /* AWS_COMMON_BYTE_BUF_H */
2 changes: 1 addition & 1 deletion include/aws/common/string.inl
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ bool aws_c_string_is_valid(const char *str) {
*/
AWS_STATIC_IMPL
bool aws_char_is_space(uint8_t c) {
return isspace((int)c) != 0;
return aws_isspace(c);
}

AWS_EXTERN_C_END
Expand Down
35 changes: 35 additions & 0 deletions source/byte_buf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1444,3 +1444,38 @@ static struct aws_byte_cursor s_null_terminator_cursor = AWS_BYTE_CUR_INIT_FROM_
int aws_byte_buf_append_null_terminator(struct aws_byte_buf *buf) {
return aws_byte_buf_append_dynamic(buf, &s_null_terminator_cursor);
}

/* Locale-independent isalnum(): true only for '0'-'9', 'A'-'Z' and 'a'-'z'. */
bool aws_isalnum(uint8_t ch) {
    if ('0' <= ch && ch <= '9') {
        return true;
    }
    if ('A' <= ch && ch <= 'Z') {
        return true;
    }
    return 'a' <= ch && ch <= 'z';
}

/* Locale-independent isalpha(): true only for 'a'-'z' and 'A'-'Z'. */
bool aws_isalpha(uint8_t ch) {
    const bool is_lower = ch >= 'a' && ch <= 'z';
    const bool is_upper = ch >= 'A' && ch <= 'Z';
    return is_lower || is_upper;
}

/* Locale-independent isdigit(): true only for '0'-'9'. */
bool aws_isdigit(uint8_t ch) {
    /* Unsigned subtraction wraps for anything below '0', so a single
     * comparison covers both ends of the range. */
    return ((unsigned)ch - (unsigned)'0') < 10u;
}

/* Locale-independent isxdigit(): true only for '0'-'9', 'a'-'f' and 'A'-'F'. */
bool aws_isxdigit(uint8_t ch) {
    switch (ch) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            return true;
        default:
            return false;
    }
}

/*
 * Locale-independent isspace(): true only for the six ASCII whitespace bytes.
 *   0x09 '\t' horizontal tab
 *   0x0A '\n' line feed
 *   0x0B '\v' vertical tab
 *   0x0C '\f' form feed
 *   0x0D '\r' carriage return
 *   0x20 ' '  space
 */
bool aws_isspace(uint8_t ch) {
    /* 0x09..0x0D is one contiguous run; space (0x20) is the lone outlier. */
    const bool is_control_whitespace = ch >= 0x09 && ch <= 0x0D;
    return is_control_whitespace || ch == 0x20;
}
62 changes: 31 additions & 31 deletions source/date_time.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ static int s_parse_iso_8601_basic(const struct aws_byte_cursor *date_str_cursor,
size_t sub_index = index - state_start_index;
switch (state) {
case ON_YEAR:
if (isdigit(c)) {
if (aws_isdigit(c)) {
parsed_time->tm_year = parsed_time->tm_year * 10 + (c - '0');
if (sub_index == 3) {
state = ON_MONTH;
Expand All @@ -238,7 +238,7 @@ static int s_parse_iso_8601_basic(const struct aws_byte_cursor *date_str_cursor,
break;

case ON_MONTH:
if (isdigit(c)) {
if (aws_isdigit(c)) {
parsed_time->tm_mon = parsed_time->tm_mon * 10 + (c - '0');
if (sub_index == 1) {
state = ON_MONTH_DAY;
Expand All @@ -254,15 +254,15 @@ static int s_parse_iso_8601_basic(const struct aws_byte_cursor *date_str_cursor,
if (c == 'T' && sub_index == 2) {
state = ON_HOUR;
state_start_index = index + 1;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_mday = parsed_time->tm_mday * 10 + (c - '0');
} else {
error = true;
}
break;

case ON_HOUR:
if (isdigit(c)) {
if (aws_isdigit(c)) {
parsed_time->tm_hour = parsed_time->tm_hour * 10 + (c - '0');
if (sub_index == 1) {
state = ON_MINUTE;
Expand All @@ -274,7 +274,7 @@ static int s_parse_iso_8601_basic(const struct aws_byte_cursor *date_str_cursor,
break;

case ON_MINUTE:
if (isdigit(c)) {
if (aws_isdigit(c)) {
parsed_time->tm_min = parsed_time->tm_min * 10 + (c - '0');
if (sub_index == 1) {
state = ON_SECOND;
Expand All @@ -286,7 +286,7 @@ static int s_parse_iso_8601_basic(const struct aws_byte_cursor *date_str_cursor,
break;

case ON_SECOND:
if (isdigit(c)) {
if (aws_isdigit(c)) {
parsed_time->tm_sec = parsed_time->tm_sec * 10 + (c - '0');
if (sub_index == 1) {
state = ON_TZ;
Expand All @@ -300,7 +300,7 @@ static int s_parse_iso_8601_basic(const struct aws_byte_cursor *date_str_cursor,
case ON_TZ:
if (c == 'Z' && (sub_index == 0 || sub_index == 3)) {
state = FINISHED;
} else if (!isdigit(c) || sub_index > 3) {
} else if (!aws_isdigit(c) || sub_index > 3) {
error = true;
}
break;
Expand Down Expand Up @@ -334,7 +334,7 @@ static int s_parse_iso_8601(const struct aws_byte_cursor *date_str_cursor, struc
state = ON_MONTH;
state_start_index = index + 1;
parsed_time->tm_year -= 1900;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_year = parsed_time->tm_year * 10 + (c - '0');
} else {
error = true;
Expand All @@ -345,7 +345,7 @@ static int s_parse_iso_8601(const struct aws_byte_cursor *date_str_cursor, struc
state = ON_MONTH_DAY;
state_start_index = index + 1;
parsed_time->tm_mon -= 1;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_mon = parsed_time->tm_mon * 10 + (c - '0');
} else {
error = true;
Expand All @@ -356,7 +356,7 @@ static int s_parse_iso_8601(const struct aws_byte_cursor *date_str_cursor, struc
if (c == 'T' && index - state_start_index == 2) {
state = ON_HOUR;
state_start_index = index + 1;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_mday = parsed_time->tm_mday * 10 + (c - '0');
} else {
error = true;
Expand All @@ -368,13 +368,13 @@ static int s_parse_iso_8601(const struct aws_byte_cursor *date_str_cursor, struc
if (index - state_start_index == 2) {
state = ON_MINUTE;
state_start_index = index + 1;
if (isdigit(c)) {
if (aws_isdigit(c)) {
state_start_index = index;
advance = false;
} else if (c != ':') {
error = true;
}
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_hour = parsed_time->tm_hour * 10 + (c - '0');
} else {
error = true;
Expand All @@ -386,13 +386,13 @@ static int s_parse_iso_8601(const struct aws_byte_cursor *date_str_cursor, struc
if (index - state_start_index == 2) {
state = ON_SECOND;
state_start_index = index + 1;
if (isdigit(c)) {
if (aws_isdigit(c)) {
state_start_index = index;
advance = false;
} else if (c != ':') {
error = true;
}
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_min = parsed_time->tm_min * 10 + (c - '0');
} else {
error = true;
Expand All @@ -406,7 +406,7 @@ static int s_parse_iso_8601(const struct aws_byte_cursor *date_str_cursor, struc
} else if (c == '.' && index - state_start_index == 2) {
state = ON_TZ;
state_start_index = index + 1;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_sec = parsed_time->tm_sec * 10 + (c - '0');
} else {
error = true;
Expand All @@ -417,7 +417,7 @@ static int s_parse_iso_8601(const struct aws_byte_cursor *date_str_cursor, struc
if (c == 'Z') {
state = FINISHED;
state_start_index = index + 1;
} else if (!isdigit(c)) {
} else if (!aws_isdigit(c)) {
error = true;
}
break;
Expand Down Expand Up @@ -459,32 +459,32 @@ static int s_parse_rfc_822(
if (c == ',') {
state = ON_SPACE_DELIM;
state_start_index = index + 1;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
state = ON_MONTH_DAY;
} else if (!isalpha(c)) {
} else if (!aws_isalpha(c)) {
error = true;
}
break;
case ON_SPACE_DELIM:
if (isspace(c)) {
if (aws_isspace(c)) {
state = ON_MONTH_DAY;
state_start_index = index + 1;
} else {
error = true;
}
break;
case ON_MONTH_DAY:
if (isdigit(c)) {
if (aws_isdigit(c)) {
parsed_time->tm_mday = parsed_time->tm_mday * 10 + (c - '0');
} else if (isspace(c)) {
} else if (aws_isspace(c)) {
state = ON_MONTH;
state_start_index = index + 1;
} else {
error = true;
}
break;
case ON_MONTH:
if (isspace(c)) {
if (aws_isspace(c)) {
int monthNumber =
get_month_number_from_str((const char *)date_str_cursor->ptr, state_start_index, index + 1);

Expand All @@ -495,21 +495,21 @@ static int s_parse_rfc_822(
} else {
error = true;
}
} else if (!isalpha(c)) {
} else if (!aws_isalpha(c)) {
error = true;
}
break;
/* year can be 4 or 2 digits. */
case ON_YEAR:
if (isspace(c) && index - state_start_index == 4) {
if (aws_isspace(c) && index - state_start_index == 4) {
state = ON_HOUR;
state_start_index = index + 1;
parsed_time->tm_year -= 1900;
} else if (isspace(c) && index - state_start_index == 2) {
} else if (aws_isspace(c) && index - state_start_index == 2) {
state = 5;
state_start_index = index + 1;
parsed_time->tm_year += 2000 - 1900;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_year = parsed_time->tm_year * 10 + (c - '0');
} else {
error = true;
Expand All @@ -519,7 +519,7 @@ static int s_parse_rfc_822(
if (c == ':' && index - state_start_index == 2) {
state = ON_MINUTE;
state_start_index = index + 1;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_hour = parsed_time->tm_hour * 10 + (c - '0');
} else {
error = true;
Expand All @@ -529,24 +529,24 @@ static int s_parse_rfc_822(
if (c == ':' && index - state_start_index == 2) {
state = ON_SECOND;
state_start_index = index + 1;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_min = parsed_time->tm_min * 10 + (c - '0');
} else {
error = true;
}
break;
case ON_SECOND:
if (isspace(c) && index - state_start_index == 2) {
if (aws_isspace(c) && index - state_start_index == 2) {
state = ON_TZ;
state_start_index = index + 1;
} else if (isdigit(c)) {
} else if (aws_isdigit(c)) {
parsed_time->tm_sec = parsed_time->tm_sec * 10 + (c - '0');
} else {
error = true;
}
break;
case ON_TZ:
if ((isalnum(c) || c == '-' || c == '+') && (index - state_start_index) < 5) {
if ((aws_isalnum(c) || c == '-' || c == '+') && (index - state_start_index) < 5) {
dt->tz[index - state_start_index] = c;
} else {
error = true;
Expand Down
10 changes: 5 additions & 5 deletions source/posix/system_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ bool aws_is_debugger_present(void) {

/* If it's not 0, then there's a debugger */
for (const char *cur = tracer_pid + sizeof(tracerPidString) - 1; cur <= buf + num_read; ++cur) {
if (!isspace(*cur)) {
return isdigit(*cur) != 0 && *cur != '0';
if (!aws_isspace(*cur)) {
return aws_isdigit(*cur) && *cur != '0';
}
}

Expand Down Expand Up @@ -118,8 +118,8 @@ struct aws_stack_frame_info {
char *s_whitelist_chars(char *path) {
char *cur = path;
while (*cur) {
bool whitelisted =
isalnum(*cur) || isspace(*cur) || *cur == '/' || *cur == '_' || *cur == '.' || (cur > path && *cur == '-');
bool whitelisted = aws_isalnum(*cur) || aws_isspace(*cur) || *cur == '/' || *cur == '_' || *cur == '.' ||
(cur > path && *cur == '-');
if (!whitelisted) {
*cur = '_';
}
Expand Down Expand Up @@ -150,7 +150,7 @@ int s_parse_symbol(const char *symbol, void *addr, struct aws_stack_frame_info *
const char *current_exe = s_get_executable_path();
/* parse exe/shared lib */
const char *exe_start = strstr(symbol, " ");
while (isspace(*exe_start)) {
while (aws_isspace(*exe_start)) {
++exe_start;
}
const char *exe_end = strstr(exe_start, " ");
Expand Down
5 changes: 5 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,11 @@ add_test_case(test_byte_buf_reserve_relative)
add_test_case(test_byte_buf_reset)
add_test_case(test_byte_cursor_compare_lexical)
add_test_case(test_byte_cursor_compare_lookup)
add_test_case(test_isalnum)
add_test_case(test_isalpha)
add_test_case(test_isdigit)
add_test_case(test_isxdigit)
add_test_case(test_isspace)

add_test_case(byte_swap_test)

Expand Down
Loading

0 comments on commit 94b62d2

Please sign in to comment.