From 1b19fc65ad84b223876c50dd4fcd7d5a08c311dc Mon Sep 17 00:00:00 2001 From: Henri Nurmi Date: Tue, 27 Aug 2024 17:39:34 -0700 Subject: [PATCH] getaddrinfo: improve the service/port resolution (#524) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hello, While experimenting with the `wasm32-wasip2` target and CPython, I discovered an issue with the `getaddrinfo()` implementation: it fails to resolve the provided service into a port number, causing `sin_port` to always be set to 0. This issue leads to failures in network-related functions that rely on `getaddrinfo()`, such as Python's `urllib3` library, which passes the result directly to `connect()`. This results in connection attempts using a port value of 0, which naturally fails. ### Minimal example to reproduce the problem ```c #include #include #include int main(void) { struct addrinfo *res = NULL; getaddrinfo("google.com", "443", NULL, &res); for (struct addrinfo *i = res; i != NULL; i = i->ai_next) { char str[INET6_ADDRSTRLEN]; if (i->ai_addr->sa_family == AF_INET) { struct sockaddr_in *p = (struct sockaddr_in *)i->ai_addr; int port = ntohs(p->sin_port); printf("%s: %i\n", inet_ntop(AF_INET, &p->sin_addr, str, sizeof(str)), port); } else if (i->ai_addr->sa_family == AF_INET6) { struct sockaddr_in6 *p = (struct sockaddr_in6 *)i->ai_addr; int port = ntohs(p->sin6_port); printf("%s: %i\n", inet_ntop(AF_INET6, &p->sin6_addr, str, sizeof(str)), port); } } return 0; } ``` ``` $ /opt/wasi-sdk/bin/clang -target wasm32-wasip2 -o foo foo.c $ wasmtime run -S allow-ip-name-lookup=y foo 216.58.211.238: 0 2a00:1450:4026:808::200e: 0 ``` Expected output: ``` 216.58.211.238: 443 2a00:1450:4026:808::200e: 443 ``` ### Root Cause The root cause is that `getaddrinfo()` does not correctly translate the provided service into a port number. As described in the `getaddrinfo()` man [page](https://man7.org/linux/man-pages/man3/getaddrinfo.3.html), the function should: > service sets the port in each returned address structure. If this argument is a service name (see [services(5)](https://man7.org/linux/man-pages/man5/services.5.html)), it is translated to the corresponding port number. This argument can also be specified as a decimal number, which is simply converted to binary. If service is NULL, then the port number of the returned socket addresses will be left uninitialized. ### Proposed Fix This pull request addresses the issue by implementing the following behavior for `getaddrinfo()`: * If the service is `NULL`, the port number in the returned socket addresses remains uninitialized. * The value is converted to an integer and validated if the service is numeric. The PR does not currently add support for translating named services into port numbers because `getservbyname()` has not been implemented. In cases where a named service is provided, the `EAI_NONAME` error code is returned. --- expected/wasm32-wasip2/defined-symbols.txt | 1 + .../headers/private/wasi/sockets_utils.h | 1 + libc-bottom-half/sources/netdb.c | 21 +++++++++++++++--- libc-bottom-half/sources/sockets_utils.c | 22 +++++++++++++++++++ 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/expected/wasm32-wasip2/defined-symbols.txt b/expected/wasm32-wasip2/defined-symbols.txt index 2c3ba420..8960ca0d 100644 --- a/expected/wasm32-wasip2/defined-symbols.txt +++ b/expected/wasm32-wasip2/defined-symbols.txt @@ -309,6 +309,7 @@ __wasi_sockets_utils__map_error __wasi_sockets_utils__output_addr_validate __wasi_sockets_utils__output_addr_write __wasi_sockets_utils__parse_address +__wasi_sockets_utils__parse_port __wasi_sockets_utils__posix_family __wasi_sockets_utils__stream __wasi_sockets_utils__tcp_bind diff --git a/libc-bottom-half/headers/private/wasi/sockets_utils.h b/libc-bottom-half/headers/private/wasi/sockets_utils.h index 93cf1f45..396c6301 100644 --- a/libc-bottom-half/headers/private/wasi/sockets_utils.h +++ b/libc-bottom-half/headers/private/wasi/sockets_utils.h @@ -49,5 +49,6 @@ bool __wasi_sockets_utils__stream(udp_socket_t *socket, udp_socket_streams_t *result, network_error_code_t *error); void __wasi_sockets_utils__drop_streams(udp_socket_streams_t streams); +int __wasi_sockets_utils__parse_port(const char *port); #endif diff --git a/libc-bottom-half/sources/netdb.c b/libc-bottom-half/sources/netdb.c index c30a3967..f6b1718b 100644 --- a/libc-bottom-half/sources/netdb.c +++ b/libc-bottom-half/sources/netdb.c @@ -1,6 +1,7 @@ #include #include #include +#include #include @@ -25,6 +26,7 @@ static int map_error(ip_name_lookup_error_code_t error) } static int add_addr(ip_name_lookup_option_ip_address_t address, + in_port_t port, const struct addrinfo *restrict hint, struct addrinfo **restrict current, struct addrinfo **restrict res) @@ -51,7 +53,7 @@ static int add_addr(ip_name_lookup_option_ip_address_t address, struct sockaddr_in sockaddr = { .sin_family = AF_INET, - .sin_port = 0, + .sin_port = port, .sin_addr = { .s_addr = ip.f0 | (ip.f1 << 8) | (ip.f2 << 16) | (ip.f3 << 24) }, }; @@ -76,7 +78,7 @@ static int add_addr(ip_name_lookup_option_ip_address_t address, struct sockaddr_in6 sockaddr = { .sin6_family = AF_INET6, - .sin6_port = 0, + .sin6_port = port, .sin6_addr = { .s6_addr = { ip.f0 >> 8, @@ -152,12 +154,25 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, &error)) { ip_name_lookup_borrow_resolve_address_stream_t stream_borrow = ip_name_lookup_borrow_resolve_address_stream(stream); + // The 'serv' parameter can be either a port number or a service name. + // + // TODO wasi-sockets: If the conversion of 'serv' to a valid port + // number fails, use getservbyname() to resolve the service name to + // its corresponding port number. This can be done after the + // getservbyname function is implemented.) + int port = 0; + if (serv != NULL) { + port = __wasi_sockets_utils__parse_port(serv); + if (port < 0) { + return EAI_NONAME; + } + } while (true) { ip_name_lookup_option_ip_address_t address; if (ip_name_lookup_method_resolve_address_stream_resolve_next_address( stream_borrow, &address, &error)) { if (address.is_some) { - int error = add_addr(address, hint, + int error = add_addr(address, htons(port), hint, ¤t, res); if (error) { return error; diff --git a/libc-bottom-half/sources/sockets_utils.c b/libc-bottom-half/sources/sockets_utils.c index 4f5658a5..d98fd02a 100644 --- a/libc-bottom-half/sources/sockets_utils.c +++ b/libc-bottom-half/sources/sockets_utils.c @@ -1,4 +1,6 @@ #include +#include +#include #include @@ -460,3 +462,23 @@ bool __wasi_sockets_utils__stream( return true; } + +int __wasi_sockets_utils__parse_port(const char *restrict port_str) +{ + char *end = NULL; + errno = 0; + long port = strtol(port_str, &end, 10); + + // Check for various possible errors: + // - the input is not a valid number + // - the entire input string is not consumed + // - the number is not within the valid port range + // - the input does not start with a digit (strtol allows leading + // whitespace and optional sign) + if (errno != 0 || end == NULL || *end != '\0' || port < 0 + || port > 65535 || !isdigit(*port_str)) { + return -1; + } + + return (int)port; +}