Skip to content

Commit

Permalink
prov/udp: detect and use MTU to set max_msg_size and inject_size
Browse files Browse the repository at this point in the history
For each interface detected by the udp provider, determine the MTU of
the interface and use it to set the max_msg_size field of fi_ep_attr and
the inject_size field of fi_tx_attr in the corresponding fi_info element.
When the MTU cannot be determined, the MTU value assumed by previous code
versions (1500) is used.

Signed-off-by: Martin Pokorny <[email protected]>
  • Loading branch information
mpokorny committed Oct 28, 2024
1 parent f236201 commit 2d26352
Show file tree
Hide file tree
Showing 12 changed files with 150 additions and 21 deletions.
7 changes: 5 additions & 2 deletions include/freebsd/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ static inline size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa)
return 0;
}

/*
 * MTU lookup stub: no per-interface MTU query is implemented in this
 * header.
 *
 * @param ifa  interface entry; not inspected
 * @return     always -1
 */
static inline int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa)
{
	(void) ifa;

	return -1;
}

static inline ssize_t ofi_process_vm_readv(pid_t pid,
const struct iovec *local_iov,
unsigned long liovcnt,
Expand Down Expand Up @@ -185,5 +190,3 @@ ofi_recvv_socket(SOCKET fd, const struct iovec *iov, size_t cnt, int flags)
}

#endif /* _FREEBSD_OSD_H_ */


2 changes: 2 additions & 0 deletions include/linux/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ static inline int ofi_hugepage_enabled(void)

size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa);

/*
 * Look up the MTU of the interface named by ifa->ifa_name.
 * Returns the MTU on success, or a non-positive value when it cannot be
 * determined.
 */
int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa);

#ifndef __NR_process_vm_readv
# define __NR_process_vm_readv 310
#endif
Expand Down
1 change: 1 addition & 0 deletions include/ofi_net.h
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ struct ofi_addr_list_entry {
char ipstr[INET6_ADDRSTRLEN];
union ofi_sock_ip ipaddr;
size_t speed;
int mtu;
char net_name[OFI_ADDRSTRLEN];
char ifa_name[OFI_ADDRSTRLEN];
uint64_t comm_caps;
Expand Down
5 changes: 5 additions & 0 deletions include/osx/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ static inline size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa)
return 0;
}

/*
 * MTU lookup stub: no per-interface MTU query is implemented in this
 * header.
 *
 * @param ifa  interface entry; not inspected
 * @return     always -1
 */
static inline int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa)
{
	(void) ifa;

	return -1;
}

static inline int ofi_hugepage_enabled(void)
{
return 0;
Expand Down
2 changes: 1 addition & 1 deletion include/windows/ifaddrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ struct ifaddrs {

char ad_name[16];
size_t speed;
int mtu;
};

int getifaddrs(struct ifaddrs **ifap);
void freeifaddrs(struct ifaddrs *ifa);

2 changes: 2 additions & 0 deletions include/windows/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,8 @@ static inline int ofi_is_loopback_addr(struct sockaddr *addr) {

size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa);

/*
 * Return the MTU stored in the given interface entry (the `mtu` field of
 * this platform's struct ifaddrs, filled in by getifaddrs()).
 */
int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa);

#define file2unix_time 10000000i64
#define win2unix_epoch 116444736000000000i64
#define CLOCK_REALTIME 0
Expand Down
10 changes: 7 additions & 3 deletions prov/udp/src/udpx.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,22 @@
#ifndef _UDPX_H_
#define _UDPX_H_


extern struct fi_provider udpx_prov;
extern struct util_prov udpx_util_prov;
extern struct fi_info udpx_info;


int udpx_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
void *context);
int udpx_domain_open(struct fid_fabric *fabric, struct fi_info *info,
struct fid_domain **dom, void *context);
int udpx_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
struct fid_eq **eq, void *context);

void udpx_util_prov_init(uint32_t version);
void udpx_util_prov_fin();

#define UDPX_FLAG_MULTI_RECV 1
#define UDPX_IOV_LIMIT 4
#define UDPX_MTU 1500

struct udpx_ep_entry {
void *context;
Expand All @@ -88,6 +88,10 @@ struct udpx_ep_entry {
uint8_t resv[sizeof(size_t) - 2];
};

/*
 * Header overhead, in bytes, subtracted from an interface MTU to obtain the
 * largest message that fits in one datagram. With the default UDPX_MTU of
 * 1500 this yields 1472.
 *
 * UDPX_MAX_MSG_SIZE() does no range checking: for a small or negative mtu
 * the result is zero or negative, so callers must validate it before use.
 *
 * NOTE(review): 20 bytes looks like an option-less IPv4 header; an IPv6
 * header is larger - confirm whether IPv6 endpoints need a different value.
 */
#define UDPX_UDP_HEADER_SIZE 8
#define UDPX_IP_HEADER_SIZE 20
#define UDPX_MAX_MSG_SIZE(mtu) ((mtu) - (UDPX_UDP_HEADER_SIZE + UDPX_IP_HEADER_SIZE))

OFI_DECLARE_CIRQUE(struct udpx_ep_entry, udpx_rx_cirq);

struct udpx_ep;
Expand Down
72 changes: 68 additions & 4 deletions prov/udp/src/udpx_attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@
*/

#include "udpx.h"
#include "ofi_osd.h"

#define UDPX_TX_CAPS (OFI_TX_MSG_CAPS | FI_MULTICAST)
#define UDPX_RX_CAPS (FI_SOURCE | OFI_RX_MSG_CAPS)
#define UDPX_DOMAIN_CAPS (FI_LOCAL_COMM | FI_REMOTE_COMM)

/*
 * Default transmit attributes for the udp provider.
 *
 * inject_size is derived from the default MTU (UDPX_MTU) rather than being
 * hard-coded; it is initialized exactly once so there is no ambiguity about
 * which value takes effect.
 */
struct fi_tx_attr udpx_tx_attr = {
	.caps = UDPX_TX_CAPS,
	.inject_size = UDPX_MAX_MSG_SIZE(UDPX_MTU),
	.size = 1024,
	.iov_limit = UDPX_IOV_LIMIT
};
Expand All @@ -53,7 +54,7 @@ struct fi_ep_attr udpx_ep_attr = {
.type = FI_EP_DGRAM,
.protocol = FI_PROTO_UDP,
.protocol_version = 0,
.max_msg_size = 1472,
.max_msg_size = UDPX_MAX_MSG_SIZE(UDPX_MTU),
.tx_ctx_cnt = 1,
.rx_ctx_cnt = 1
};
Expand Down Expand Up @@ -93,6 +94,69 @@ struct fi_info udpx_info = {

/*
 * Utility-provider descriptor for the udp provider.
 *
 * .info deliberately starts out NULL: udpx_util_prov_init() treats NULL as
 * "not yet initialized" and installs the per-interface fi_info list on first
 * use. Each field is initialized exactly once.
 */
struct util_prov udpx_util_prov = {
	.prov = &udpx_prov,
	.info = NULL,
	.flags = 0,
};


/*
 * slist match callback: tests whether an address-list entry describes the
 * interface that an fi_info refers to.
 *
 * @param entry  list node embedded in a struct ofi_addr_list_entry
 * @param infop  the struct fi_info to compare against
 * @return       non-zero when the entry's net_name equals the fabric name
 *               and its ifa_name equals the domain name, 0 otherwise
 */
static int match_interface(struct slist_entry *entry, const void *infop)
{
	const struct fi_info *wanted = infop;
	struct ofi_addr_list_entry *candidate =
		container_of(entry, struct ofi_addr_list_entry, entry);

	if (strcmp(candidate->net_name, wanted->fabric_attr->name) != 0)
		return 0;

	return strcmp(candidate->ifa_name, wanted->domain_attr->name) == 0;
}

/*
 * Adjust the message-size limits of one fi_info according to the MTU of the
 * interface it was generated for.
 *
 * The interface is located in addr_list via match_interface(). When it is
 * found and its MTU leaves a positive payload size after header overhead,
 * both tx_attr->inject_size and ep_attr->max_msg_size are set to that
 * payload size. Otherwise the fi_info is left untouched; a failed lookup is
 * reported at debug level.
 *
 * @param info       fi_info to update
 * @param addr_list  list of struct ofi_addr_list_entry to search
 */
static void set_mtu_from_addr_list(struct fi_info* info,
				   struct slist *addr_list)
{
	struct ofi_addr_list_entry *iface;
	struct slist_entry *match;
	int payload;

	match = slist_find_first_match(addr_list, match_interface, info);
	if (!match) {
		FI_DBG(&udpx_prov, FI_LOG_CORE,
		       "Failed to match interface (%s, %s) to "
		       "address for MTU size\n",
		       info->fabric_attr->name, info->domain_attr->name);
		return;
	}

	iface = container_of(match, struct ofi_addr_list_entry, entry);
	payload = UDPX_MAX_MSG_SIZE(iface->mtu);
	if (payload <= 0)
		return;	/* unknown or unusably small MTU: keep the defaults */

	info->tx_attr->inject_size = payload;
	info->ep_attr->max_msg_size = payload;
}

/*
 * One-time construction of the fi_info list published through
 * udpx_util_prov.info.
 *
 * The static udpx_info template is expanded with ofi_ip_getinfo(), then
 * every resulting entry has its message-size limits adjusted to the MTU of
 * its interface. A non-NULL udpx_util_prov.info means the work has already
 * been done and the call is a no-op.
 *
 * If ofi_ip_getinfo() reports an error, udpx_util_prov.info is reset to
 * NULL so that a later call retries.
 *
 * This function does no locking of its own; callers serialize access.
 *
 * @param version  API version forwarded to ofi_ip_getinfo()
 */
void udpx_util_prov_init(uint32_t version)
{
	struct slist addr_list;
	struct fi_info *cur;
	/*
	 * No fi_info is pre-allocated here: ofi_ip_getinfo() hands back its
	 * own list through this pointer, so anything allocated beforehand
	 * would be overwritten and leaked.
	 */
	struct fi_info *info = NULL;
	int ret;

	if (udpx_util_prov.info != NULL)
		return;

	/* Temporarily expose the static template so it can be expanded. */
	udpx_util_prov.info = &udpx_info;
	ret = ofi_ip_getinfo(&udpx_util_prov, version, NULL, NULL, 0, NULL,
			     &info);
	if (ret) {
		/*
		 * NOTE(review): assumes no list is left in `info` when an
		 * error is returned - confirm against ofi_ip_getinfo().
		 */
		udpx_util_prov.info = NULL;
		return;
	}

	slist_init(&addr_list);
	ofi_get_list_of_addr(&udpx_prov, "iface", &addr_list);
	for (cur = info; cur; cur = cur->next)
		set_mtu_from_addr_list(cur, &addr_list);
	ofi_free_list_of_addr(&addr_list);

	udpx_util_prov.info = info;
}

/*
 * Release the fi_info list held in udpx_util_prov.info, if any.
 *
 * The pointer is cleared after freeing so that a repeated call is a no-op
 * and a later udpx_util_prov_init() rebuilds the list instead of using
 * freed memory.
 */
void udpx_util_prov_fin(void)
{
	if (udpx_util_prov.info == NULL)
		return;

	fi_freeinfo((struct fi_info *) udpx_util_prov.info);
	udpx_util_prov.info = NULL;
}
12 changes: 9 additions & 3 deletions prov/udp/src/udpx_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,25 @@

#include <sys/types.h>

static ofi_mutex_t init_lock;

/*
 * getinfo entry point of the udp provider.
 *
 * Makes sure the MTU-adjusted fi_info list has been built (first call only,
 * serialized by init_lock) and then matches the request against it with
 * util_getinfo().
 *
 * The initialization must run before the lookup: udpx_util_prov.info is
 * NULL until udpx_util_prov_init() has populated it.
 *
 * @return the value returned by util_getinfo()
 */
static int udpx_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, const struct fi_info *hints,
			struct fi_info **info)
{
	ofi_mutex_lock(&init_lock);
	udpx_util_prov_init(version);
	ofi_mutex_unlock(&init_lock);

	return util_getinfo(&udpx_util_prov, version, node, service, flags,
			    hints, info);
}

/*
 * Provider teardown: releases the fi_info list owned by udpx_util_prov.
 *
 * NOTE(review): init_lock is initialized in UDP_INI but is not destroyed
 * here - confirm whether a matching destroy call is required.
 */
static void udpx_fini(void)
{
/* Free the fi_info list built by udpx_util_prov_init(), if any. */
udpx_util_prov_fin();
}


struct fi_provider udpx_prov = {
.name = "udp",
.version = OFI_VERSION_DEF_PROV,
Expand All @@ -65,5 +70,6 @@ UDP_INI
fi_param_define(&udpx_prov, "iface", FI_PARAM_STRING,
"Specify interface name");

ofi_mutex_init(&init_lock);
return &udpx_prov;
}
24 changes: 16 additions & 8 deletions src/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1942,7 +1942,8 @@ void ofi_free_list_of_addr(struct slist *addr_list)
}

static inline
void ofi_insert_loopback_addr(const struct fi_provider *prov, struct slist *addr_list)
void ofi_insert_loopback_addr(const struct fi_provider *prov,
struct slist *addr_list, int mtu)
{
struct ofi_addr_list_entry *addr_entry;

Expand All @@ -1953,6 +1954,7 @@ void ofi_insert_loopback_addr(const struct fi_provider *prov, struct slist *addr
addr_entry->comm_caps = FI_LOCAL_COMM;
addr_entry->ipaddr.sin.sin_family = AF_INET;
addr_entry->ipaddr.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
addr_entry->mtu = mtu;
ofi_straddr_log(prov, FI_LOG_INFO, FI_LOG_CORE,
"available addr: ", &addr_entry->ipaddr);

Expand All @@ -1968,6 +1970,7 @@ void ofi_insert_loopback_addr(const struct fi_provider *prov, struct slist *addr
addr_entry->comm_caps = FI_LOCAL_COMM;
addr_entry->ipaddr.sin6.sin6_family = AF_INET6;
addr_entry->ipaddr.sin6.sin6_addr = in6addr_loopback;
addr_entry->mtu = mtu;
ofi_straddr_log(prov, FI_LOG_INFO, FI_LOG_CORE,
"available addr: ", &addr_entry->ipaddr);

Expand Down Expand Up @@ -2062,7 +2065,7 @@ void ofi_set_netmask_str(char *netstr, size_t len, struct ifaddrs *ifa)
void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
struct slist *addr_list)
{
int ret;
int ret, mtu = -1;
char *iface = NULL;
struct ofi_addr_list_entry *addr_entry;
struct ifaddrs *ifaddrs, *ifa;
Expand All @@ -2089,10 +2092,13 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
if (ifa->ifa_addr == NULL ||
!(ifa->ifa_flags & IFF_UP) ||
!(ifa->ifa_flags & IFF_RUNNING) ||
(ifa->ifa_flags & IFF_LOOPBACK) ||
((ifa->ifa_addr->sa_family != AF_INET) &&
(ifa->ifa_addr->sa_family != AF_INET6)))
continue;
if (ifa->ifa_flags & IFF_LOOPBACK) {
mtu = ofi_ifaddr_get_mtu(ifa);
continue;
}
if (iface && strncmp(iface, ifa->ifa_name, strlen(iface) + 1)) {
FI_DBG(prov, FI_LOG_CORE,
"Skip (%s) interface\n", ifa->ifa_name);
Expand Down Expand Up @@ -2122,9 +2128,11 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
}

addr_entry->speed = ofi_ifaddr_get_speed(ifa);
addr_entry->mtu = ofi_ifaddr_get_mtu(ifa);
FI_INFO(prov, FI_LOG_CORE, "Available addr: %s, "
"iface name: %s, speed: %zu\n",
addr_entry->ipstr, ifa->ifa_name, addr_entry->speed);
"iface name: %s, speed: %zu, mtu: %d\n",
addr_entry->ipstr, ifa->ifa_name, addr_entry->speed,
addr_entry->mtu);

slist_insert_before_first_match(addr_list, ofi_compare_addr_entry,
&addr_entry->entry);
Expand All @@ -2136,7 +2144,7 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
/* Always add loopback address at the end */
if (!iface || !strncmp(iface, "lo", strlen(iface) + 1) ||
!strncmp(iface, "loopback", strlen(iface) + 1))
ofi_insert_loopback_addr(prov, addr_list);
ofi_insert_loopback_addr(prov, addr_list, mtu);
}

#elif defined HAVE_MIB_IPADDRTABLE
Expand Down Expand Up @@ -2182,7 +2190,7 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
}

/* Always add loopback address at the end */
ofi_insert_loopback_addr(prov, addr_list);
ofi_insert_loopback_addr(prov, addr_list, -1);

out:
if (iptbl != &_iptbl)
Expand All @@ -2194,7 +2202,7 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
/*
 * Loopback-only variant of ofi_get_list_of_addr().
 * NOTE(review): presumably the fallback compiled when no interface
 * enumeration back end is available - confirm against the enclosing
 * preprocessor conditionals.
 *
 * Inserts the loopback addresses exactly once. No interface was queried, so
 * the MTU is passed as -1.
 *
 * @param prov       provider used for logging
 * @param env_name   not used by this variant
 * @param addr_list  list that receives the loopback entries
 */
void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
			  struct slist *addr_list)
{
	ofi_insert_loopback_addr(prov, addr_list, -1);
}
#endif

Expand Down
28 changes: 28 additions & 0 deletions src/linux/osd.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,31 @@ size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa)
}

#endif /* HAVE_ETHTOOL */

/*
 * Read the MTU of a network interface from sysfs.
 *
 * The value is parsed from /sys/class/net/<ifa_name>/mtu.
 *
 * @param ifa  interface entry; only ifa->ifa_name is used
 * @return     the MTU (always > 0) on success, or -1 when it cannot be
 *             determined: NULL entry or name, over-long name, unreadable or
 *             unparsable sysfs file, or a non-positive value. -1 matches
 *             what the other platforms' implementations return for
 *             "unknown".
 */
int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa)
{
	/* Ample for "/sys/class/net/" + interface name + "/mtu". */
	char path[128];
	FILE *fp;
	int len;
	int mtu = -1;

	if (!ifa || !ifa->ifa_name)
		return -1;

	len = snprintf(path, sizeof(path), "/sys/class/net/%s/mtu",
		       ifa->ifa_name);
	if (len < 0 || (size_t) len >= sizeof(path))
		return -1;	/* formatting error or truncated path */

	fp = fopen(path, "r");
	if (!fp)
		return -1;

	if (fscanf(fp, "%d", &mtu) != 1 || mtu <= 0)
		mtu = -1;

	fclose(fp);
	return mtu;
}
6 changes: 6 additions & 0 deletions src/windows/osd.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ int getifaddrs(struct ifaddrs **ifap)
(*addr6) = *(struct sockaddr_in6 *) pSockAddr;
}
fa->speed = aa->TransmitLinkSpeed;
fa->mtu = (int)aa->Mtu;
/* Generate fake Unix-like device names */
sprintf_s(fa->ad_name, sizeof(fa->ad_name), "eth%d", i++);
}
Expand All @@ -497,6 +498,11 @@ size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa)
return ifa->speed;
}

/*
 * Return the MTU recorded in the interface entry.
 * The value is the one getifaddrs() stored in the entry's `mtu` field from
 * the adapter's Mtu.
 */
int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa)
{
return ifa->mtu;
}

void freeifaddrs(struct ifaddrs *ifa)
{
while (ifa) {
Expand Down

0 comments on commit 2d26352

Please sign in to comment.