Skip to content

Commit

Permalink
Add lpf IPoIB support
Browse files Browse the repository at this point in the history
  • Loading branch information
TimoRoth committed Jun 30, 2024
1 parent 4cd8b78 commit 1028b36
Show file tree
Hide file tree
Showing 4 changed files with 263 additions and 14 deletions.
3 changes: 3 additions & 0 deletions src/lib/dhcp/hwaddr.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ struct HWAddr {
/// @brief Size of an Ethernet hardware address, in bytes.
static const size_t ETHERNET_HWADDR_LEN = 6;

/// @brief Size of an InfiniBand hardware address, in bytes.
static const size_t INFINIBAND_HWADDR_LEN = 20;

/// @brief Maximum size of a hardware address, in bytes.
///
/// Currently equal to the InfiniBand address size, which is the largest
/// of the address sizes defined above.
static const size_t MAX_HWADDR_LEN = 20;

Expand Down
151 changes: 137 additions & 14 deletions src/lib/dhcp/pkt_filter_lpf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,98 @@ struct sock_filter dhcp_sock_filter [] = {
BPF_STMT(BPF_RET + BPF_K, 0),
};

/// The following structure defines a Berkeley Packet Filter program to perform
/// packet filtering. The program operates on IPoIB pseudo packets. To help with
/// interpretation of the program, for the types of packets we are interested
/// in, the header layout is:
///
/// 20 bytes Source Interface Address
/// 2 bytes Packet Type
/// 2 bytes Reserved/Unused
///
/// The rest of the packet is laid out like the Ethernet-based packets
/// handled by the dhcp_sock_filter program above.
///
/// Each instruction is preceded by a comment giving the instruction
/// number within the BPF program, in the following format: #123.
///
/// @note Instructions #8 and #11 hold placeholder operands. They are
/// overwritten in place by PktFilterLPF::openSocket() with the unicast
/// address and the port, so their positions in the array must not change
/// without updating that method.

struct sock_filter dhcp_sock_filter_ib [] = {
// Make sure this is an IP packet: check the half-word (two bytes)
// at offset 20 in the packet (the IPoIB pseudo packet type). If it
// is, advance to the next instruction. If not, advance 11
// instructions (which takes execution to the last instruction in
// the sequence: "drop it").
// #0
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, IPOIB_PACKET_TYPE_OFFSET),
// #1
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 11),

// Make sure it's a UDP packet. The IP protocol is at offset
// 9 in the IP header so, adding the IPoIB pseudo header size
// of 24 bytes gives an absolute byte offset in the packet of 33.
// If it is not UDP, skip 9 instructions to "drop it".
// #2
BPF_STMT(BPF_LD + BPF_B + BPF_ABS,
IPOIB_HEADER_LEN + IP_PROTO_TYPE_OFFSET),
// #3
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 9),

// Make sure this isn't a fragment by checking that the fragment
// offset field in the IP header is zero. This field is the
// least-significant 13 bits in the bytes at offsets 6 and 7 in
// the IP header, so the half-word at offset 30 (6 + size of
// IPoIB header) is loaded and an appropriate mask applied.
// If any of the masked bits is set, skip 7 instructions to "drop it".
// #4
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, IPOIB_HEADER_LEN + IP_FLAGS_OFFSET),
// #5
BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 7, 0),

// Check the packet's destination address. The program will only
// allow the packets sent to the broadcast address or unicast
// to the specific address on the interface. By default, this
// address is set to 0 and must be set to the specific value
// when the raw socket is created and the program is attached
// to it. PktFilterLPF::openSocket() assigns the address to
// dhcp_sock_filter_ib[8].k.
// #6
BPF_STMT(BPF_LD + BPF_W + BPF_ABS,
IPOIB_HEADER_LEN + IP_DEST_ADDR_OFFSET),
// If this is a broadcast address, skip the next check.
// #7
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0xffffffff, 1, 0),
// If this is not broadcast address, compare it with the unicast
// address specified for the interface. On mismatch, skip 4
// instructions to "drop it".
// #8
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x00000000, 0, 4),

// Get the IP header length. This is achieved by the following
// (special) instruction that, given the offset of the start
// of the IP header (offset 24) loads the IP header length.
// #9
BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, IPOIB_HEADER_LEN),

// Make sure it's to the right port. The following instruction
// adds the previously extracted IP header length to the given
// offset to locate the correct byte. The given offset of 26
// comprises the length of the IPoIB header (24) plus the offset
// of the UDP destination port (2) within the UDP header.
// #10
BPF_STMT(BPF_LD + BPF_H + BPF_IND, IPOIB_HEADER_LEN + UDP_DEST_PORT),
// The following instruction tests against the default DHCP server port,
// but the actual port is set in PktFilterLPF::openSocket().
// N.B. The code in that method assumes that this instruction is at
// offset 11 in the program. If this is changed, openSocket() must be
// updated.
// #11
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DHCP4_SERVER_PORT, 0, 1),

// If we passed all the tests, ask for the whole packet.
// #12
BPF_STMT(BPF_RET + BPF_K, (u_int)-1),

// Otherwise, drop it.
// #13
BPF_STMT(BPF_RET + BPF_K, 0),
};

}

using namespace isc::util;
Expand Down Expand Up @@ -169,16 +261,30 @@ PktFilterLPF::openSocket(Iface& iface,
struct sock_fprog filter_program;
memset(&filter_program, 0, sizeof(filter_program));

filter_program.filter = dhcp_sock_filter;
filter_program.len = sizeof(dhcp_sock_filter) / sizeof(struct sock_filter);
if (iface.getHWType() == HTYPE_INFINIBAND) {
filter_program.filter = dhcp_sock_filter_ib;
filter_program.len = sizeof(dhcp_sock_filter_ib) / sizeof(struct sock_filter);

// Configure the filter program to receive unicast packets sent to the
// specified address. The program will also allow packets sent to the
// 255.255.255.255 broadcast address.
dhcp_sock_filter_ib[8].k = addr.toUint32();

// Configure the filter program to receive unicast packets sent to the
// specified address. The program will also allow packets sent to the
// 255.255.255.255 broadcast address.
dhcp_sock_filter[8].k = addr.toUint32();
// Override the default port value.
dhcp_sock_filter_ib[11].k = port;
} else {
filter_program.filter = dhcp_sock_filter;
filter_program.len = sizeof(dhcp_sock_filter) / sizeof(struct sock_filter);

// Configure the filter program to receive unicast packets sent to the
// specified address. The program will also allow packets sent to the
// 255.255.255.255 broadcast address.
dhcp_sock_filter[8].k = addr.toUint32();

// Override the default port value.
dhcp_sock_filter[11].k = port;
}

// Override the default port value.
dhcp_sock_filter[11].k = port;
// Apply the filter.
if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter_program,
sizeof(filter_program)) < 0) {
Expand Down Expand Up @@ -315,7 +421,21 @@ PktFilterLPF::receive(Iface& iface, const SocketInfo& socket_info) {
Pkt4Ptr dummy_pkt = Pkt4Ptr(new Pkt4(DHCPDISCOVER, 0));

// Decode ethernet, ip and udp headers.
decodeEthernetHeader(buf, dummy_pkt);
if (iface.getHWType() == HTYPE_INFINIBAND) {
decodeIPoIBHeader(buf, dummy_pkt);

// The IPoIB header does not contain the local address.
// Set it from the interface instead.
if (iface.getMacLen() != HWAddr::INFINIBAND_HWADDR_LEN) {
isc_throw(SocketReadError,
"Invalid local hardware address size for IPoIB interface.");
}
HWAddrPtr hwaddr(new HWAddr(iface.getMac(), iface.getMacLen(),
iface.getHWType()));
dummy_pkt->setLocalHWAddr(hwaddr);
} else {
decodeEthernetHeader(buf, dummy_pkt);
}
decodeIpUdpHeader(buf, dummy_pkt);

// Read the DHCP data.
Expand Down Expand Up @@ -374,11 +494,14 @@ PktFilterLPF::send(const Iface& iface, uint16_t sockfd, const Pkt4Ptr& pkt) {
pkt->setLocalHWAddr(hwaddr);
}


// Ethernet frame header.
// Note that we don't validate whether HW addresses in 'pkt'
// are valid because they are checked by the function called.
writeEthernetHeader(pkt, buf);
if (iface.getHWType() == HTYPE_INFINIBAND) {
writeIPoIBHeader(iface, pkt, buf);
} else {
// Ethernet frame header.
// Note that we don't validate whether HW addresses in 'pkt'
// are valid because they are checked by the function called.
writeEthernetHeader(pkt, buf);
}

// IP and UDP header
writeIpUdpHeader(pkt, buf);
Expand Down
86 changes: 86 additions & 0 deletions src/lib/dhcp/protocol_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@

using namespace isc::asiolink;
using namespace isc::util;
using namespace isc::dhcp;

namespace {

static HWAddr zero_ib_hwaddr(&std::vector<uint8_t>(HWAddr::INFINIBAND_HWADDR_LEN)[0],
HWAddr::INFINIBAND_HWADDR_LEN, HTYPE_INFINIBAND);

}

namespace isc {
namespace dhcp {
Expand Down Expand Up @@ -58,6 +66,39 @@ decodeEthernetHeader(InputBuffer& buf, Pkt4Ptr& pkt) {
buf.setPosition(start_pos + ETHERNET_HEADER_LEN);
}

/// Decodes the IPoIB pseudo header located at the current read position
/// of @c buf, stores the 20-byte hardware address it carries as the remote
/// hardware address of @c pkt and moves the read position just past the
/// header.
///
/// @throw InvalidPacketHeader if fewer than IPOIB_HEADER_LEN bytes remain.
/// @throw BadValue if @c pkt is NULL.
void
decodeIPoIBHeader(InputBuffer& buf, Pkt4Ptr& pkt) {
    // The size of the buffer to be parsed must not be lower
    // than the size of the IPoIB pseudo header.
    if (buf.getLength() - buf.getPosition() < IPOIB_HEADER_LEN) {
        isc_throw(InvalidPacketHeader, "size of IPoIB header in received "
                  << "packet is invalid, expected at least "
                  << IPOIB_HEADER_LEN << " bytes, received "
                  << buf.getLength() - buf.getPosition() << " bytes");
    }
    // Packet object must not be NULL. We want to output some values
    // to this object.
    if (!pkt) {
        isc_throw(BadValue, "NULL packet object provided when parsing IPoIB"
                  " pseudo header");
    }

    // The size of the single address is always lower than the size of
    // the header that holds this address. Otherwise, it is a programming
    // error that we want to detect at compilation time.
    BOOST_STATIC_ASSERT(IPOIB_HEADER_LEN > HWAddr::INFINIBAND_HWADDR_LEN);

    // Remember initial position.
    size_t start_pos = buf.getPosition();

    // Read the source HW address, which occupies the first bytes of the
    // pseudo header.
    std::vector<uint8_t> src_addr;
    buf.readVector(src_addr, HWAddr::INFINIBAND_HWADDR_LEN);
    pkt->setRemoteHWAddr(HWTYPE_INFINIBAND, HWAddr::INFINIBAND_HWADDR_LEN, src_addr);

    // Move the buffer read pointer to the end of the IPoIB pseudo header,
    // skipping the packet type and reserved fields that follow the address.
    buf.setPosition(start_pos + IPOIB_HEADER_LEN);
}

void
decodeIpUdpHeader(InputBuffer& buf, Pkt4Ptr& pkt) {
// The size of the buffer must be at least equal to the minimal size of
Expand Down Expand Up @@ -161,6 +202,51 @@ writeEthernetHeader(const Pkt4Ptr& pkt, OutputBuffer& out_buf) {
out_buf.writeUint16(ETHERNET_TYPE_IP);
}

/// Writes the 24-byte IPoIB pseudo header for @c pkt into @c out_buf:
/// a 20-byte destination hardware address, the 2-byte IP packet type and
/// 2 reserved bytes set to zero.
///
/// The destination is the broadcast address of @c iface when the packet
/// is not relayed and has the broadcast flag set, or when the remote
/// hardware address is all zeros; otherwise it is the remote hardware
/// address of the packet. If the packet has no remote hardware address,
/// an all-zero address is written.
///
/// @throw BadValue if the remote address, or the interface broadcast
/// address when it is needed, is not INFINIBAND_HWADDR_LEN bytes long.
void
writeIPoIBHeader(const Iface& iface, const Pkt4Ptr& pkt, OutputBuffer& out_buf) {
    // Set destination HW address.
    HWAddrPtr remote_addr = pkt->getRemoteHWAddr();
    if (remote_addr) {
        if (remote_addr->hwaddr_.size() != HWAddr::INFINIBAND_HWADDR_LEN) {
            isc_throw(BadValue, "invalid size of the remote HW address "
                      << remote_addr->hwaddr_.size() << " when constructing"
                      << " an IPoIB pseudo header; expected size is"
                      << " " << HWAddr::INFINIBAND_HWADDR_LEN);
        } else if ((!pkt->isRelayed() &&
                    (pkt->getFlags() & Pkt4::FLAG_BROADCAST_MASK)) ||
                   *remote_addr == zero_ib_hwaddr) {
            // We also broadcast if the received hwaddr is full zero.
            // This happens on some IB drivers which don't provide the remote
            // hwaddr to userspace.
            // Generally, according to the RFC, all IPoIB clients MUST request
            // broadcast anyway, but better to be safe and handle non-compliant
            // clients.
            if (iface.getBcastMacLen() != HWAddr::INFINIBAND_HWADDR_LEN) {
                isc_throw(BadValue, "invalid size of the bcast HW address "
                          << iface.getBcastMacLen() << " when constructing"
                          << " an IPoIB pseudo header; expected size is"
                          << " " << HWAddr::INFINIBAND_HWADDR_LEN);
            }
            out_buf.writeData(iface.getBcastMac(),
                              HWAddr::INFINIBAND_HWADDR_LEN);
        } else {
            // Unicast directly to the remote hardware address.
            out_buf.writeData(&remote_addr->hwaddr_[0],
                              HWAddr::INFINIBAND_HWADDR_LEN);
        }
    } else {
        // HW address has not been specified. This is possible when receiving
        // packet through a logical interface (e.g. lo). In such cases, we
        // don't want to fail but rather provide a default HW address, which
        // consists of zeros.
        out_buf.writeData(&zero_ib_hwaddr.hwaddr_[0], HWAddr::INFINIBAND_HWADDR_LEN);
    }

    // Type IP.
    out_buf.writeUint16(ETHERNET_TYPE_IP);
    // Reserved
    out_buf.writeUint16(0);
}

void
writeIpUdpHeader(const Pkt4Ptr& pkt, util::OutputBuffer& out_buf) {

Expand Down
37 changes: 37 additions & 0 deletions src/lib/dhcp/protocol_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define PROTOCOL_UTIL_H

#include <dhcp/pkt4.h>
#include <dhcp/iface_mgr.h>
#include <util/buffer.h>

#include <stdint.h>
Expand Down Expand Up @@ -39,6 +40,12 @@ static const size_t ETHERNET_PACKET_TYPE_OFFSET = 12;
/// and locations on different OSes.
static const uint16_t ETHERNET_TYPE_IP = 0x0800;

/// Size of the IPoIB pseudo frame header, in bytes: a 20-byte hardware
/// address, a 2-byte packet type and 2 reserved bytes.
static const size_t IPOIB_HEADER_LEN = 24;
/// Offset of the 2-byte word in the IPoIB pseudo packet which
/// holds the type of the protocol it encapsulates. It directly
/// follows the 20-byte hardware address.
static const size_t IPOIB_PACKET_TYPE_OFFSET = 20;

/// Minimal IPv4 header length.
static const size_t MIN_IP_HEADER_LEN = 20;
/// Offset in the IP header where the flags field starts.
Expand Down Expand Up @@ -75,6 +82,25 @@ static const size_t UDP_DEST_PORT = 2;
/// @throw BadValue if pkt object is NULL.
void decodeEthernetHeader(util::InputBuffer& buf, Pkt4Ptr& pkt);

/// @brief Decode the IPoIB pseudo header.
///
/// This function reads the IPoIB pseudo frame header from the provided
/// buffer at the current read position. The source HW address
/// is read from the header and assigned as the remote HW address in
/// the pkt object. The buffer read pointer is set to the end
/// of the IPoIB pseudo header if read was successful.
///
/// @note The local HW address is not part of the IPoIB pseudo header,
/// so this function does not set it in the 'pkt' object.
///
/// @param buf input buffer holding header to be parsed.
/// @param [out] pkt packet object receiving HW source address read from header.
///
/// @throw InvalidPacketHeader if packet header is truncated
/// @throw BadValue if pkt object is NULL.
void decodeIPoIBHeader(util::InputBuffer& buf, Pkt4Ptr& pkt);

/// @brief Decode IP and UDP header.
///
/// This function reads IP and UDP headers from the provided buffer
Expand Down Expand Up @@ -105,6 +131,17 @@ void decodeIpUdpHeader(util::InputBuffer& buf, Pkt4Ptr& pkt);
void writeEthernetHeader(const Pkt4Ptr& pkt,
util::OutputBuffer& out_buf);

/// @brief Writes IPoIB pseudo frame header into a buffer.
///
/// The header consists of the destination HW address, the IP packet
/// type and a reserved 2-byte field set to zero. The destination is the
/// broadcast HW address of the interface when the packet is not relayed
/// and has the broadcast flag set, or when its remote HW address
/// consists of zeros only; otherwise it is the remote HW address of the
/// packet. If the packet has no remote HW address, zeros are written.
///
/// @warning This function does not check that the provided 'pkt'
/// pointer is valid. Caller must make sure that pointer is
/// allocated.
///
/// @param iface interface providing the broadcast HW address.
/// @param pkt packet object holding the remote (destination) HW address.
/// @param [out] out_buf buffer where a header is written.
///
/// @throw BadValue if the remote HW address, or the broadcast HW address
/// when it is needed, does not have the InfiniBand HW address size.
void writeIPoIBHeader(const Iface& iface, const Pkt4Ptr& pkt,
util::OutputBuffer& out_buf);

/// @brief Writes both IP and UDP header into output buffer
///
/// This utility function assembles IP and UDP packet headers for the
Expand Down

0 comments on commit 1028b36

Please sign in to comment.