From cc3eb42bc959bca284df6ff80d723cf256bdf1e6 Mon Sep 17 00:00:00 2001 From: Tom Barbette Date: Mon, 7 Dec 2015 10:19:16 +0100 Subject: [PATCH] Support Zero-Copy for Netmap This commit adds zero-copy support through buffer swapping, using extra buffers allocated on startup. Netmap buffers are now organized in pools, much like Click's packet pools, through a completed implementation of Luigi Rizzo's NetmapBufQ. A shared linked list allows multiple thread-local NetmapBufQ instances to exchange batches of buffers. The number of extra buffers allocated on startup is set using NetmapInfo, which is now a real element. NetmapBufQ and the other old NetmapInfo methods (now in a class more accurately named NetmapDevice) are moved to lib/netmapdevice.cc. Also, there was confusion between HAVE_NET_NETMAP_H, HAVE_NETMAP, and ELEMENT_REQUIRES(netmap). There is no real need to build part of the netmap subsystem without HAVE_NETMAP, so HAVE_NET_NETMAP_H is replaced by HAVE_NETMAP. This is a first, minimal change just to introduce zero-copy; it provides an improvement, but the send and receive methods are nearly untouched and are not reworked in this commit. 
--- config-userlevel.h.in | 6 +- configure | 6 +- elements/userlevel/fromdevice.cc | 17 +- elements/userlevel/fromdevice.hh | 10 +- elements/userlevel/netmapinfo.cc | 138 ++---------- elements/userlevel/netmapinfo.hh | 144 +----------- elements/userlevel/todevice.cc | 3 +- elements/userlevel/todevice.hh | 2 +- elements/userlevel/todpdkdevice.cc | 4 +- include/click/netmapdevice.hh | 227 +++++++++++++++++++ lib/dpdkdevice.cc | 4 - lib/driver.cc | 5 + lib/netmapdevice.cc | 350 +++++++++++++++++++++++++++++ m4/click.m4 | 6 +- test/tools/mkmindriver-01.testie | 1 - 15 files changed, 640 insertions(+), 283 deletions(-) create mode 100644 include/click/netmapdevice.hh create mode 100644 lib/netmapdevice.cc diff --git a/config-userlevel.h.in b/config-userlevel.h.in index 1518766be9..d7a15a2d2c 100644 --- a/config-userlevel.h.in +++ b/config-userlevel.h.in @@ -113,9 +113,6 @@ /* Define if you have the header file. */ #undef HAVE_NETDB_H -/* Define if you have the header file. */ -#undef HAVE_NET_NETMAP_H - /* Define if you have the header file. */ #undef HAVE_NETPACKET_PACKET_H @@ -214,6 +211,9 @@ /* Define if a Click user-level driver might run multiple threads. */ #undef HAVE_USER_MULTITHREAD +/* Define if Netmap support is enabled. */ +#undef HAVE_NETMAP + /* Define if a Click user-level driver uses Intel DPDK. 
*/ #undef HAVE_DPDK diff --git a/configure b/configure index 5cdb8f362e..a1a690ec7e 100755 --- a/configure +++ b/configure @@ -10705,11 +10705,15 @@ $as_echo "$ac_cv_working_net_netmap_h" >&6; } CPPFLAGS="$saveflags" if test "$HAVE_NETMAP" = yes -a "$use_netmap" != no; then -$as_echo "#define HAVE_NET_NETMAP_H 1" >>confdefs.h +$as_echo "#define HAVE_NETMAP 1" >>confdefs.h + EXTRA_DRIVER_OBJS="netmapdevice.o $EXTRA_DRIVER_OBJS" + else + HAVE_NETMAP=no fi + if test "$HAVE_PCAP" != yes -a "$HAVE_NETMAP" != yes -a "$ac_cv_under_linux" != yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ========================================= diff --git a/elements/userlevel/fromdevice.cc b/elements/userlevel/fromdevice.cc index 76e2b12a3c..64360ab6fb 100644 --- a/elements/userlevel/fromdevice.cc +++ b/elements/userlevel/fromdevice.cc @@ -472,9 +472,15 @@ FromDevice::emit_packet(WritablePacket *p, int extra_len, const Timestamp &ts) else checked_output_push(1, p); } + +void +FromDevice::emit_packet_arg(WritablePacket *p, int extra_len, const Timestamp &ts, void* arg) { + FromDevice* fd = static_cast(arg); + fd->emit_packet(p, extra_len, ts); +} #endif -#if FROMDEVICE_ALLOW_PCAP || FROMDEVICE_ALLOW_NETMAP +#if FROMDEVICE_ALLOW_PCAP CLICK_ENDDECLS extern "C" { void @@ -497,7 +503,6 @@ FromDevice_get_packet(u_char* clientdata, CLICK_DECLS #endif - void FromDevice::selected(int, int) { @@ -507,8 +512,7 @@ FromDevice::selected(int, int) #if FROMDEVICE_ALLOW_NETMAP if (_method == method_netmap) { // Read and push() at most one burst of packets. - int r = _netmap.dispatch(_burst, - reinterpret_cast(FromDevice_get_packet), (u_char *) this); + int r = _netmap.receive(_burst, _headroom, emit_packet_arg, this); if (r > 0) { _count += r; _task.reschedule(); @@ -570,8 +574,7 @@ FromDevice::run_task(Task *) # if FROMDEVICE_ALLOW_NETMAP if (_method == method_netmap) { // Read and push() at most one burst of packets. 
- r = _netmap.dispatch(_burst, - reinterpret_cast(FromDevice_get_packet), (u_char *) this); + r = _netmap.receive(_burst,_headroom,emit_packet_arg, this); if (r < 0 && ++_pcap_complaints < 5) ErrorHandler::default_handler()->error("%p{element}: %s", this, "nm_dispatch failed"); @@ -652,5 +655,5 @@ FromDevice::add_handlers() } CLICK_ENDDECLS -ELEMENT_REQUIRES(userlevel FakePcap KernelFilter NetmapInfo) +ELEMENT_REQUIRES(userlevel FakePcap KernelFilter) EXPORT_ELEMENT(FromDevice) diff --git a/elements/userlevel/fromdevice.hh b/elements/userlevel/fromdevice.hh index ef23bd1475..6fcf2efee4 100644 --- a/elements/userlevel/fromdevice.hh +++ b/elements/userlevel/fromdevice.hh @@ -18,9 +18,9 @@ int pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf); } #endif -#if HAVE_NET_NETMAP_H +#if HAVE_NETMAP # define FROMDEVICE_ALLOW_NETMAP 1 -# include "elements/userlevel/netmapinfo.hh" +# include #endif #if FROMDEVICE_ALLOW_NETMAP || FROMDEVICE_ALLOW_PCAP @@ -187,7 +187,6 @@ class FromDevice : public Element { public: #else inline int fd() const { return -1; } #endif - void selected(int fd, int mask); #if FROMDEVICE_ALLOW_PCAP @@ -203,7 +202,7 @@ class FromDevice : public Element { public: #endif #if FROMDEVICE_ALLOW_NETMAP - const NetmapInfo *netmap() const { return _method == method_netmap ? &_netmap : 0; } + const NetmapDevice *netmap() const { return _method == method_netmap ? 
&_netmap : 0; } #endif #if FROMDEVICE_ALLOW_NETMAP || FROMDEVICE_ALLOW_PCAP @@ -222,13 +221,14 @@ class FromDevice : public Element { public: #endif #if FROMDEVICE_ALLOW_PCAP || FROMDEVICE_ALLOW_NETMAP void emit_packet(WritablePacket *p, int extra_len, const Timestamp &ts); + static void emit_packet_arg(WritablePacket *p, int extra_len, const Timestamp &ts, void* arg); #endif #if FROMDEVICE_ALLOW_PCAP pcap_t *_pcap; int _pcap_complaints; #endif #if FROMDEVICE_ALLOW_NETMAP - NetmapInfo _netmap; + NetmapDevice _netmap; int netmap_dispatch(); #endif #if FROMDEVICE_ALLOW_PCAP || FROMDEVICE_ALLOW_NETMAP diff --git a/elements/userlevel/netmapinfo.cc b/elements/userlevel/netmapinfo.cc index 67d2c5636a..f17a50f4c7 100644 --- a/elements/userlevel/netmapinfo.cc +++ b/elements/userlevel/netmapinfo.cc @@ -1,9 +1,8 @@ // -*- mode: c++; c-basic-offset: 4 -*- /* * netmapinfo.{cc,hh} -- library for interfacing with netmap - * Eddie Kohler, Luigi Rizzo * - * Copyright (c) 2012 Eddie Kohler + * Copyright (c) 2015 Tom Barbette * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -17,130 +16,29 @@ */ #include -#include -#if HAVE_NET_NETMAP_H -#define NETMAP_WITH_LIBS +#include +#include #include "netmapinfo.hh" -#include -#include -#include -#include -#include -CLICK_DECLS - -/* - * keep a list of netmap ports so matching the name we - * can recycle the regions - */ -static Spinlock netmap_memory_lock; -//static struct NetmapInfo *netmap_ports; -int -NetmapInfo::open(const String &ifname, - bool always_error, ErrorHandler *errh) -{ - click_chatter("%s ifname %s\n", __FUNCTION__, ifname.c_str()); - ErrorHandler *initial_errh = always_error ? 
errh : ErrorHandler::silent_handler(); +CLICK_DECLS - netmap_memory_lock.acquire(); - // for the time being, just a new block - do { - desc = nm_open(ifname.c_str(), NULL, 0, NULL); - if (desc == NULL) { - initial_errh->error("nm_open(%s): %s", ifname.c_str(), strerror(errno)); - break; +int NetmapInfo::configure(Vector &conf, ErrorHandler *errh) { + if (instance) { + return errh->error("You cannot place multiple instances of NetmapInfo !"); } - click_chatter("%s %s memsize %d mem %p buf_start %p buf_end %p", - __FUNCTION__, desc->req.nr_name, - desc->memsize, desc->mem, desc->buf_start, desc->buf_end); - bufq.init(desc->buf_start, desc->buf_end, - desc->some_ring->nr_buf_size); - /* eventually try to match the region */ - destructor_arg = this; - click_chatter("private mapping for %s\n", ifname.c_str()); - } while (0); - netmap_memory_lock.release(); - return desc ? desc->fd : -1; -} + instance = this; + if (Args(conf, this, errh) + .read_p("EXTRA_BUFFER", NetmapDevice::global_alloc) + .complete() < 0) + return -1; -void -NetmapInfo::initialize_rings_rx(int timestamp) -{ - click_chatter("%s timestamp %d\n", __FUNCTION__, timestamp); - if (timestamp >= 0) { - int flags = (timestamp > 0 ? NR_TIMESTAMP : 0); - for (unsigned i = desc->first_rx_ring; i <= desc->last_rx_ring; ++i) - NETMAP_RXRING(desc->nifp, i)->flags = flags; - } + return 0; } -void -NetmapInfo::initialize_rings_tx() -{ - click_chatter("%s\n", __FUNCTION__); -} - -int -NetmapInfo::dispatch(int count, nm_cb_t cb, u_char *arg) -{ - return nm_dispatch(desc, count, cb, arg); -} - -bool -NetmapInfo::send_packet(Packet *p, int noutputs) -{ - int ret = nm_inject(desc, p->data(), p->length()); - if (0) click_chatter("%s buf %p size %d returns %d\n", - __FUNCTION__, p->data(), p->length(), ret); - return ret > 0 ? 
0 : -1; -#if 0 - // we can do a smart nm_inject - for (unsigned ri = desc->first_tx_ring; ri <= desc->last_tx_ring; ++ri) { - struct netmap_ring *ring = NETMAP_TXRING(desc->nifp, ri); - if (nm_ring_empty(ring)) - continue; - unsigned cur = ring->cur; - unsigned buf_idx = ring->slot[cur].buf_idx; - if (buf_idx < 2) - continue; - unsigned char *buf = (unsigned char *) NETMAP_BUF(ring, buf_idx); - uint32_t p_length = p->length(); - if (NetmapInfo::is_netmap_buffer(p) - && !p->shared() - && p->buffer() == p->data() - && (char *)p->buffer() >= desc->buf_start - && (char *)p->buffer() < desc->buf_end - && noutputs == 0) { - // put the original buffer in the freelist - NetmapInfo::buffer_destructor(buf, 0, (void *)this); - // now enqueue - ring->slot[cur].buf_idx = NETMAP_BUF_IDX(ring, (char *) p->buffer()); - ring->slot[cur].flags |= NS_BUF_CHANGED; - // and make sure nobody uses this packet - p->reset_buffer(); - } else - memcpy(buf, p->data(), p_length); - ring->slot[cur].len = p_length; - __asm__ volatile("" : : : "memory"); - ring->head = ring->cur = nm_ring_next(ring, cur); - return 0; - } - errno = ENOBUFS; - return -1; -#endif -} -void -NetmapInfo::close(int fd) -{ - click_chatter("fd %d interface %s\n", - fd, desc->req.nr_name); - netmap_memory_lock.acquire(); - // unlink from the list ? 
- nm_close(desc); - desc = 0; - netmap_memory_lock.release(); -} +NetmapInfo* NetmapInfo::instance = 0; CLICK_ENDDECLS -#endif -ELEMENT_PROVIDES(NetmapInfo) + +ELEMENT_REQUIRES(userlevel netmap) +EXPORT_ELEMENT(NetmapInfo) +ELEMENT_MT_SAFE(NetmapInfo) diff --git a/elements/userlevel/netmapinfo.hh b/elements/userlevel/netmapinfo.hh index b52ba94221..cf418aa6b3 100644 --- a/elements/userlevel/netmapinfo.hh +++ b/elements/userlevel/netmapinfo.hh @@ -1,149 +1,21 @@ #ifndef CLICK_NETMAPINFO_HH -#define CLICK_NETMAPINFO_HH 1 +#define CLICK_NETMAPINFO_HH -#if HAVE_NET_NETMAP_H -#include -#include -#include +#include +#include -// XXX bug in netmap_user.h , the prototype should be available - -#ifndef NETMAP_WITH_LIBS -typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); -#endif - -#include -#include CLICK_DECLS -/* a queue of netmap buffers, by index */ -class NetmapBufQ { - unsigned char *buf_start; /* base address */ - unsigned int buf_size; - unsigned int max_index; /* error checking */ - unsigned char *buf_end; /* error checking */ - - unsigned int head; /* index of first buffer */ - unsigned int tail; /* index of last buffer */ - unsigned int count; /* how many ? */ - - public: - inline unsigned int insert(unsigned int idx) { - if (idx >= max_index) { - return 1; // error - } - unsigned int *p = reinterpret_cast(buf_start + - idx * buf_size); - // prepend - *p = head; - if (head == 0) { - tail = idx; - } - head = idx; - count++; - return 0; - } - inline unsigned int insert_p(unsigned char *p) { - if (p < buf_start || p >= buf_end) - return 1; - return insert((p - buf_start) / buf_size); - } - inline unsigned int extract() { - if (count == 0) - return 0; - unsigned int idx = head; - unsigned int *p = reinterpret_cast(buf_start + - idx * buf_size); - head = *p; - count--; - return idx; - } - inline unsigned char * extract_p() { - unsigned int idx = extract(); - return (idx == 0) ? 
0 : buf_start + idx * buf_size; - } - inline int init (void *beg, void *end, uint32_t _size) { - click_chatter("Initializing NetmapBufQ %p size %d mem %p %p\n", - this, _size, beg, end); - head = tail = max_index = 0; - count = 0; - buf_size = 0; - buf_start = buf_end = 0; - if (_size == 0 || _size > 0x10000 || - beg == 0 || end == 0 || end < beg) { - click_chatter("NetmapBufQ %p bad args: size %d mem %p %p\n", - this, _size, beg, end); - return 1; - } - buf_size = _size; - buf_start = reinterpret_cast(beg); - buf_end = reinterpret_cast(end); - max_index = (buf_end - buf_start) / buf_size; - // check max_index overflow ? - return 0; - } -}; - -/* a netmap port as returned by nm_open */ -class NetmapInfo { public: +class NetmapInfo : public Element { public: + const char *class_name() const { return "NetmapInfo"; } - struct nm_desc *desc; - class NetmapInfo *parent; /* same pool */ - class NetmapBufQ bufq; /* free buffer queue */ - - // to recycle buffers, - // nmr.arg3 is the number of extra buffers - // nifp->ni_bufs_head is the index of the first buffer. - unsigned int active_users; // we do not close until done. 
- - NetmapInfo *destructor_arg; // either this or parent's main_mem - - int open(const String &ifname, - bool always_error, ErrorHandler *errh); - void initialize_rings_rx(int timestamp); - void initialize_rings_tx(); - void close(int fd); - // send a packet, possibly using zerocopy if noutputs == 0 - // and other conditions apply - bool send_packet(Packet *p, int noutputs); - - int dispatch(int burst, nm_cb_t cb, u_char *arg); - -#if 0 - // XXX return a buffer to the ring - bool refill(struct netmap_ring *ring) { - if (buffers) { - unsigned char *buf = buffers; - buffers = *reinterpret_cast(buffers); - unsigned res1idx = ring->head; - ring->slot[res1idx].buf_idx = NETMAP_BUF_IDX(ring, (char *) buf); - ring->slot[res1idx].flags |= NS_BUF_CHANGED; - ring->head = nm_ring_next(ring, res1idx); - return true; - } else - return false; - } -#endif + int configure_phase() const { return CONFIGURE_PHASE_FIRST; } - static bool is_netmap_buffer(Packet *p) { - return p->buffer_destructor() == buffer_destructor; - } + int configure(Vector &conf, ErrorHandler *errh); - /* - * the destructor appends the buffer to the freelist in the ring, - * using the first field as pointer. 
- */ - static void buffer_destructor(unsigned char *buf, size_t, void *arg) { - NetmapInfo *x = reinterpret_cast(arg); - click_chatter("%s ni %p buf %p\n", __FUNCTION__, - x, buf); - if (x->bufq.insert_p(buf)) { - // XXX error - } - } + static NetmapInfo* instance; }; CLICK_ENDDECLS -#endif // HAVE_NETMAP_H #endif diff --git a/elements/userlevel/todevice.cc b/elements/userlevel/todevice.cc index 58114e47bb..838ee2ff53 100644 --- a/elements/userlevel/todevice.cc +++ b/elements/userlevel/todevice.cc @@ -279,7 +279,6 @@ ToDevice::cleanup(CleanupStage) #endif } - /* * Linux select marks datagram fd's as writeable when the socket * buffer has enough space to do a send (sock_writeable() in @@ -298,7 +297,7 @@ ToDevice::send_packet(Packet *p) #if TODEVICE_ALLOW_NETMAP if (_method == method_netmap) { - if (_netmap.send_packet(p, noutputs())) { // fail + if (_netmap.send_packet(p, noutputs() == 0)) { // fail errno = ENOBUFS; r = -1; } else diff --git a/elements/userlevel/todevice.hh b/elements/userlevel/todevice.hh index 371795b6e4..aafe1f0fb6 100644 --- a/elements/userlevel/todevice.hh +++ b/elements/userlevel/todevice.hh @@ -118,7 +118,7 @@ class ToDevice : public Element { public: int _fd; #endif #if TODEVICE_ALLOW_NETMAP - NetmapInfo _netmap; + NetmapDevice _netmap; #endif enum { method_default, method_netmap, method_linux, method_pcap, method_devbpf, method_pcapfd }; int _method; diff --git a/elements/userlevel/todpdkdevice.cc b/elements/userlevel/todpdkdevice.cc index f30e87439e..91a889a961 100644 --- a/elements/userlevel/todpdkdevice.cc +++ b/elements/userlevel/todpdkdevice.cc @@ -122,9 +122,9 @@ void ToDPDKDevice::add_handlers() inline struct rte_mbuf* get_mbuf(Packet* p, bool create=true) { struct rte_mbuf* mbuf = 0; - if (likely(DPDKDevice::is_dpdk_packet(p))) { + if (likely(DPDKDevice::is_dpdk_packet(p) && !p->shared())) { mbuf = (struct rte_mbuf *) p->destructor_argument(); - p->set_buffer_destructor(DPDKDevice::fake_free_pkt); + p->reset_buffer(); } else if 
(create) { mbuf = rte_pktmbuf_alloc(DPDKDevice::get_mpool(rte_socket_id())); memcpy((void*) rte_pktmbuf_mtod(mbuf, unsigned char *), p->data(), diff --git a/include/click/netmapdevice.hh b/include/click/netmapdevice.hh new file mode 100644 index 0000000000..b2a9b05221 --- /dev/null +++ b/include/click/netmapdevice.hh @@ -0,0 +1,227 @@ +// -*- c-basic-offset: 4; related-file-name: "../../lib/netmapdevice.cc" -*- +#ifndef CLICK_NETMAPDEVICE_HH +#define CLICK_NETMAPDEVICE_HH + +#if HAVE_NETMAP && CLICK_USERLEVEL + +#include +#include +#define NETMAP_WITH_LIBS +#include + +// XXX bug in netmap_user.h , the prototype should be available + +#ifndef NETMAP_WITH_LIBS +typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); +#endif + +#include +#include +#include +#include +#include +#include +#include + +CLICK_DECLS + +#define NETMAP_PACKET_POOL_SIZE 2048 +#define BUFFER_PTR(idx) reinterpret_cast(buf_start + idx * buf_size) +#define BUFFER_NEXT_LIST(idx) *(((uint32_t*)BUFFER_PTR(idx)) + 1) + +/* a queue of netmap buffers, by index*/ +class NetmapBufQ { +public: + + NetmapBufQ(); + ~NetmapBufQ(); + + inline void expand(); + + inline void insert(uint32_t idx); + inline void insert_p(unsigned char *p); + inline void insert_all(uint32_t idx, bool check_size); + + inline uint32_t extract(); + inline unsigned char * extract_p(); + + inline int count_buffers(uint32_t idx); + + inline int count() const { + return _count; + }; + + //Static functions + static int static_initialize(struct nm_desc* nmd); + static uint32_t static_cleanup(); + + static void global_insert_all(uint32_t idx, int count); + + inline static unsigned int buffer_size() { + return buf_size; + } + + static void buffer_destructor(unsigned char *buf, size_t, void *) { + NetmapBufQ::local_pool()->insert_p(buf); + } + + inline static bool is_netmap_packet(Packet* p) { + return (p->buffer_destructor() == buffer_destructor); + } + + inline static bool is_valid_netmap_packet(Packet* p) { + return 
((p->buffer_destructor() == buffer_destructor) && + p->buffer()>=buf_start && p->buffer() < buf_end); + } + + inline static NetmapBufQ* local_pool() { + return NetmapBufQ::netmap_buf_pools[click_current_cpu_id()]; + } + +private : + uint32_t _head; /* index of first buffer */ + int _count; /* how many ? */ + + //Static attributes (shared between all queues) + static unsigned char *buf_start; /* base address */ + static unsigned char *buf_end; /* error checking */ + static unsigned int buf_size; + static uint32_t max_index; /* error checking */ + + static Spinlock global_buffer_lock; + //The global netmap buffer list is used to exchange batch of buffers between threads + //The second uint32_t in the buffer is used to point to the next batch + static uint32_t global_buffer_list; + + static int messagelimit; + static NetmapBufQ** netmap_buf_pools; + +} __attribute__((aligned(64))); + +/** + * A Netmap interface + */ +class NetmapDevice { +public: + int open(const String &ifname, + bool always_error, ErrorHandler *errh); + void initialize_rings_rx(int timestamp); + void initialize_rings_tx(); + + int receive(int cnt, int headroom, void (*)(WritablePacket *, int, const Timestamp &, void* arg),void* arg); + int send_packet(Packet* p,bool allow_zc); + + + void close(int fd); + + struct nm_desc *desc; + + static void static_cleanup(); + static int global_alloc; + static struct nm_desc* some_nmd; +}; + +/* + * Inline functions + */ + +inline void NetmapBufQ::expand() { + global_buffer_lock.acquire(); + if (global_buffer_list != 0) { + //Transfer from global pool + _head = global_buffer_list; + global_buffer_list = BUFFER_NEXT_LIST(global_buffer_list); + _count = NETMAP_PACKET_POOL_SIZE; + } else { + if (messagelimit < 5) + click_chatter("No more netmap buffers !"); + messagelimit++; + } + global_buffer_lock.release(); +} + +/** + * Insert a list of netmap buffers in the queue + */ +inline void NetmapBufQ::insert_all(uint32_t idx,bool check_size = false) { + if (unlikely(idx 
>= max_index || idx == 0)) { + click_chatter("Error : cannot insert index %d",idx); + return; + } + + uint32_t firstidx = idx; + uint32_t *p; + while (idx > 0) { //Go to the end of the passed list + if (check_size) { + insert(idx); + } else { + p = reinterpret_cast(buf_start + + idx * buf_size); + idx = *p; + _count++; + } + } + + //Add the current list at the end of this one + *p = _head; + _head = firstidx; +} + +/** + * Return the number of buffer inside a netmap buffer list + */ +int NetmapBufQ::count_buffers(uint32_t idx) { + int count=0; + while (idx != 0) { + count++; + idx = *BUFFER_PTR(idx); + } + return count; +} + +inline void NetmapBufQ::insert(uint32_t idx) { + assert(idx > 0 && idx < max_index); + + if (_count < NETMAP_PACKET_POOL_SIZE) { + *BUFFER_PTR(idx) = _head; + _head = idx; + _count++; + } else { + assert(_count == NETMAP_PACKET_POOL_SIZE); + global_buffer_lock.acquire(); + BUFFER_NEXT_LIST(_head) = global_buffer_list; + global_buffer_list = _head; + global_buffer_lock.release(); + _head = idx; + *BUFFER_PTR(idx) = 0; + _count = 1; + } +} + +inline void NetmapBufQ::insert_p(unsigned char* buf) { + insert((buf - buf_start) / buf_size); +} + +inline uint32_t NetmapBufQ::extract() { + if (_count <= 0) { + expand(); + if (_count == 0) return 0; + } + uint32_t idx; + uint32_t *p; + idx = _head; + p = reinterpret_cast(buf_start + idx * buf_size); + + _head = *p; + _count--; + return idx; +} + +inline unsigned char* NetmapBufQ::extract_p() { + uint32_t idx = extract(); + return (idx == 0) ? 
0 : buf_start + idx * buf_size; +} + +#endif + +#endif diff --git a/lib/dpdkdevice.cc b/lib/dpdkdevice.cc index 92aaeba0a6..8c57efcdaa 100644 --- a/lib/dpdkdevice.cc +++ b/lib/dpdkdevice.cc @@ -251,10 +251,6 @@ void DPDKDevice::free_pkt(unsigned char *, size_t, void *pktmbuf) rte_pktmbuf_free((struct rte_mbuf *) pktmbuf); } -void DPDKDevice::fake_free_pkt(unsigned char *, size_t, void *) -{ -} - int DPDKDevice::NB_MBUF = 65536*8; int DPDKDevice::MBUF_SIZE = 2048 + sizeof (struct rte_mbuf) + RTE_PKTMBUF_HEADROOM; diff --git a/lib/driver.cc b/lib/driver.cc index 0b4ac0ffbd..094c2e53bf 100644 --- a/lib/driver.cc +++ b/lib/driver.cc @@ -48,6 +48,8 @@ # include #endif +#include + #if CLICK_USERLEVEL || CLICK_MINIOS # include # include @@ -468,6 +470,9 @@ click_static_cleanup() Router::static_cleanup(); Packet::static_cleanup(); +#if HAVE_NETMAP + NetmapDevice::static_cleanup(); +#endif ErrorHandler::static_cleanup(); cp_va_static_cleanup(); NameInfo::static_cleanup(); diff --git a/lib/netmapdevice.cc b/lib/netmapdevice.cc new file mode 100644 index 0000000000..835521bcc9 --- /dev/null +++ b/lib/netmapdevice.cc @@ -0,0 +1,350 @@ +// -*- c-basic-offset: 4; related-file-name: "../include/click/netmapdevice.hh" -*- +/* + * netmapinfo.{cc,hh} -- library for interfacing with netmap + * Eddie Kohler, Luigi Rizzo, Tom Barbette + * + * Copyright (c) 2012 Eddie Kohler + * Copyright (c) 2015 University of Liege + * + * NetmapBufQ implementation was started by Luigi Rizzo and moved from netmapinfo.hh. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, subject to the conditions + * listed in the Click LICENSE file. These conditions include: you must + * preserve this copyright notice, and you cannot mention the copyright + * holders in advertising related to the Software without their permission. 
+ * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This + * notice is a summary of the Click LICENSE file; the license in that file is + * legally binding. + */ + +#include +#include +#include +#include + +/**************************** + * NetmapBufQ + ****************************/ +NetmapBufQ::NetmapBufQ() : _head(0),_count(0){ + +} + +NetmapBufQ::~NetmapBufQ() { + +} + +/** + * Initizlize the per-thread pools and the cleanup pool + */ +int NetmapBufQ::static_initialize(struct nm_desc* nmd) { + if (!nmd) { + click_chatter("Error:Null netmap descriptor in NetmapBufQ::static_initialize!"); + return 1; + } + + //Only initilize once + if (buf_size) + return 0; + + buf_size = nmd->some_ring->nr_buf_size; + buf_start = reinterpret_cast(nmd->buf_start); + buf_end = reinterpret_cast(nmd->buf_end); + max_index = (buf_end - buf_start) / buf_size; + + if (!netmap_buf_pools) { + netmap_buf_pools = new NetmapBufQ*[click_max_cpu_ids()]; + + for (unsigned i = 0; i < click_max_cpu_ids(); i++) { + netmap_buf_pools[i] = new NetmapBufQ(); + } + } + + return 0; +} + +/** + * Empty all NetmapBufQ and the global ring. Return all netmap buffers in a list + */ +uint32_t NetmapBufQ::static_cleanup() +{ + if (!netmap_buf_pools) + return 0; + + for (unsigned int i = 1; i < click_max_cpu_ids(); i++) { + if (netmap_buf_pools[i]) { + if (netmap_buf_pools[i]->_head) + netmap_buf_pools[0]->insert_all(netmap_buf_pools[i]->_head, false); + delete netmap_buf_pools[i]; + netmap_buf_pools[i] = NULL; + } + } + + while (global_buffer_list > 0) { + uint32_t idx=global_buffer_list; + global_buffer_list = BUFFER_NEXT_LIST(global_buffer_list); + netmap_buf_pools[0]->insert_all(idx, false); + } + + uint32_t idx = 0; + if (netmap_buf_pools[0]->_count > 0) { + if (netmap_buf_pools[0]->_count == netmap_buf_pools[0]->count_buffers(idx)) + click_chatter("Error on cleanup of netmap buffer ! 
Expected %d buffer, got %d",netmap_buf_pools[0]->_count,netmap_buf_pools[0]->count_buffers(idx)); + else + click_chatter("Freeing %d Netmap buffers",netmap_buf_pools[0]->_count); + idx = netmap_buf_pools[0]->_head; + netmap_buf_pools[0]->_head = 0; + netmap_buf_pools[0]->_count = 0; + } + delete netmap_buf_pools[0]; + netmap_buf_pools[0] = 0; + delete[] netmap_buf_pools; + return idx; +} + +/** + * Insert all netmap buffers inside the global list + */ +void NetmapBufQ::global_insert_all(uint32_t idx, int count) { + //Cut packets in global pools + while (count >= NETMAP_PACKET_POOL_SIZE) { + int c = 0; + BUFFER_NEXT_LIST(idx) = global_buffer_list; + global_buffer_list = idx; + uint32_t *p = 0; + while (c < NETMAP_PACKET_POOL_SIZE) { + p = reinterpret_cast((unsigned char *)buf_start + + idx * buf_size); + idx = *p; + c++; + } + *p = 0; + count -= NETMAP_PACKET_POOL_SIZE; + } + + //Add remaining buffer to the local pool + if (count > 0) { + NetmapBufQ::local_pool()->insert_all(idx, true); + } +} + +/*************************** + * NetmapDevice + ***************************/ + +/* + * keep a list of netmap ports so matching the name we + * can recycle the regions + */ +static Spinlock netmap_memory_lock; + +int +NetmapDevice::open(const String &ifname, + bool always_error, ErrorHandler *errh) +{ + click_chatter("%s ifname %s\n", __FUNCTION__, ifname.c_str()); + ErrorHandler *initial_errh = always_error ? errh : ErrorHandler::silent_handler(); + + netmap_memory_lock.acquire(); + do { + struct nm_desc* base_nmd = (struct nm_desc*)calloc(1,sizeof(struct nm_desc)); + + base_nmd->self = base_nmd; + strcpy(base_nmd->req.nr_name,&(ifname.c_str()[7])); + if (NetmapDevice::some_nmd != NULL) { //Having same netmap space is a lot easier... 
+ base_nmd->mem = NetmapDevice::some_nmd->mem; + base_nmd->memsize = NetmapDevice::some_nmd->memsize; + base_nmd->req.nr_arg2 = NetmapDevice::some_nmd->req.nr_arg2; + base_nmd->req.nr_arg3 = 0; + base_nmd->done_mmap = NetmapDevice::some_nmd->done_mmap; + desc = nm_open(ifname.c_str(), NULL, NM_OPEN_NO_MMAP | NM_OPEN_IFNAME, base_nmd); + } else { + base_nmd->req.nr_arg3 = NetmapDevice::global_alloc; + if (base_nmd->req.nr_arg3 % NETMAP_PACKET_POOL_SIZE != 0) + base_nmd->req.nr_arg3 = ((base_nmd->req.nr_arg3 / NETMAP_PACKET_POOL_SIZE) + 1) * NETMAP_PACKET_POOL_SIZE; + //Ensure we have at least a batch per thread + 1 + if (NETMAP_PACKET_POOL_SIZE * ((unsigned)click_nthreads + 1) > base_nmd->req.nr_arg3) + base_nmd->req.nr_arg3 = NETMAP_PACKET_POOL_SIZE * (click_nthreads + 1); + desc = nm_open(ifname.c_str(), NULL, NM_OPEN_IFNAME | NM_OPEN_ARG3, base_nmd); + NetmapDevice::some_nmd = desc; + } + + if (desc == NULL) { + initial_errh->error("nm_open(%s): %s", ifname.c_str(), strerror(errno)); + break; + } + click_chatter("%s %s memsize %d mem %p buf_start %p buf_end %p", + __FUNCTION__, desc->req.nr_name, + desc->memsize, desc->mem, desc->buf_start, desc->buf_end); + + /* eventually try to match the region */ + click_chatter("private mapping for %s\n", ifname.c_str()); + } while (0); + + //Allocate packet pools if not already done + NetmapBufQ::static_initialize(desc); + + if (desc->req.nr_arg3 > 0) { + click_chatter("Allocated %d buffers from Netmap buffer pool",desc->req.nr_arg3); + NetmapBufQ::global_insert_all(desc->nifp->ni_bufs_head,desc->req.nr_arg3); + desc->nifp->ni_bufs_head = 0; + desc->req.nr_arg3 = 0; + } + + netmap_memory_lock.release(); + return desc ? desc->fd : -1; +} + +void +NetmapDevice::initialize_rings_rx(int timestamp) +{ + click_chatter("%s timestamp %d\n", __FUNCTION__, timestamp); + if (timestamp >= 0) { + int flags = (timestamp > 0 ? 
NR_TIMESTAMP : 0); + for (unsigned i = desc->first_rx_ring; i <= desc->last_rx_ring; ++i) + NETMAP_RXRING(desc->nifp, i)->flags = flags; + } +} + +void +NetmapDevice::initialize_rings_tx() +{ + click_chatter("%s\n", __FUNCTION__); +} + +int +NetmapDevice::receive(int cnt, int headroom, void (*emit_packet)(WritablePacket *, int, const Timestamp &, void* arg), void* arg) +{ + int n = desc->last_rx_ring - desc->first_rx_ring + 1; + int c, got = 0, ri = desc->cur_rx_ring; + + if (cnt == 0) + cnt = -1; + /* cnt == -1 means infinite, but rings have a finite amount + * of buffers and the int is large enough that we never wrap, + * so we can omit checking for -1 + */ + for (c=0; c < n && cnt != got; c++) { + /* compute current ring to use */ + struct netmap_ring *ring; + + ri = desc->cur_rx_ring + c; + if (ri > desc->last_rx_ring) + ri = desc->first_rx_ring; + ring = NETMAP_RXRING(desc->nifp, ri); + for ( ; !nm_ring_empty(ring) && cnt != got; got++) { + u_int i = ring->cur; + u_int idx = ring->slot[i].buf_idx; + u_int new_buf = NetmapBufQ::local_pool()->extract(); + u_char *buf = (u_char *)NETMAP_BUF(ring, idx); + u_int len = ring->slot[i].len; + WritablePacket *p; + + if (new_buf) { + ring->slot[i].buf_idx = new_buf; + ring->slot[i].flags |= NS_BUF_CHANGED; + p = Packet::make(buf,len,NetmapBufQ::buffer_destructor,0); + __builtin_prefetch(buf); + } else { + p = Packet::make(headroom, buf, len, 0); + } + Timestamp ts = Timestamp::uninitialized_t(); + #if TIMESTAMP_NANOSEC && defined(PCAP_TSTAMP_PRECISION_NANO) + if (_pcap_nanosec) + ts = Timestamp::make_nsec(ring->ts.tv_sec, ring->ts.tv_usec); + else + #endif + ts = Timestamp::make_usec(ring->ts.tv_sec, ring->ts.tv_usec); + + emit_packet(p, len, ts, arg); + + ring->head = ring->cur = nm_ring_next(ring, i); + } + } + desc->cur_rx_ring = ri; + return got; +} + + + +int NetmapDevice::send_packet(Packet* p,bool allow_zc) { + // we can do a smart nm_inject + for (unsigned ri = desc->first_tx_ring; ri <= desc->last_tx_ring; ++ri) 
{ + struct netmap_ring *ring = NETMAP_TXRING(desc->nifp, ri); + if (nm_ring_empty(ring)) + continue; + unsigned cur = ring->cur; + unsigned buf_idx = ring->slot[cur].buf_idx; + if (buf_idx < 2) + continue; + unsigned char *buf = (unsigned char *) NETMAP_BUF(ring, buf_idx); + uint32_t p_length = p->length(); + if (NetmapBufQ::is_valid_netmap_packet(p) + && !p->shared() + && allow_zc) { + //Get the buffer currently in the ring and put it in the buffer queue + NetmapBufQ::local_pool()->insert(ring->slot[cur].buf_idx); + + //Replace the ring buffer by the one from the packet + ring->slot[cur].buf_idx = NETMAP_BUF_IDX(ring, (char *) p->buffer()); + ring->slot[cur].flags |= NS_BUF_CHANGED; + if (cur % 32 == 0) + ring->slot[cur].flags |= NS_REPORT; + + p->reset_buffer(); + } else + memcpy(buf, p->data(), p_length); + ring->slot[cur].len = p_length; + __asm__ volatile("" : : : "memory"); + ring->head = ring->cur = nm_ring_next(ring, cur); + return 0; + } + errno = ENOBUFS; + return -1; +} + + + +void +NetmapDevice::close(int fd) +{ + click_chatter("fd %d interface %s\n", + fd, desc->req.nr_name); + if (desc != NetmapDevice::some_nmd) { + netmap_memory_lock.acquire(); + // unlink from the list ? 
+ nm_close(desc); + desc = 0; + netmap_memory_lock.release(); + } +} + +void NetmapDevice::static_cleanup() { + uint32_t idx = NetmapBufQ::static_cleanup(); + if (idx != 0) { + if (some_nmd) { + some_nmd->nifp->ni_bufs_head = idx; + nm_close(some_nmd); + some_nmd = 0; + } else { + click_chatter("No NMD set and netmap packet not released !"); + } + } +} + +NetmapBufQ** NetmapBufQ::netmap_buf_pools = 0; +unsigned int NetmapBufQ::buf_size = 0; +unsigned char* NetmapBufQ::buf_start = 0; +unsigned char* NetmapBufQ::buf_end = 0; +uint32_t NetmapBufQ::max_index = 0; + +Spinlock NetmapBufQ::global_buffer_lock; +uint32_t NetmapBufQ::global_buffer_list = 0; + +int NetmapBufQ::messagelimit = 0; + +int NetmapDevice::global_alloc = 32768; +struct nm_desc* NetmapDevice::some_nmd = 0; + +CLICK_ENDDECLS diff --git a/m4/click.m4 b/m4/click.m4 index 31ff886699..c86a94d410 100644 --- a/m4/click.m4 +++ b/m4/click.m4 @@ -455,9 +455,13 @@ AC_DEFUN([CLICK_CHECK_NETMAP], [ CPPFLAGS="$saveflags" if test "$HAVE_NETMAP" = yes -a "$use_netmap" != no; then - AC_DEFINE([HAVE_NET_NETMAP_H], [1], [Define if you have the header file.]) + AC_DEFINE([HAVE_NETMAP], [1], [Define if Netmap support is enabled.]) + EXTRA_DRIVER_OBJS="netmapdevice.o $EXTRA_DRIVER_OBJS" + else + HAVE_NETMAP=no fi AC_SUBST(NETMAP_INCLUDES) + AC_SUBST(EXTRA_DRIVER_OBJS) ]) diff --git a/test/tools/mkmindriver-01.testie b/test/tools/mkmindriver-01.testie index 8535ca1049..fd769df126 100644 --- a/test/tools/mkmindriver-01.testie +++ b/test/tools/mkmindriver-01.testie @@ -23,7 +23,6 @@ elements/userlevel/controlsocket.cc "elements/userlevel/controlsocket.hh" Contro elements/userlevel/fakepcap.cc "elements/userlevel/fakepcap.hh" elements/userlevel/fromdevice.cc "elements/userlevel/fromdevice.hh" FromDevice-FromDevice elements/userlevel/kernelfilter.cc "elements/userlevel/kernelfilter.hh" KernelFilter-KernelFilter -elements/userlevel/netmapinfo.cc "elements/userlevel/netmapinfo.hh" elements/userlevel/todump.cc 
"elements/userlevel/todump.hh" ToDump-ToDump %ignorex