dect
/
libpcap
Archived
13
0
Fork 0

Zero-copy BPF support, from Christian Peron.

This commit is contained in:
guy 2008-09-16 00:20:23 +00:00
parent 0a9a829a65
commit 73e8fef161
6 changed files with 452 additions and 13 deletions

View File

@ -24,6 +24,7 @@ Additional people who have contributed patches:
Chris G. Demetriou <cgd at netbsd dot org>
Chris Lightfoot <cwrl at users dot sourceforge dot net>
Chris Pepper <pepper at mail dot reppep dot com>
Christian Peron <csjp at freebsd dot org>
Daniele Orlandi <daniele at orlandi dot com>
Darren Reed <darrenr at reed dot wattle dot id dot au>
David Kaelbling <drk at sgi dot com>

View File

@ -161,6 +161,9 @@
/* Define to 1 if you have the `vsnprintf' function. */
#undef HAVE_VSNPRINTF
/* define if the system supports zerocopy BPF */
#undef HAVE_ZEROCOPY_BPF
/* define if your compiler has __attribute__ */
#undef HAVE___ATTRIBUTE__

65
configure vendored
View File

@ -4832,7 +4832,7 @@ fi
done
if test "$ac_cv_header_net_pfvar_h" == yes; then
if test "$ac_cv_header_net_pfvar_h" = yes; then
#
# Check for various PF actions.
#
@ -6304,8 +6304,8 @@ fi
{ echo "$as_me:$LINENO: result: $V_PCAP" >&5
echo "${ECHO_T}$V_PCAP" >&6; }
if test "$V_PCAP" = dlpi
then
case "$V_PCAP" in
dlpi)
#
# Checks to see if Solaris has the public libdlpi(3LIB) library.
# Note: The existence of /usr/include/libdlpi.h does not mean it is the
@ -6393,8 +6393,67 @@ else
fi
LDFLAGS=$saved_ldflags
;;
bpf)
{ echo "$as_me:$LINENO: checking whether the system supports zerocopy BPF" >&5
echo $ECHO_N "checking whether the system supports zerocopy BPF... $ECHO_C" >&6; }
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/bpf.h>
int
main ()
{
return (BIOCROTZBUF + BPF_BUFMODE_ZBUF);
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
{ echo "$as_me:$LINENO: result: yes" >&5
echo "${ECHO_T}yes" >&6; }
cat >>confdefs.h <<\_ACEOF
#define HAVE_ZEROCOPY_BPF 1
_ACEOF
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
{ echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6; }
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
;;
esac
if test "$V_PCAP" = null
then
#

View File

@ -1,4 +1,4 @@
dnl @(#) $Header: /tcpdump/master/libpcap/configure.in,v 1.154 2008-08-06 08:29:07 guy Exp $ (LBL)
dnl @(#) $Header: /tcpdump/master/libpcap/configure.in,v 1.155 2008-09-16 00:20:23 guy Exp $ (LBL)
dnl
dnl Copyright (c) 1994, 1995, 1996, 1997
dnl The Regents of the University of California. All rights reserved.
@ -6,7 +6,7 @@ dnl
dnl Process this file with autoconf to produce a configure script.
dnl
AC_REVISION($Revision: 1.154 $)
AC_REVISION($Revision: 1.155 $)
AC_PREREQ(2.50)
AC_INIT(pcap.c)
@ -37,7 +37,7 @@ AC_CHECK_HEADERS(sys/ioccom.h sys/sockio.h limits.h paths.h)
AC_CHECK_HEADERS(net/pfvar.h, , , [#include <sys/types.h>
#include <sys/socket.h>
#include <net/if.h>])
if test "$ac_cv_header_net_pfvar_h" == yes; then
if test "$ac_cv_header_net_pfvar_h" = yes; then
#
# Check for various PF actions.
#
@ -264,8 +264,8 @@ else
fi
AC_MSG_RESULT($V_PCAP)
if test "$V_PCAP" = dlpi
then
case "$V_PCAP" in
dlpi)
#
# Checks to see if Solaris has the public libdlpi(3LIB) library.
# Note: The existence of /usr/include/libdlpi.h does not mean it is the
@ -285,7 +285,24 @@ then
AC_DEFINE(HAVE_LIBDLPI,1,[if libdlpi exists]),
V_PCAP=dlpi)
LDFLAGS=$saved_ldflags
fi
;;
bpf)
AC_MSG_CHECKING(whether the system supports zerocopy BPF)
AC_TRY_COMPILE(
[#include <sys/socket.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/bpf.h>],
[return (BIOCROTZBUF + BPF_BUFMODE_ZBUF);],
[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_ZEROCOPY_BPF, 1,
[define if the system supports zerocopy BPF])
],
AC_MSG_RESULT(no))
;;
esac
dnl
dnl Now figure out how we get a list of interfaces and addresses,

View File

@ -20,7 +20,7 @@
*/
#ifndef lint
static const char rcsid[] _U_ =
"@(#) $Header: /tcpdump/master/libpcap/pcap-bpf.c,v 1.111 2008-07-01 08:02:33 guy Exp $ (LBL)";
"@(#) $Header: /tcpdump/master/libpcap/pcap-bpf.c,v 1.112 2008-09-16 00:20:23 guy Exp $ (LBL)";
#endif
#ifdef HAVE_CONFIG_H
@ -28,6 +28,9 @@ static const char rcsid[] _U_ =
#endif
#include <sys/param.h> /* optionally get BSD define */
#ifdef HAVE_ZEROCOPY_BPF
#include <sys/mman.h>
#endif
#include <sys/time.h>
#include <sys/timeb.h>
#include <sys/socket.h>
@ -35,6 +38,10 @@ static const char rcsid[] _U_ =
#include <sys/ioctl.h>
#include <sys/utsname.h>
#ifdef HAVE_ZEROCOPY_BPF
#include <machine/atomic.h>
#endif
#include <net/if.h>
#ifdef _AIX
@ -158,6 +165,227 @@ static int pcap_setfilter_bpf(pcap_t *p, struct bpf_program *fp);
static int pcap_setdirection_bpf(pcap_t *, pcap_direction_t);
static int pcap_set_datalink_bpf(pcap_t *p, int dlt);
#ifdef HAVE_ZEROCOPY_BPF
/*
* For zerocopy bpf, we need to override the setnonblock/getnonblock routines
* so we don't call select(2) if the pcap handle is in non-blocking mode. We
* preserve the timeout supplied by pcap_open functions to make sure it
* does not get clobbered if the pcap handle moves between blocking and non-
* blocking mode.
*/
static int
pcap_getnonblock_zbuf(pcap_t *p, char *errbuf)
{
	int nonblocking;

	/*
	 * Non-blocking mode is encoded in the sign of the stored timeout:
	 * a negative value means the handle is non-blocking.  errbuf is
	 * not written to.
	 */
	nonblocking = (p->md.timeout < 0) ? 1 : 0;
	return (nonblocking);
}
static int
pcap_setnonblock_zbuf(pcap_t *p, int nonblock, char *errbuf)
{
	/*
	 * Non-blocking mode is recorded in the sign of p->md.timeout so
	 * that the timeout supplied by the caller survives a round trip
	 * between blocking and non-blocking mode (same trick as in
	 * pcap-linux.c).  A blocking timeout t >= 0 is stored as -t - 1
	 * (0 -> -1, 1 -> -2, ...), and the inverse mapping restores it.
	 *
	 * The test must be ">= 0", not "> 0": a timeout of 0 ("wait
	 * forever") also has to become negative, otherwise the handle
	 * would stay in blocking mode and pcap_getnonblock_zbuf() would
	 * keep reporting 0.
	 *
	 * Always returns 0; errbuf is not written to.
	 */
	if (nonblock) {
		if (p->md.timeout >= 0)
			p->md.timeout = p->md.timeout * -1 - 1;
	} else {
		if (p->md.timeout < 0)
			p->md.timeout = (p->md.timeout + 1) * -1;
	}
	return (0);
}
/*
 * Zero-copy specific close method.  Un-map the shared buffers, then call
 * pcap_cleanup_live_common().
 */
static void
pcap_close_zbuf(pcap_t *p)
{
	/*
	 * Check to see if this pcap instance was using the zerocopy buffer
	 * mode.  If it was, delete the mappings.  Note that p->buffer
	 * gets initialized to one of the mmaped regions in this case, so
	 * it must not be freed directly; it is cleared before the common
	 * cleanup runs.
	 *
	 * If the regular buffer mode was selected, p->buffer is left alone
	 * and the common cleanup is responsible for it.
	 */
	if (p->md.zerocopy != 0) {
		if (p->md.zbuf1 != MAP_FAILED && p->md.zbuf1 != NULL)
			(void) munmap(p->md.zbuf1, p->md.zbufsize);
		if (p->md.zbuf2 != MAP_FAILED && p->md.zbuf2 != NULL)
			(void) munmap(p->md.zbuf2, p->md.zbufsize);

		/*
		 * Drop every reference into the regions that were just
		 * unmapped, so that a later cleanup pass testing these
		 * pointers cannot unmap the same addresses a second time
		 * or dereference a stale header.
		 */
		p->md.zbuf1 = NULL;
		p->md.zbuf2 = NULL;
		p->md.zbuffer = NULL;
		p->md.bzh = NULL;
		p->buffer = NULL;
	}
	pcap_cleanup_live_common(p);
}
/*
 * Zero-copy BPF buffer routines to check for and acknowledge BPF data in
 * shared memory buffers.
 *
 * pcap_next_zbuf_shm(): Look at the buffer that should be filled next and,
 * if its kernel generation number differs from the user generation number,
 * make it the current buffer: p->md.bzh, p->md.zbuffer and p->buffer are
 * updated and *cc receives the header's bzh_kernel_len.  Returns 1 in that
 * case; otherwise sets *cc to 0 and returns 0.
 *
 * The buffers alternate: after zbuf2 (or when no buffer has been used yet)
 * zbuf1 is examined, since that is the first buffer filled for a fresh BPF
 * session; after zbuf1, zbuf2 is examined.
 */
static int
pcap_next_zbuf_shm(pcap_t *p, int *cc)
{
	struct bpf_zbuf_header *hdr;
	u_char *next;

	/* Decide which of the two buffers is due next. */
	if (p->md.zbuffer == p->md.zbuf2 || p->md.zbuffer == NULL) {
		next = (u_char *)p->md.zbuf1;
	} else if (p->md.zbuffer == p->md.zbuf1) {
		next = (u_char *)p->md.zbuf2;
	} else {
		/* Current buffer is neither zbuf1 nor zbuf2: nothing to do. */
		*cc = 0;
		return (0);
	}

	hdr = (struct bpf_zbuf_header *)next;
	if (hdr->bzh_user_gen == atomic_load_acq_int(&hdr->bzh_kernel_gen)) {
		/* Generations match: no unacknowledged data in this buffer. */
		*cc = 0;
		return (0);
	}

	/* The packet data starts right after the header. */
	p->md.bzh = hdr;
	p->md.zbuffer = next;
	p->buffer = p->md.zbuffer + sizeof(*hdr);
	*cc = hdr->bzh_kernel_len;
	return (1);
}
/*
 * pcap_next_zbuf() -- Similar to pcap_next_zbuf_shm(), except wait using
 * select() for data or a timeout, and possibly force rotation of the buffer
 * in the event we time out or are in immediate mode.  Invoke the shared
 * memory check before doing system calls in order to avoid doing avoidable
 * work.
 *
 * Returns the result of pcap_next_zbuf_shm() when a buffer check is made
 * (1 with *cc set when data is available, 0 otherwise), 0 when select()
 * was interrupted by a signal, and -1 with p->errbuf filled in when
 * select() or the BIOCROTZBUF ioctl fails.
 */
static int
pcap_next_zbuf(pcap_t *p, int *cc)
{
	struct bpf_zbuf bz;
	struct timeval tv;
	struct timespec cur;
	fd_set r_set;
	int data, r;
	int elapsed, tmout;

	/*
	 * Start out by seeing whether anything is waiting by checking the
	 * next shared memory buffer for data.
	 */
	data = pcap_next_zbuf_shm(p, cc);
	if (data)
		return (data);

	/*
	 * If a previous sleep was interrupted due to signal delivery, make
	 * sure that the timeout gets adjusted accordingly.  This requires
	 * that we work out how much time has passed since the first
	 * select() call and subtract that from the configured timeout.  If
	 * after this operation our timeout is less than or equal to zero,
	 * handle it like a regular timeout.
	 */
	tmout = p->md.timeout;
	if (tmout)
		(void) clock_gettime(CLOCK_MONOTONIC, &cur);
	if (p->md.interrupted && p->md.timeout) {
		/*
		 * Work with the elapsed time rather than with absolute
		 * millisecond counts: the difference is small, whereas an
		 * absolute monotonic time in milliseconds does not fit in
		 * an int once enough seconds have accumulated.
		 */
		elapsed = (int)((cur.tv_sec - p->md.firstsel.tv_sec) * 1000 +
		    (cur.tv_nsec / 1000000 -
		    p->md.firstsel.tv_nsec / 1000000));
		tmout = p->md.timeout - elapsed;
		if (tmout <= 0) {
			p->md.interrupted = 0;
			data = pcap_next_zbuf_shm(p, cc);
			if (data)
				return (data);
			if (ioctl(p->fd, BIOCROTZBUF, &bz) < 0) {
				(void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
				    "BIOCROTZBUF: %s", strerror(errno));
				return (-1);
			}
			return (pcap_next_zbuf_shm(p, cc));
		}
	}

	/*
	 * No data in the buffer, so must use select() to wait for data or
	 * the next timeout.  Note that we only call select if the handle
	 * is in blocking mode (a negative timeout marks non-blocking mode).
	 */
	if (p->md.timeout >= 0) {
		FD_ZERO(&r_set);
		FD_SET(p->fd, &r_set);
		if (tmout != 0) {
			tv.tv_sec = tmout / 1000;
			/*
			 * Take the remainder before scaling to
			 * microseconds; "tmout * 1000" overflows an int
			 * for timeouts longer than about 35 minutes.
			 */
			tv.tv_usec = (tmout % 1000) * 1000;
		}
		/*
		 * In blocking mode tmout is zero exactly when the
		 * configured timeout is zero, so tv is only passed when it
		 * has been filled in above; a zero timeout waits forever.
		 */
		r = select(p->fd + 1, &r_set, NULL, NULL,
		    tmout != 0 ? &tv : NULL);
		if (r < 0 && errno == EINTR) {
			/*
			 * Remember when the first wait started so that the
			 * next call can shorten its timeout accordingly.
			 */
			if (!p->md.interrupted && p->md.timeout) {
				p->md.interrupted = 1;
				p->md.firstsel = cur;
			}
			return (0);
		} else if (r < 0) {
			(void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
			    "select: %s", strerror(errno));
			return (-1);
		}
	}
	p->md.interrupted = 0;

	/*
	 * Check again for data, which may exist now that we've either been
	 * woken up as a result of data or timed out.  Try the "there's data"
	 * case first since it doesn't require a system call.
	 */
	data = pcap_next_zbuf_shm(p, cc);
	if (data)
		return (data);

	/*
	 * Try forcing a buffer rotation to dislodge timed out or immediate
	 * data.
	 */
	if (ioctl(p->fd, BIOCROTZBUF, &bz) < 0) {
		(void) snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
		    "BIOCROTZBUF: %s", strerror(errno));
		return (-1);
	}
	return (pcap_next_zbuf_shm(p, cc));
}
/*
 * Tell the kernel that we are finished with the current buffer by storing
 * the header's kernel generation number into its user generation number.
 * p->md.zbuffer is deliberately left untouched so that the next lookup
 * knows which buffer to examine.  Always returns 0.
 */
static int
pcap_ack_zbuf(pcap_t *p)
{
	struct bpf_zbuf_header *hdr = p->md.bzh;

	atomic_store_rel_int(&hdr->bzh_user_gen, hdr->bzh_kernel_gen);

	/* No buffer is being processed any more. */
	p->md.bzh = NULL;
	p->buffer = NULL;
	return (0);
}
#endif
pcap_t *
pcap_create(const char *device, char *ebuf)
{
@ -173,6 +401,11 @@ pcap_create(const char *device, char *ebuf)
return (NULL);
p->activate_op = pcap_activate_bpf;
#ifdef HAVE_ZEROCOPY_BPF
p->cleanup_op = pcap_close_zbuf;
p->setnonblock_op = pcap_setnonblock_zbuf;
p->getnonblock_op = pcap_getnonblock_zbuf;
#endif
p->can_set_rfmon_op = pcap_can_set_rfmon_bpf;
return (p);
}
@ -507,6 +740,9 @@ pcap_read_bpf(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
#ifdef PCAP_FDDIPAD
register int pad;
#endif
#ifdef HAVE_ZEROCOPY_BPF
int i;
#endif
again:
/*
@ -523,6 +759,25 @@ pcap_read_bpf(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
}
cc = p->cc;
if (p->cc == 0) {
/*
* When reading without zero-copy from a file descriptor, we
* use a single buffer and return a length of data in the
* buffer. With zero-copy, we update the p->buffer pointer
* to point at whatever underlying buffer contains the next
* data and update cc to reflect the data found in the
* buffer.
*/
#ifdef HAVE_ZEROCOPY_BPF
if (p->md.zerocopy) {
if (p->buffer != NULL)
pcap_ack_zbuf(p);
i = pcap_next_zbuf(p, &cc);
if (i == 0)
goto again;
if (i < 0)
return (-1);
} else
#endif
cc = read(p->fd, (char *)p->buffer, p->bufsize);
if (cc < 0) {
/* Don't choke when we get ptraced */
@ -954,6 +1209,18 @@ pcap_cleanup_bpf(pcap_t *p)
p->md.must_clear = 0;
}
#ifdef HAVE_ZEROCOPY_BPF
/*
* In zero-copy mode, p->buffer is just a pointer into one of the two
* memory-mapped buffers, so no need to free it.
*/
if (p->md.zerocopy) {
if (p->md.zbuf1 != MAP_FAILED && p->md.zbuf1 != NULL)
munmap(p->md.zbuf1, p->md.zbufsize);
if (p->md.zbuf2 != MAP_FAILED && p->md.zbuf2 != NULL)
munmap(p->md.zbuf2, p->md.zbufsize);
}
#endif
if (p->md.device != NULL) {
free(p->md.device);
p->md.device = NULL;
@ -1073,6 +1340,10 @@ pcap_activate_bpf(pcap_t *p)
struct bpf_program total_prog;
struct utsname osinfo;
int have_osinfo = 0;
#ifdef HAVE_ZEROCOPY_BPF
struct bpf_zbuf bz;
u_int bufmode, zbufmax;
#endif
fd = bpf_open(p);
if (fd < 0) {
@ -1189,7 +1460,63 @@ pcap_activate_bpf(pcap_t *p)
}
}
#endif /* __APPLE__ */
#ifdef HAVE_ZEROCOPY_BPF
/*
* If the BPF extension to set buffer mode is present, try setting
* the mode to zero-copy. If that fails, use regular buffering. If
* it succeeds but other setup fails, return an error to the user.
*/
bufmode = BPF_BUFMODE_ZBUF;
if (ioctl(fd, BIOCSETBUFMODE, (caddr_t)&bufmode) == 0) {
p->md.zerocopy = 1;
/*
* How to pick a buffer size: first, query the maximum buffer
* size supported by zero-copy. This also lets us quickly
* determine whether the kernel generally supports zero-copy.
* Then, query the default buffer size, which reflects kernel
* policy for a desired default. Round to the nearest page
* size.
*/
if (ioctl(fd, BIOCGETZMAX, (caddr_t)&zbufmax) < 0) {
snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCGETZMAX: %s",
pcap_strerror(errno));
goto bad;
}
if ((ioctl(fd, BIOCGBLEN, (caddr_t)&v) < 0) || v < 32768)
v = 32768;
#ifndef roundup
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */
#endif
p->md.zbufsize = roundup(v, getpagesize());
if (p->md.zbufsize > zbufmax)
p->md.zbufsize = zbufmax;
p->md.zbuf1 = mmap(NULL, p->md.zbufsize, PROT_READ | PROT_WRITE,
MAP_ANON, -1, 0);
p->md.zbuf2 = mmap(NULL, p->md.zbufsize, PROT_READ | PROT_WRITE,
MAP_ANON, -1, 0);
if (p->md.zbuf1 == MAP_FAILED || p->md.zbuf2 == MAP_FAILED) {
snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "mmap: %s",
pcap_strerror(errno));
goto bad;
}
bzero(&bz, sizeof(bz));
bz.bz_bufa = p->md.zbuf1;
bz.bz_bufb = p->md.zbuf2;
bz.bz_buflen = p->md.zbufsize;
if (ioctl(fd, BIOCSETZBUF, (caddr_t)&bz) < 0) {
snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCSETZBUF: %s",
pcap_strerror(errno));
goto bad;
}
(void)strncpy(ifr.ifr_name, p->opt.source, sizeof(ifr.ifr_name));
if (ioctl(fd, BIOCSETIF, (caddr_t)&ifr) < 0) {
snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "BIOCSETIF: %s: %s",
p->opt.source, pcap_strerror(errno));
goto bad;
}
v = p->md.zbufsize - sizeof(struct bpf_zbuf_header);
} else
#endif
/*
* Set the buffer size.
*/
@ -1508,7 +1835,11 @@ pcap_activate_bpf(pcap_t *p)
}
#endif
/* set timeout */
if (p->md.timeout != 0) {
#ifdef HAVE_ZEROCOPY_BPF
if (p->md.timeout != 0 && !p->md.zerocopy) {
#else
if (p->md.timeout) {
#endif
/*
* XXX - is this seconds/nanoseconds in AIX?
* (Treating it as such doesn't fix the timeout
@ -1599,6 +1930,9 @@ pcap_activate_bpf(pcap_t *p)
goto bad;
}
p->bufsize = v;
#ifdef HAVE_ZEROCOPY_BPF
if (!p->md.zerocopy) {
#endif
p->buffer = (u_char *)malloc(p->bufsize);
if (p->buffer == NULL) {
snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "malloc: %s",
@ -1611,6 +1945,9 @@ pcap_activate_bpf(pcap_t *p)
* problems we have experienced from AIX BPF. */
memset(p->buffer, 0x0, p->bufsize);
#endif
#ifdef HAVE_ZEROCOPY_BPF
}
#endif
/*
* If there's no filter program installed, there's

View File

@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#) $Header: /tcpdump/master/libpcap/pcap-int.h,v 1.93 2008-08-06 07:49:19 guy Exp $ (LBL)
* @(#) $Header: /tcpdump/master/libpcap/pcap-int.h,v 1.94 2008-09-16 00:20:23 guy Exp $ (LBL)
*/
#ifndef pcap_int_h
@ -152,6 +152,28 @@ struct pcap_md {
* Same as in linux above, introduce
* generally? */
#endif /* HAVE_DAG_API */
#ifdef HAVE_ZEROCOPY_BPF
/*
* Zero-copy read buffer -- for zero-copy BPF. 'buffer' above will
* alternate between these two actual mmap'd buffers as required.
* As there is a header at the front of each mmap'd buffer, only
* some of the buffer is exposed to libpcap as a whole via bufsize;
* zbufsize is the true size. zbuffer tracks the current zbuf
* associated with buffer so that it can be used to decide which
* buffer will be read next.
*/
u_char *zbuf1, *zbuf2, *zbuffer;
u_int zbufsize;
u_int zerocopy;
u_int interrupted;
struct timespec firstsel;
/*
* If there's currently a buffer being actively processed, then it is
* referenced here; 'buffer' is also pointed at it, but offset by the
* size of the header.
*/
struct bpf_zbuf_header *bzh;
#endif /* HAVE_ZEROCOPY_BPF */
};
/*