Strengthen the heuristics for pcap subtypes.

When trying to guess what type of capture a file is, look for as many
bogosities as possible (caplen > len, microseconds >= 10^6/nanoseconds >= 10^9,
too-high caplen, too-high original len, caplen > snapshot length), to
increase the chances of guessing correctly.

(Every time somebody uses 0xa1b2c3d4 as the magic number for a capture
file that isn't standard pcap format, God kills a kitten.  Please, think
of the kittens.)

Change-Id: I3f397d598ed61dc82e2832be30452ebe8ace98e8
Reviewed-on: https://code.wireshark.org/review/3808
Reviewed-by: Guy Harris <guy@alum.mit.edu>
This commit is contained in:
Guy Harris 2014-08-24 01:06:35 -07:00
parent 28bea6f796
commit bb0c17546f
1 changed files with 209 additions and 230 deletions

View File

@ -56,24 +56,20 @@ typedef struct {
#endif
/* Try to read the first two records of the capture file. */
typedef enum {
THIS_FORMAT, /* the reads succeeded, assume it's this format */
BAD_READ, /* the file is probably not valid */
OTHER_FORMAT /* the file may be valid, but not in this format */
} libpcap_try_t;
static libpcap_try_t libpcap_try(wtap *wth, int *err);
static int libpcap_try(wtap *wth, int *err);
static int libpcap_try_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
struct pcaprec_ss990915_hdr *hdr);
static gboolean libpcap_read(wtap *wth, int *err, gchar **err_info,
gint64 *data_offset);
static gboolean libpcap_seek_read(wtap *wth, gint64 seek_off,
struct wtap_pkthdr *phdr, Buffer *buf, int *err, gchar **err_info);
static int libpcap_read_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
struct pcaprec_ss990915_hdr *hdr);
static void adjust_header(wtap *wth, struct pcaprec_hdr *hdr);
static gboolean libpcap_read_packet(wtap *wth, FILE_T fh,
struct wtap_pkthdr *phdr, Buffer *buf, int *err, gchar **err_info);
static gboolean libpcap_dump(wtap_dumper *wdh, const struct wtap_pkthdr *phdr,
const guint8 *pd, int *err);
static int libpcap_read_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
struct pcaprec_ss990915_hdr *hdr);
int libpcap_open(wtap *wth, int *err, gchar **err_info)
{
@ -86,6 +82,28 @@ int libpcap_open(wtap *wth, int *err, gchar **err_info)
int file_encap;
gint64 first_packet_offset;
libpcap_t *libpcap;
static const int subtypes_modified[] = {
WTAP_FILE_TYPE_SUBTYPE_PCAP_SS991029,
WTAP_FILE_TYPE_SUBTYPE_PCAP_SS990915
};
#define N_SUBTYPES_MODIFIED G_N_ELEMENTS(subtypes_modified)
static const int subtypes_standard[] = {
WTAP_FILE_TYPE_SUBTYPE_PCAP,
WTAP_FILE_TYPE_SUBTYPE_PCAP_SS990417,
WTAP_FILE_TYPE_SUBTYPE_PCAP_NOKIA
};
#define N_SUBTYPES_STANDARD G_N_ELEMENTS(subtypes_standard)
static const int subtypes_nsec[] = {
WTAP_FILE_TYPE_SUBTYPE_PCAP_NSEC
};
#define N_SUBTYPES_NSEC G_N_ELEMENTS(subtypes_nsec)
#define MAX_FIGURES_OF_MERIT \
MAX(MAX(N_SUBTYPES_MODIFIED, N_SUBTYPES_STANDARD), N_SUBTYPES_NSEC)
int figures_of_merit[MAX_FIGURES_OF_MERIT];
const int *subtypes;
int n_subtypes;
int best_subtype;
int i;
/* Read in the number that should be at the start of a "libpcap" file */
errno = WTAP_ERR_CANT_READ;
@ -348,133 +366,76 @@ int libpcap_open(wtap *wth, int *err, gchar **err_info)
if (modified) {
/*
* Well, we have the magic number from Alexey's
* later two patches.
*
* Try ss991029, the last of his patches, first.
* later two patches. Try the subtypes for that.
*/
wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_SS991029;
first_packet_offset = file_tell(wth->fh);
switch (libpcap_try(wth, err)) {
case BAD_READ:
/*
* Well, we couldn't even read it.
* Give up.
*/
return -1;
case THIS_FORMAT:
/*
* Well, it looks as if it might be 991029.
* Put the seek pointer back, and finish.
*/
if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
return -1;
}
goto done;
case OTHER_FORMAT:
/*
* Try the next format.
*/
break;
}
/*
* Well, it's not completely unreadable,
* but it's not ss991029. Try ss990915;
* there are no other types to try after that,
* so we put the seek pointer back and treat
* it as 990915.
*/
wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_SS990915;
if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
return -1;
}
subtypes = subtypes_modified;
n_subtypes = N_SUBTYPES_MODIFIED;
} else {
/*
* Well, we have the standard magic number.
*
* Try the standard format first.
*/
if(wth->tsprecision == WTAP_FILE_TSPREC_NSEC) {
wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_NSEC;
if (wth->tsprecision == WTAP_FILE_TSPREC_NSEC) {
/*
* We have nanosecond-format libpcap's magic
* number. Try the subtypes for that.
*/
subtypes = subtypes_nsec;
n_subtypes = N_SUBTYPES_NSEC;
} else {
wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP;
/*
* We have the regular libpcap magic number.
* Try the subtypes for that.
*/
subtypes = subtypes_standard;
n_subtypes = N_SUBTYPES_STANDARD;
}
first_packet_offset = file_tell(wth->fh);
switch (libpcap_try(wth, err)) {
}
case BAD_READ:
/*
* Try all the subtypes.
*/
first_packet_offset = file_tell(wth->fh);
for (i = 0; i < n_subtypes; i++) {
wth->file_type_subtype = subtypes[i];
figures_of_merit[i] = libpcap_try(wth, err);
if (figures_of_merit[i] == -1) {
/*
* Well, we couldn't even read it.
* Give up.
*/
return -1;
case THIS_FORMAT:
}
if (figures_of_merit[i] == 0) {
/*
* Well, it looks as if it might be a standard
* libpcap file.
* This format doesn't have any issues.
* Put the seek pointer back, and finish.
*/
if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
return -1;
}
goto done;
case OTHER_FORMAT:
/*
* Try the next format.
*/
break;
}
/*
* Well, it's not completely unreadable, but it's not
* a standard file. Put the seek pointer back and try
* ss990417.
* OK, we've recorded the figure of merit for this one;
* go back to the first packet and try the next one.
*/
wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_SS990417;
if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
return -1;
}
switch (libpcap_try(wth, err)) {
case BAD_READ:
/*
* Well, we couldn't even read it.
* Give up.
*/
return -1;
case THIS_FORMAT:
/*
* Well, it looks as if it might be ss990417.
* Put the seek pointer back, and finish.
*/
if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
return -1;
}
goto done;
case OTHER_FORMAT:
/*
* Try the next format.
*/
break;
}
}
/*
* OK, none are perfect; let's see which one is least bad.
*/
best_subtype = INT_MAX;
for (i = 0; i < n_subtypes; i++) {
/*
* Well, it's not completely unreadable,
* but it's not a standard file *nor* is it ss990417.
* Try it as a Nokia file; there are no other types
* to try after that, so we put the seek pointer back
* and treat it as a Nokia file.
* Is this subtype better than the last one we saw?
*/
wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_NOKIA;
if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
return -1;
if (figures_of_merit[i] < best_subtype) {
/*
* Yes. Choose it until we find a better one.
*/
wth->file_type_subtype = subtypes[i];
best_subtype = figures_of_merit[i];
}
}
@ -488,7 +449,8 @@ done:
* If this is a Nokia capture, treat 13 as WTAP_ENCAP_ATM_PDUS,
* rather than as what we normally treat it.
*/
if (wth->file_type_subtype == WTAP_FILE_TYPE_SUBTYPE_PCAP_NOKIA && hdr.network == 13)
if (wth->file_type_subtype == WTAP_FILE_TYPE_SUBTYPE_PCAP_NOKIA &&
hdr.network == 13)
wth->file_encap = WTAP_ENCAP_ATM_PDUS;
if (wth->file_encap == WTAP_ENCAP_ERF) {
@ -502,8 +464,10 @@ done:
}
/* Try to read the first two records of the capture file. */
static libpcap_try_t libpcap_try(wtap *wth, int *err)
static int libpcap_try(wtap *wth, int *err)
{
int ret;
/*
* pcaprec_ss990915_hdr is the largest header type.
*/
@ -513,7 +477,8 @@ static libpcap_try_t libpcap_try(wtap *wth, int *err)
/*
* Attempt to read the first record's header.
*/
if (libpcap_read_header(wth, wth->fh, err, NULL, &first_rec_hdr) == -1) {
ret = libpcap_try_header(wth, wth->fh, err, NULL, &first_rec_hdr);
if (ret == -1) {
if (*err == 0 || *err == WTAP_ERR_SHORT_READ) {
/*
* EOF or short read - assume the file is in this
@ -522,24 +487,17 @@ static libpcap_try_t libpcap_try(wtap *wth, int *err)
* they will presumably get the same EOF or short
* read.
*/
return THIS_FORMAT;
}
if (*err == WTAP_ERR_BAD_FILE) {
/*
* The first record is bogus, so this is probably
* a corrupt file. Assume the file is in this
* format. When our client tries to read the
* first packet they will presumably get the
* same bogus record.
*/
return THIS_FORMAT;
return 0;
}
return ret;
}
if (ret != 0) {
/*
* Some other error, e.g. an I/O error; just give up.
* Probably a mismatch; return the figure of merit
* (demerit?).
*/
return BAD_READ;
return ret;
}
/*
@ -547,12 +505,13 @@ static libpcap_try_t libpcap_try(wtap *wth, int *err)
* that the header is sane.
*/
if (file_seek(wth->fh, first_rec_hdr.hdr.incl_len, SEEK_CUR, err) == -1)
return BAD_READ;
return -1;
/*
* Now attempt to read the second record's header.
*/
if (libpcap_read_header(wth, wth->fh, err, NULL, &second_rec_hdr) == -1) {
ret = libpcap_try_header(wth, wth->fh, err, NULL, &second_rec_hdr);
if (ret == -1) {
if (*err == 0 || *err == WTAP_ERR_SHORT_READ) {
/*
* EOF or short read - assume the file is in this
@ -561,32 +520,105 @@ static libpcap_try_t libpcap_try(wtap *wth, int *err)
* they will presumably get the same EOF or short
* read.
*/
return THIS_FORMAT;
return 0;
}
if (*err == WTAP_ERR_BAD_FILE) {
/*
* The second record is bogus; maybe it's a
* Capture File From Hell, and what looks like
* the "header" of the next packet is actually
* random junk from the middle of a packet.
* Try the next format; if we run out of formats,
* it probably *is* a corrupt file.
*/
return OTHER_FORMAT;
}
/*
* Some other error, e.g. an I/O error; just give up.
*/
return BAD_READ;
return ret;
}
/*
* OK, the first two records look OK; assume this is the
* right format.
*/
return THIS_FORMAT;
return ret;
}
/* Read the next record header and score how plausible it looks for the
   file subtype currently set in wth->file_type_subtype.
   Returns -1 if libpcap_read_header() fails, 0 if nothing about the
   header looks suspicious, or otherwise the number of sanity checks the
   header failed.  The subtype-guessing heuristics compare these scores
   and pick the subtype with the lowest one. */
static int libpcap_try_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
    struct pcaprec_ss990915_hdr *hdr)
{
	int is_nsec;
	int demerits = 0;	/* nothing wrong until proven otherwise */

	if (!libpcap_read_header(wth, fh, err, err_info, hdr))
		return -1;

	/*
	 * The fractions-of-a-second field should be less than one second
	 * in the subtype's resolution; a larger value hints that the
	 * header layout isn't the one we're assuming.
	 */
	is_nsec =
	    (wth->file_type_subtype == WTAP_FILE_TYPE_SUBTYPE_PCAP_NSEC ||
	     wth->file_type_subtype == WTAP_FILE_TYPE_SUBTYPE_PCAP_AIX);
	if (is_nsec) {
		/* Nanosecond resolution: anything >= 1 000 000 000 is odd. */
		if (hdr->hdr.ts_usec >= 1000000000)
			demerits++;
	} else {
		/* Microsecond resolution: anything >= 1 000 000 is odd. */
		if (hdr->hdr.ts_usec >= 1000000)
			demerits++;
	}

	/*
	 * A captured length above the maximum packet size means either a
	 * corrupt file or a subtype other than the one being tried.
	 */
	if (hdr->hdr.incl_len > WTAP_MAX_PACKET_SIZE)
		demerits++;

	/*
	 * The on-the-wire length isn't bounded by the snapshot length and
	 * could in theory be arbitrarily large, but a single packet that
	 * claims more than 64MB is *probably* bogus, and counting it as
	 * such makes the heuristics much more reliable.  See, for example,
	 *
	 *    https://bugs.wireshark.org/bugzilla/show_bug.cgi?id=9634
	 *
	 * (The 64MB threshold is arbitrary.)
	 */
	if (hdr->hdr.orig_len > 64*1024*1024)
		demerits++;

	/*
	 * A captured length above the file's snapshot length is not a
	 * fatal error, and files that contain one such packet probably
	 * contain thousands.  For discussion, see
	 *
	 *    https://www.wireshark.org/lists/wireshark-dev/201307/msg00076.html
	 *
	 * and related messages.
	 *
	 * Packet contents are copied to a Buffer that grows as needed, so
	 * there is no fixed-length buffer sized from the snapshot length
	 * to worry about.  Here it only counts as one more hint that the
	 * wrong subtype may be being tried.
	 */
	if (hdr->hdr.incl_len > wth->snapshot_length)
		demerits++;

	/*
	 * Captured length larger than the original length: another hint
	 * that this might be the wrong subtype.
	 */
	if (hdr->hdr.incl_len > hdr->hdr.orig_len)
		demerits++;

	return demerits;
}
/* Read the next packet */
@ -622,15 +654,23 @@ libpcap_read_packet(wtap *wth, FILE_T fh, struct wtap_pkthdr *phdr,
struct pcaprec_ss990915_hdr hdr;
guint packet_size;
guint orig_size;
int bytes_read;
int phdr_len;
libpcap_t *libpcap;
bytes_read = libpcap_read_header(wth, fh, err, err_info, &hdr);
if (bytes_read == -1) {
if (!libpcap_read_header(wth, fh, err, err_info, &hdr))
return FALSE;
if (hdr.hdr.incl_len > WTAP_MAX_PACKET_SIZE) {
/*
* We failed to read the header.
* Probably a corrupt capture file; return an error,
* so that our caller doesn't blow up trying to allocate
* space for an immensely-large packet.
*/
*err = WTAP_ERR_BAD_FILE;
if (err_info != NULL) {
*err_info = g_strdup_printf("pcap: File has %u-byte packet, bigger than maximum of %u",
hdr.hdr.incl_len, WTAP_MAX_PACKET_SIZE);
}
return FALSE;
}
@ -701,13 +741,14 @@ libpcap_read_packet(wtap *wth, FILE_T fh, struct wtap_pkthdr *phdr,
/* Read the header of the next packet.
Return -1 on an error, or the number of bytes of header read on success. */
Return FALSE on an error, TRUE on success. */
static int libpcap_read_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
struct pcaprec_ss990915_hdr *hdr)
{
int bytes_to_read, bytes_read;
int bytes_to_read, bytes_read;
guint32 temp;
libpcap_t *libpcap;
/* Read record header. */
errno = WTAP_ERR_CANT_READ;
switch (wth->file_type_subtype) {
@ -740,80 +781,16 @@ static int libpcap_read_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
if (*err == 0 && bytes_read != 0) {
*err = WTAP_ERR_SHORT_READ;
}
return -1;
return FALSE;
}
adjust_header(wth, &hdr->hdr);
if (hdr->hdr.incl_len > WTAP_MAX_PACKET_SIZE) {
/*
* Probably a corrupt capture file; return an error,
* so that our caller doesn't blow up trying to allocate
* space for an immensely-large packet, and so that
* the code to try to guess what type of libpcap file
* this is can tell when it's not the type we're guessing
* it is.
*/
*err = WTAP_ERR_BAD_FILE;
if (err_info != NULL) {
*err_info = g_strdup_printf("pcap: File has %u-byte packet, bigger than maximum of %u",
hdr->hdr.incl_len, WTAP_MAX_PACKET_SIZE);
}
return -1;
}
if (hdr->hdr.orig_len > 64*1024*1024) {
/*
* In theory I guess the on-the-wire packet size can be
* arbitrarily large, and it can certainly be larger than the
* maximum snapshot length which bounds the snapshot size,
* but any file claiming 64MB in a single packet is *probably*
* corrupt, and treating them as such makes the heuristics
* much more reliable. See, for example,
*
* https://bugs.wireshark.org/bugzilla/show_bug.cgi?id=9634
*
* (64MB is an arbitrary size at this point).
*/
*err = WTAP_ERR_BAD_FILE;
if (err_info != NULL) {
*err_info = g_strdup_printf("pcap: File claims packet was %u bytes on the wire",
hdr->hdr.orig_len);
}
return -1;
}
/* Disabling because this is not a fatal error, and packets that have
* one such packet probably have thousands. For discussion, see
* https://www.wireshark.org/lists/wireshark-dev/201307/msg00076.html
* and related messages.
*
* The packet contents will be copied to a Buffer, which expands
* as necessary to hold the contents; we don't have to worry
* about fixed-length buffers allocated based on the original
* snapshot length. */
#if 0
if (hdr->hdr.incl_len > wth->snapshot_length) {
g_warning("pcap: File has packet larger than file's snapshot length.");
}
#endif
return bytes_read;
}
static void
adjust_header(wtap *wth, struct pcaprec_hdr *hdr)
{
guint32 temp;
libpcap_t *libpcap;
libpcap = (libpcap_t *)wth->priv;
if (libpcap->byte_swapped) {
/* Byte-swap the record header fields. */
hdr->ts_sec = GUINT32_SWAP_LE_BE(hdr->ts_sec);
hdr->ts_usec = GUINT32_SWAP_LE_BE(hdr->ts_usec);
hdr->incl_len = GUINT32_SWAP_LE_BE(hdr->incl_len);
hdr->orig_len = GUINT32_SWAP_LE_BE(hdr->orig_len);
hdr->hdr.ts_sec = GUINT32_SWAP_LE_BE(hdr->hdr.ts_sec);
hdr->hdr.ts_usec = GUINT32_SWAP_LE_BE(hdr->hdr.ts_usec);
hdr->hdr.incl_len = GUINT32_SWAP_LE_BE(hdr->hdr.incl_len);
hdr->hdr.orig_len = GUINT32_SWAP_LE_BE(hdr->hdr.orig_len);
}
/* Swap the "incl_len" and "orig_len" fields, if necessary. */
@ -823,7 +800,7 @@ adjust_header(wtap *wth, struct pcaprec_hdr *hdr)
break;
case MAYBE_SWAPPED:
if (hdr->incl_len <= hdr->orig_len) {
if (hdr->hdr.incl_len <= hdr->hdr.orig_len) {
/*
* The captured length is <= the actual length,
* so presumably they weren't swapped.
@ -833,11 +810,13 @@ adjust_header(wtap *wth, struct pcaprec_hdr *hdr)
/* FALLTHROUGH */
case SWAPPED:
temp = hdr->orig_len;
hdr->orig_len = hdr->incl_len;
hdr->incl_len = temp;
temp = hdr->hdr.orig_len;
hdr->hdr.orig_len = hdr->hdr.incl_len;
hdr->hdr.incl_len = temp;
break;
}
return TRUE;
}
/* Returns 0 if we could write the specified encapsulation type,