/* file_wrappers.c
 *
 * Wiretap Library
 * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
/* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
 * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
 * under licence:
 *
 * SPDX-License-Identifier: Zlib
 *
 */
2011-04-08 00:28:37 +00:00
2021-10-16 09:33:34 +00:00
# include "config.h"
# include "file_wrappers.h"
2000-01-13 07:09:20 +00:00
2021-08-19 12:48:52 +00:00
# include <assert.h>
2000-01-13 07:09:20 +00:00
# include <errno.h>
2005-11-07 02:45:19 +00:00
# include <string.h>
2000-05-19 23:07:04 +00:00
# include "wtap-int.h"
2021-10-16 09:33:34 +00:00
2008-05-22 15:46:27 +00:00
# include <wsutil/file_util.h>
2005-11-06 22:43:25 +00:00
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2015-12-16 09:37:00 +00:00
# define ZLIB_CONST
2011-06-09 22:46:00 +00:00
# include <zlib.h>
2016-04-01 00:32:56 +00:00
# endif /* HAVE_ZLIB */
2011-06-09 22:46:00 +00:00
2021-08-19 12:48:52 +00:00
# ifdef HAVE_ZSTD
# include <zstd.h>
# endif
# ifdef HAVE_LZ4
2021-09-07 19:40:44 +00:00
# include <lz4.h>
# if LZ4_VERSION_NUMBER >= 10703
# define USE_LZ4
2021-08-19 12:48:52 +00:00
# include <lz4frame.h>
# endif
2021-09-07 19:40:44 +00:00
# endif
2021-08-19 12:48:52 +00:00
/*
 * See RFC 1952:
 *
 *    https://tools.ietf.org/html/rfc1952
 *
 * for a description of the gzip file format.
 *
 * Some other compressed file formats we might want to support:
 *
 *    XZ format: https://tukaani.org/xz/
 *
 *    Bzip2 format: https://www.sourceware.org/bzip2/
 *
 *    Lzip format: https://www.nongnu.org/lzip/
 */
2018-11-20 00:06:38 +00:00
/*
* List of compression types supported .
*/
static struct compression_type {
wtap_compression_type type ;
const char * extension ;
const char * description ;
} compression_types [ ] = {
# ifdef HAVE_ZLIB
{ WTAP_GZIP_COMPRESSED , " gz " , " gzip compressed " } ,
2021-09-11 00:46:54 +00:00
# endif
# ifdef HAVE_ZSTD
{ WTAP_ZSTD_COMPRESSED , " zst " , " zstd compressed " } ,
# endif
# ifdef USE_LZ4
{ WTAP_LZ4_COMPRESSED , " lz4 " , " lz4 compressed " } ,
2018-11-20 00:06:38 +00:00
# endif
{ WTAP_UNCOMPRESSED , NULL , NULL }
} ;
2021-09-11 00:46:54 +00:00
static wtap_compression_type file_get_compression_type ( FILE_T stream ) ;
2018-11-20 00:06:38 +00:00
wtap_compression_type
wtap_get_compression_type(wtap *wth)
{
    FILE_T stream;

    /* Prefer the sequential handle; fall back to the random-access
       handle when the sequential one isn't open. */
    stream = (wth->fh == NULL) ? wth->random_fh : wth->fh;
    return file_get_compression_type(stream);
}
const char *
wtap_compression_type_description(wtap_compression_type compression_type)
{
    struct compression_type *entry;

    /* Scan the table up to the WTAP_UNCOMPRESSED sentinel. */
    for (entry = compression_types; entry->type != WTAP_UNCOMPRESSED; entry++) {
        if (entry->type == compression_type)
            return entry->description;
    }
    /* Unknown (or uncompressed) type - no description. */
    return NULL;
}
const char *
wtap_compression_type_extension(wtap_compression_type compression_type)
{
    const struct compression_type *entry = compression_types;

    /* Walk the table until the WTAP_UNCOMPRESSED sentinel. */
    while (entry->type != WTAP_UNCOMPRESSED) {
        if (entry->type == compression_type)
            return entry->extension;
        entry++;
    }
    /* Unknown (or uncompressed) type - no extension. */
    return NULL;
}
GSList *
wtap_get_all_compression_type_extensions_list ( void )
{
GSList * extensions ;
extensions = NULL ; /* empty list, to start with */
for ( struct compression_type * p = compression_types ;
p - > type ! = WTAP_UNCOMPRESSED ; p + + )
extensions = g_slist_prepend ( extensions , ( gpointer ) p - > extension ) ;
return extensions ;
}
2011-04-08 00:28:37 +00:00
/* #define GZBUFSIZE 8192 */
#define GZBUFSIZE 4096

/* values for wtap_reader compression */
typedef enum {
    UNKNOWN,            /* unknown - look for a compression header */
    UNCOMPRESSED,       /* uncompressed - copy input directly */
    ZLIB,               /* decompress a zlib stream */
    GZIP_AFTER_HEADER,  /* gzip header consumed; raw deflate follows */
    ZSTD,               /* decompress a zstd stream */
    LZ4,                /* decompress an lz4 frame */
} compression_t;
2021-09-02 08:36:29 +00:00
/*
 * We limit the size of our input and output buffers to 2^30 bytes,
 * because:
 *
 *    1) on Windows with MSVC, the return value of _read() is int,
 *       so the biggest read you can do is INT_MAX, and the biggest
 *       power of 2 below that is 2^30;
 *
 *    2) the "avail_in" and "avail_out" values in a z_stream structure
 *       in zlib are uInts, and those are unsigned ints, and that
 *       imposes a limit on the buffer size when we're reading a
 *       gzipped file.
 *
 * Thus, we use guint for the buffer sizes, offsets, amount available
 * from the buffer, etc.
 *
 * If we want an even bigger buffer for uncompressed data, or for
 * some other form of compression, then the guint-sized values should
 * be in structure values used only for reading gzipped files, and
 * other values should be used for uncompressed data or data
 * compressed using other algorithms (e.g., in a union).
 */
#define MAX_READ_BUF_SIZE (1U << 30)
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
/* A file data buffer, used for both the compressed-input and
   uncompressed-output sides of a reader. */
struct wtap_reader_buf {
    guint8 *buf;   /* start of the buffer */
    guint8 *next;  /* next byte to deliver from the buffer */
    guint avail;   /* number of bytes available to deliver at next */
};
2011-04-10 16:53:32 +00:00
struct wtap_reader {
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
int fd ; /* file descriptor */
gint64 raw_pos ; /* current position in file (just to not call lseek()) */
gint64 pos ; /* current position in uncompressed data */
guint size ; /* buffer size */
struct wtap_reader_buf in ; /* input buffer, containing compressed data */
struct wtap_reader_buf out ; /* output buffer, containing uncompressed data */
gboolean eof ; /* TRUE if end of input file reached */
gint64 start ; /* where the gzip data started, for rewinding */
gint64 raw ; /* where the raw data started, for seeking */
compression_t compression ; /* type of compression, if any */
2021-09-11 00:46:54 +00:00
compression_t last_compression ; /* last known compression type */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
gboolean is_compressed ; /* FALSE if completely uncompressed, TRUE otherwise */
2014-09-10 16:07:04 +00:00
/* seek request */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
gint64 skip ; /* amount to skip (already rewound if backwards) */
gboolean seek_pending ; /* TRUE if seek request pending */
2014-09-10 16:07:04 +00:00
/* error information */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
int err ; /* error code */
const char * err_info ; /* additional error information string for some errors */
2014-09-10 16:07:04 +00:00
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2014-09-10 16:07:04 +00:00
/* zlib inflate stream */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
z_stream strm ; /* stream structure in-place (not a pointer) */
gboolean dont_check_crc ; /* TRUE if we aren't supposed to check the CRC */
2011-04-10 16:53:32 +00:00
# endif
2014-09-10 16:07:04 +00:00
/* fast seeking */
GPtrArray * fast_seek ;
void * fast_seek_cur ;
2021-08-19 12:48:52 +00:00
# ifdef HAVE_ZSTD
ZSTD_DCtx * zstd_dctx ;
# endif
2021-09-07 19:40:44 +00:00
# ifdef USE_LZ4
2021-08-19 12:48:52 +00:00
LZ4F_dctx * lz4_dctx ;
# endif
2011-04-10 16:53:32 +00:00
} ;
2011-04-08 00:28:37 +00:00
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
/* Current read offset within a buffer. */
static guint
offset_in_buffer ( struct wtap_reader_buf * buf )
2011-04-08 00:28:37 +00:00
{
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
/* buf->next points to the next byte to read, and buf->buf points
to the first byte in the buffer , so the difference between them
is the offset .
This will fit in an unsigned int , because it can ' t be bigger
than the size of the buffer , which is an unsigned int . */
return ( guint ) ( buf - > next - buf - > buf ) ;
}
/* Number of bytes of data that are in a buffer. */
static guint
bytes_in_buffer ( struct wtap_reader_buf * buf )
{
/* buf->next + buf->avail points just past the last byte of data in
the buffer .
Thus , ( buf - > next + buf - > avail ) - buf - > buf is the number of bytes
of data in the buffer .
2021-09-02 08:36:29 +00:00
This will fit in an guint , because it can ' t be bigger
than the size of the buffer , which is a guint . */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
return ( guint ) ( ( buf - > next + buf - > avail ) - buf - > buf ) ;
}
/* Reset a buffer, discarding all data in the buffer, so we read into
it starting at the beginning . */
static void
buf_reset ( struct wtap_reader_buf * buf )
{
buf - > next = buf - > buf ;
buf - > avail = 0 ;
}
/* Do one read into the buffer, appending to any data already there.
   Returns 0 on success (including EOF, which sets state->eof) or -1
   on a read error, with state->err set.  A single ws_read() is done,
   deliberately not looping for a full buffer, so reads from a pipe
   deliver data as soon as it arrives. */
static int
buf_read(FILE_T state, struct wtap_reader_buf *buf)
{
    guint free_space, count;
    unsigned char *dst;
    ssize_t nread;

    /* How much space is left at the end of the buffer?
       XXX - the output buffer actually has state->size * 2 bytes. */
    free_space = state->size - bytes_in_buffer(buf);
    if (free_space == 0) {
        /* No room left, so start fresh at the beginning of the
           buffer. */
        buf_reset(buf);
        dst = buf->buf;
        count = state->size;
    } else {
        /* There's some room left; try to fill it.  We may get less
           than that if we're reading from a pipe or are near the end
           of the file. */
        dst = buf->next + buf->avail;
        count = free_space;
    }

    nread = ws_read(state->fd, dst, count);
    if (nread < 0) {
        state->err = errno;
        state->err_info = NULL;
        return -1;
    }
    if (nread == 0)
        state->eof = TRUE;
    state->raw_pos += nread;
    buf->avail += (guint)nread;
    return 0;
}
/* Make more compressed data available in the input buffer, unless a
   previous error or EOF makes that pointless.  Returns 0 on success,
   -1 on error. */
static int /* gz_avail */
fill_in_buffer(FILE_T state)
{
    /* A previous error is sticky. */
    if (state->err != 0)
        return -1;
    /* Nothing more will arrive after EOF. */
    if (state->eof)
        return 0;
    return (buf_read(state, &state->in) < 0) ? -1 : 0;
}
2011-04-12 02:40:14 +00:00
# define ZLIB_WINSIZE 32768
struct fast_seek_point {
2014-09-10 16:07:04 +00:00
gint64 out ; /* corresponding offset in uncompressed data */
gint64 in ; /* offset in input file of first full byte */
2011-04-12 02:40:14 +00:00
2014-09-10 16:07:04 +00:00
compression_t compression ;
union {
struct {
2011-04-29 07:49:55 +00:00
# ifdef HAVE_INFLATEPRIME
2014-09-10 16:07:04 +00:00
int bits ; /* number of bits (1-7) from byte at in - 1, or 0 */
2011-04-29 07:49:55 +00:00
# endif
2014-09-10 16:07:04 +00:00
unsigned char window [ ZLIB_WINSIZE ] ; /* preceding 32K of uncompressed data */
2011-04-12 02:40:14 +00:00
2014-09-10 16:07:04 +00:00
/* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
guint32 adler ;
guint32 total_out ;
} zlib ;
} data ;
2011-04-12 02:40:14 +00:00
} ;
struct zlib_cur_seek_point {
2014-09-10 16:07:04 +00:00
unsigned char window [ ZLIB_WINSIZE ] ; /* preceding 32K of uncompressed data */
unsigned int pos ;
unsigned int have ;
2011-04-12 02:40:14 +00:00
} ;
# define SPAN G_GINT64_CONSTANT(1048576)
/* Find the fast-seek point with the largest uncompressed offset that
   is <= pos; an exact match is returned directly.  Returns NULL if
   there is no fast-seek table or no point at or before pos. */
static struct fast_seek_point *
fast_seek_find(FILE_T file, gint64 pos)
{
    struct fast_seek_point *best = NULL;
    struct fast_seek_point *sp;
    guint lo, mid, hi;

    if (!file->fast_seek)
        return NULL;

    /* Binary search; the array is ordered by "out". */
    for (lo = 0, hi = file->fast_seek->len; lo < hi; ) {
        mid = (lo + hi) / 2;
        sp = (struct fast_seek_point *)file->fast_seek->pdata[mid];
        if (pos < sp->out)
            hi = mid;
        else if (pos > sp->out) {
            best = sp;      /* candidate; look for a closer one above */
            lo = mid + 1;
        } else {
            return sp;      /* exact match */
        }
    }
    return best;
}
static void
2013-01-06 20:36:33 +00:00
fast_seek_header ( FILE_T file , gint64 in_pos , gint64 out_pos ,
2014-09-10 16:07:04 +00:00
compression_t compression )
2011-04-12 02:40:14 +00:00
{
2014-09-10 16:07:04 +00:00
struct fast_seek_point * item = NULL ;
2011-04-12 02:40:14 +00:00
2014-09-10 16:07:04 +00:00
if ( file - > fast_seek - > len ! = 0 )
item = ( struct fast_seek_point * ) file - > fast_seek - > pdata [ file - > fast_seek - > len - 1 ] ;
2011-04-12 02:40:14 +00:00
2014-09-10 16:07:04 +00:00
if ( ! item | | item - > out < out_pos ) {
struct fast_seek_point * val = g_new ( struct fast_seek_point , 1 ) ;
val - > in = in_pos ;
val - > out = out_pos ;
val - > compression = compression ;
2011-04-12 02:40:14 +00:00
2014-09-10 16:07:04 +00:00
g_ptr_array_add ( file - > fast_seek , val ) ;
}
2011-04-12 02:40:14 +00:00
}
/*
 * Discard any partially accumulated zlib sliding-window data, so a
 * stale window is not used to create a future fast-seek point.
 * A no-op unless the file is zlib-compressed and has an in-progress
 * seek-point accumulator.
 */
static void
fast_seek_reset(
#ifdef HAVE_ZLIB
    FILE_T state)
#else
    FILE_T state _U_)
#endif
{
#ifdef HAVE_ZLIB
    if (state->compression == ZLIB && state->fast_seek_cur != NULL) {
        struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *)state->fast_seek_cur;

        /* Mark the circular window as holding no valid data. */
        cur->have = 0;
    }
#endif
}
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2011-04-08 00:28:37 +00:00
2011-04-20 21:36:23 +00:00
/* Get next byte from input, or -1 if end or error.
 *
 * Note:
 *
 *      1) errors from buf_read(), and thus from fill_in_buffer(), are
 *         "sticky", and fill_in_buffer() won't do any reading if there's
 *         an error;
 *
 *      2) GZ_GETC() returns -1 on an EOF;
 *
 * so it's safe to make multiple GZ_GETC() calls and only check the
 * last one for an error.
 *
 * NOTE: the macro hard-codes the name "state" and evaluates it several
 * times; it must only be used where a FILE_T local named "state" is in
 * scope. */
#define GZ_GETC() ((state->in.avail == 0 && fill_in_buffer(state) == -1) ? -1 : \
                   (state->in.avail == 0 ? -1 : \
                    (state->in.avail--, *(state->in.next)++)))
2011-04-08 00:28:37 +00:00
2011-04-20 21:36:23 +00:00
/* Read a one-byte integer; on success store it in *ret and return 0.
   On failure return -1 with state->err set; *ret is left untouched. */
static int
gz_next1(FILE_T state, guint8 *ret)
{
    int byte = GZ_GETC();

    if (byte == -1) {
        /* Distinguish a previously recorded error from a plain EOF. */
        if (state->err == 0) {
            state->err = WTAP_ERR_SHORT_READ;
            state->err_info = NULL;
        }
        return -1;
    }
    *ret = (guint8)byte;
    return 0;
}
/* Read a two-byte little-endian integer; on success store it in *ret and
   return 0.  On failure return -1 with state->err set; *ret is left
   untouched.  Only the second byte is checked: GZ_GETC() failures are
   sticky, so a failure on the first byte is caught on the second call. */
static int
gz_next2(FILE_T state, guint16 *ret)
{
    guint16 value;
    int hi_byte;

    value = GZ_GETC();          /* low-order byte */
    hi_byte = GZ_GETC();        /* high-order byte; sole error check */
    if (hi_byte == -1) {
        if (state->err == 0) {
            /* Plain EOF mid-field. */
            state->err = WTAP_ERR_SHORT_READ;
            state->err_info = NULL;
        }
        return -1;
    }
    value += (guint16)hi_byte << 8;
    *ret = value;
    return 0;
}
2011-04-08 00:28:37 +00:00
/* Get a four-byte little-endian integer and return 0 on success and the value
2011-04-20 21:36:23 +00:00
in * ret . Otherwise - 1 is returned , state - > err is set , and * ret is not
modified . */
2011-04-08 00:28:37 +00:00
static int
gz_next4 ( FILE_T state , guint32 * ret )
{
2014-09-10 16:07:04 +00:00
guint32 val ;
int ch ;
val = GZ_GETC ( ) ;
val + = ( unsigned ) GZ_GETC ( ) < < 8 ;
val + = ( guint32 ) GZ_GETC ( ) < < 16 ;
ch = GZ_GETC ( ) ;
if ( ch = = - 1 ) {
if ( state - > err = = 0 ) {
/* EOF */
state - > err = WTAP_ERR_SHORT_READ ;
state - > err_info = NULL ;
}
return - 1 ;
}
val + = ( guint32 ) ch < < 24 ;
* ret = val ;
return 0 ;
2011-04-08 00:28:37 +00:00
}
2011-04-20 21:36:23 +00:00
/* Consume and discard n input bytes.  Returns 0 on success, or -1
   (with state->err set) if the input runs out or a read error occurs. */
static int
gz_skipn(FILE_T state, size_t n)
{
    for (; n != 0; n--) {
        if (GZ_GETC() == -1) {
            if (state->err == 0) {
                /* Ran out of data before skipping all n bytes. */
                state->err = WTAP_ERR_SHORT_READ;
                state->err_info = NULL;
            }
            return -1;
        }
    }
    return 0;
}
/* Consume input up to and including a terminating NUL byte.  Returns 0
   on success, or -1 (with state->err set) on EOF or read error. */
static int
gz_skipzstr(FILE_T state)
{
    for (;;) {
        int ch = GZ_GETC();

        if (ch == 0)
            return 0;           /* hit the terminating NUL */
        if (ch == -1) {
            if (state->err == 0) {
                /* EOF before the string terminator. */
                state->err = WTAP_ERR_SHORT_READ;
                state->err_info = NULL;
            }
            return -1;
        }
        /* Otherwise an ordinary string byte; keep scanning. */
    }
}
2011-04-12 02:40:14 +00:00
/*
 * Possibly record a new zlib fast-seek point.  A point is added only
 * when we are at least SPAN uncompressed bytes past the last recorded
 * point; it captures the inflate bit offset, a linearized copy of the
 * 32K sliding window, and the stream's Adler/total_out counters.
 */
static void
zlib_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point, int bits, gint64 in_pos, gint64 out_pos)
{
    /* it's for sure after gzip header, so file->fast_seek->len != 0 */
    struct fast_seek_point *item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];

#ifndef HAVE_INFLATEPRIME
    /* Without inflatePrime() we can't restart mid-byte, so only
       byte-aligned points are usable. */
    if (bits)
        return;
#endif

    /* Glib has got Balanced Binary Trees (GTree) but I couldn't find a way to do quick search for nearest (and smaller) value to seek (It's what fast_seek_find() do)
     * Inserting value in middle of sorted array is expensive, so we want to add only in the end.
     * It's not big deal, cause first-read don't usually invoke seeking
     */
    if (item->out + SPAN < out_pos) {
        struct fast_seek_point *val = g_new(struct fast_seek_point, 1);
        val->in = in_pos;
        val->out = out_pos;
        val->compression = ZLIB;
#ifdef HAVE_INFLATEPRIME
        val->data.zlib.bits = bits;
#endif
        if (point->pos != 0) {
            /* The circular window has wrapped; copy the two halves in
               chronological order so the saved window is linear. */
            unsigned int left = ZLIB_WINSIZE - point->pos;

            memcpy(val->data.zlib.window, point->window + point->pos, left);
            memcpy(val->data.zlib.window + left, point->window, point->pos);
        } else
            memcpy(val->data.zlib.window, point->window, ZLIB_WINSIZE);

        /*
         * XXX - strm.adler is a uLong in at least some versions
         * of zlib, and uLong is an unsigned long in at least
         * some of those versions, which means it's 64-bit
         * on LP64 platforms, even though the checksum is
         * 32-bit.  We assume the actual Adler checksum
         * is in the lower 32 bits of strm.adler; as the
         * checksum in the file is only 32 bits, we save only
         * those lower 32 bits, and cast away any additional
         * bits to squelch warnings.
         *
         * The same applies to strm.total_out.
         */
        val->data.zlib.adler = (guint32)file->strm.adler;
        val->data.zlib.total_out = (guint32)file->strm.total_out;
        g_ptr_array_add(file->fast_seek, val);
    }
}
/*
 * Decompress up to "count" bytes of the current deflate stream into
 * "buf", updating state->out to describe the bytes produced.  On a read
 * or decompression problem, state->err/state->err_info are set; errors
 * are not reported immediately so that data decoded before the error
 * can still be consumed.  Also feeds the fast-seek window accumulator
 * and, at end of stream, validates the gzip CRC/length trailer.
 */
static void /* gz_decomp */
zlib_read(FILE_T state, unsigned char *buf, unsigned int count)
{
    int ret = 0;        /* XXX */
    guint32 crc, len;
    z_streamp strm = &(state->strm);

    /* Track the start and size of the not-yet-filled tail of buf,
       so each iteration's fresh output can be checksummed/windowed. */
    unsigned char *buf2 = buf;
    unsigned int count2 = count;

    strm->avail_out = count;
    strm->next_out = buf;

    /* fill output buffer up to end of deflate stream or error */
    do {
        /* get more input for inflate() */
        if (state->in.avail == 0 && fill_in_buffer(state) == -1)
            break;
        if (state->in.avail == 0) {
            /* EOF */
            state->err = WTAP_ERR_SHORT_READ;
            state->err_info = NULL;
            break;
        }

        strm->avail_in = state->in.avail;
        strm->next_in = state->in.next;
        /* decompress and handle errors */
#ifdef Z_BLOCK
        /* Z_BLOCK stops at deflate block boundaries, which is what the
           fast-seek code below needs to find restart points. */
        ret = inflate(strm, Z_BLOCK);
#else
        ret = inflate(strm, Z_NO_FLUSH);
#endif
        /* Write back how much input inflate() consumed. */
        state->in.avail = strm->avail_in;
#ifdef z_const
DIAG_OFF(cast-qual)
        state->in.next = (unsigned char *)strm->next_in;
DIAG_ON(cast-qual)
#else
        state->in.next = strm->next_in;
#endif
        if (ret == Z_STREAM_ERROR) {
            state->err = WTAP_ERR_DECOMPRESS;
            state->err_info = strm->msg;
            break;
        }
        if (ret == Z_NEED_DICT) {
            state->err = WTAP_ERR_DECOMPRESS;
            state->err_info = "preset dictionary needed";
            break;
        }
        if (ret == Z_MEM_ERROR) {
            /* This means "not enough memory". */
            state->err = ENOMEM;
            state->err_info = NULL;
            break;
        }
        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
            state->err = WTAP_ERR_DECOMPRESS;
            state->err_info = strm->msg;
            break;
        }
        /*
         * XXX - Z_BUF_ERROR?
         */

        /* Fold this iteration's fresh output into the running CRC. */
        strm->adler = crc32(strm->adler, buf2, count2 - strm->avail_out);
#ifdef Z_BLOCK
        if (state->fast_seek_cur != NULL) {
            struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *)state->fast_seek_cur;
            unsigned int ready = count2 - strm->avail_out;  /* fresh bytes this pass */

            if (ready < ZLIB_WINSIZE) {
                /* Append to the circular window, wrapping if needed. */
                guint left = ZLIB_WINSIZE - cur->pos;

                if (ready >= left) {
                    memcpy(cur->window + cur->pos, buf2, left);
                    if (ready != left)
                        memcpy(cur->window, buf2 + left, ready - left);

                    cur->pos = ready - left;
                    cur->have += ready;
                } else {
                    memcpy(cur->window + cur->pos, buf2, ready);
                    cur->pos += ready;
                    cur->have += ready;
                }

                if (cur->have >= ZLIB_WINSIZE)
                    cur->have = ZLIB_WINSIZE;

            } else {
                /* More new data than the window holds: keep only the
                   final 32K, which lands byte-aligned at pos 0. */
                memcpy(cur->window, buf2 + (ready - ZLIB_WINSIZE), ZLIB_WINSIZE);
                cur->pos = 0;
                cur->have = ZLIB_WINSIZE;
            }

            /* data_type bit 7 set + bit 6 clear = stopped at a block
               boundary (see inflate() docs); low 3 bits = bit offset. */
            if (cur->have >= ZLIB_WINSIZE && ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
                zlib_fast_seek_add(state, cur, (strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
        }
#endif
        /* Advance past the bytes produced this iteration. */
        buf2 = (buf2 + count2 - strm->avail_out);
        count2 = strm->avail_out;
    } while (strm->avail_out && ret != Z_STREAM_END);

    /* update available output and crc check value */
    state->out.next = buf;
    state->out.avail = count - strm->avail_out;

    /* Check gzip trailer if at end of deflate stream.
       We don't fail immediately here, we just set an error
       indication, so that we try to process what data we
       got before the error.  The next attempt to read
       something past that data will get the error. */
    if (ret == Z_STREAM_END) {
        if (gz_next4(state, &crc) != -1 &&
            gz_next4(state, &len) != -1) {
            if (crc != strm->adler && !state->dont_check_crc) {
                state->err = WTAP_ERR_DECOMPRESS;
                state->err_info = "bad CRC";
            } else if (len != (strm->total_out & 0xffffffffUL)) {
                state->err = WTAP_ERR_DECOMPRESS;
                state->err_info = "length field wrong";
            }
        }
        state->last_compression = state->compression;
        state->compression = UNKNOWN;      /* ready for next stream, once have is 0 */
        g_free(state->fast_seek_cur);
        state->fast_seek_cur = NULL;
    }
}
# endif
static int
gz_head(FILE_T state)
{
    guint already_read;

    /* get some data in the input buffer */
    if (state->in.avail == 0) {
        if (fill_in_buffer(state) == -1)
            return -1;
        if (state->in.avail == 0)
            return 0;
    }

    /* look for the gzip magic header bytes 31 and 139 */
    if (state->in.next[0] == 31) {
        state->in.avail--;
        state->in.next++;

        /* Make sure the byte after the first byte is present */
        if (state->in.avail == 0 && fill_in_buffer(state) == -1) {
            /* Read error. */
            return -1;
        }
        if (state->in.avail != 0) {
            if (state->in.next[0] == 139) {
                /*
                 * We have what looks like the ID1 and ID2 bytes of a gzip
                 * header.
                 * Continue processing the file.
                 *
                 * XXX - some capture file formats (I'M LOOKING AT YOU,
                 * ENDACE!) can have 31 in the first byte of the file
                 * and 139 in the second byte of the file.  For now, in
                 * those cases, you lose.
                 */
#ifdef HAVE_ZLIB
                guint8 cm;
                guint8 flags;
                guint16 len;
                guint16 hcrc;

                state->in.avail--;
                state->in.next++;

                /* read rest of header */

                /* compression method (CM) */
                if (gz_next1(state, &cm) == -1)
                    return -1;
                if (cm != 8) {
                    state->err = WTAP_ERR_DECOMPRESS;
                    state->err_info = "unknown compression method";
                    return -1;
                }

                /* flags (FLG) */
                if (gz_next1(state, &flags) == -1) {
                    /* Read error. */
                    return -1;
                }
                if (flags & 0xe0) {     /* reserved flag bits */
                    state->err = WTAP_ERR_DECOMPRESS;
                    state->err_info = "reserved flag bits set";
                    return -1;
                }

                /* modification time (MTIME) */
                if (gz_skipn(state, 4) == -1) {
                    /* Read error. */
                    return -1;
                }
                /* extra flags (XFL) */
                if (gz_skipn(state, 1) == -1) {
                    /* Read error. */
                    return -1;
                }
                /* operating system (OS) */
                if (gz_skipn(state, 1) == -1) {
                    /* Read error. */
                    return -1;
                }

                if (flags & 4) {
                    /* extra field - get XLEN */
                    if (gz_next2(state, &len) == -1) {
                        /* Read error. */
                        return -1;
                    }
                    /* skip the extra field */
                    if (gz_skipn(state, len) == -1) {
                        /* Read error. */
                        return -1;
                    }
                }
                if (flags & 8) {
                    /* file name */
                    if (gz_skipzstr(state) == -1) {
                        /* Read error. */
                        return -1;
                    }
                }
                if (flags & 16) {
                    /* comment */
                    if (gz_skipzstr(state) == -1) {
                        /* Read error. */
                        return -1;
                    }
                }
                if (flags & 2) {
                    /* header crc */
                    if (gz_next2(state, &hcrc) == -1) {
                        /* Read error. */
                        return -1;
                    }
                    /* XXX - check the CRC? */
                }
                /* set up for decompression */
                inflateReset(&(state->strm));
                state->strm.adler = crc32(0L, Z_NULL, 0);
                state->compression = ZLIB;
                state->is_compressed = TRUE;
#ifdef Z_BLOCK
                if (state->fast_seek) {
                    struct zlib_cur_seek_point *cur = g_new(struct zlib_cur_seek_point, 1);

                    cur->pos = cur->have = 0;
                    g_free(state->fast_seek_cur);
                    state->fast_seek_cur = cur;
                    fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, GZIP_AFTER_HEADER);
                }
#endif /* Z_BLOCK */
                return 0;
#else /* HAVE_ZLIB */
                state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
                state->err_info = "reading gzip-compressed files isn't supported";
                return -1;
#endif /* HAVE_ZLIB */
            }

            /*
             * Not a gzip file.  "Unget" the first character; either:
             *
             *    1) we read both of the first two bytes into the
             *       buffer with the first ws_read, so we can just back
             *       up by one byte;
             *
             *    2) we only read the first byte into the buffer with
             *       the first ws_read (e.g., because we're reading from
             *       a pipe and only the first byte had been written to
             *       the pipe at that point), and read the second byte
             *       into the buffer after the first byte in the
             *       fill_in_buffer call, so we now have two bytes in
             *       the buffer, and can just back up by one byte.
             */
            state->in.avail++;
            state->in.next--;
        }
    }

#ifdef HAVE_LIBXZ
    /* { 0xFD, '7', 'z', 'X', 'Z', 0x00 } */
    /* FD 37 7A 58 5A 00 */
#endif

    /* Zstandard frame magic: 28 B5 2F FD (little-endian 0xFD2FB528) */
    if (state->in.avail >= 4
        && state->in.buf[0] == 0x28 && state->in.buf[1] == 0xb5
        && state->in.buf[2] == 0x2f && state->in.buf[3] == 0xfd) {
#ifdef HAVE_ZSTD
        const size_t ret = ZSTD_initDStream(state->zstd_dctx);
        if (ZSTD_isError(ret)) {
            state->err = WTAP_ERR_DECOMPRESS;
            state->err_info = ZSTD_getErrorName(ret);
            return -1;
        }

        state->compression = ZSTD;
        state->is_compressed = TRUE;
        return 0;
#else
        state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
        state->err_info = "reading zstd-compressed files isn't supported";
        return -1;
#endif
    }

    /* LZ4 frame magic: 04 22 4D 18 (little-endian 0x184D2204) */
    if (state->in.avail >= 4
        && state->in.buf[0] == 0x04 && state->in.buf[1] == 0x22
        && state->in.buf[2] == 0x4d && state->in.buf[3] == 0x18) {
#ifdef USE_LZ4
#if LZ4_VERSION_NUMBER >= 10800
        LZ4F_resetDecompressionContext(state->lz4_dctx);
#else
        /* No reset API before lz4 1.8.0; destroy and recreate the context. */
        LZ4F_freeDecompressionContext(state->lz4_dctx);
        const LZ4F_errorCode_t ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
        if (LZ4F_isError(ret)) {
            state->err = WTAP_ERR_INTERNAL;
            state->err_info = LZ4F_getErrorName(ret);
            return -1;
        }
#endif
        state->compression = LZ4;
        state->is_compressed = TRUE;
        return 0;
#else
        state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
        state->err_info = "reading lz4-compressed files isn't supported";
        return -1;
#endif
    }

    if (state->fast_seek)
        fast_seek_header(state, state->raw_pos - state->in.avail - state->out.avail, state->pos, UNCOMPRESSED);

    /* doing raw i/o, save start of raw data for seeking, copy any leftover
       input to output -- this assumes that the output buffer is larger than
       the input buffer, which also assures space for gzungetc() */
    state->raw = state->pos;
    state->out.next = state->out.buf;

    /* not a compressed file -- copy everything we've read into the
       input buffer to the output buffer and fall to raw i/o */
    already_read = bytes_in_buffer(&state->in);
    if (already_read != 0) {
        memcpy(state->out.buf, state->in.buf, already_read);
        state->out.avail = already_read;

        /* Now discard everything in the input buffer */
        buf_reset(&state->in);
    }
    state->compression = UNCOMPRESSED;
    return 0;
}
/*
 * Refill the output (uncompressed-data) buffer.
 *
 * If we don't yet know what's in the file, probe for a compression
 * header first; otherwise dispatch to the active decompressor (or to a
 * straight copy for uncompressed data).  Returns 0 on success, -1 on
 * error (with state->err/state->err_info set).
 */
static int /* gz_make */
fill_out_buffer(FILE_T state)
{
    if (state->compression == UNKNOWN) {        /* look for compression header */
        if (gz_head(state) == -1)
            return -1;
        if (state->out.avail != 0)              /* got some data from gz_head() */
            return 0;
    }
    if (state->compression == UNCOMPRESSED) {   /* straight copy */
        if (buf_read(state, &state->out) < 0)
            return -1;
    }
#ifdef HAVE_ZLIB
    else if (state->compression == ZLIB) {      /* decompress */
        zlib_read(state, state->out.buf, state->size << 1);
    }
#endif
#ifdef HAVE_ZSTD
    else if (state->compression == ZSTD) {
        ws_assert(state->out.avail == 0);

        if (state->in.avail == 0 && fill_in_buffer(state) == -1)
            return -1;

        ZSTD_outBuffer output = {state->out.buf, state->size << 1, 0};
        ZSTD_inBuffer input = {state->in.next, state->in.avail, 0};
        const size_t ret = ZSTD_decompressStream(state->zstd_dctx, &output, &input);
        if (ZSTD_isError(ret)) {
            state->err = WTAP_ERR_DECOMPRESS;
            state->err_info = ZSTD_getErrorName(ret);
            return -1;
        }

        /* Consume what the decompressor read, expose what it produced. */
        state->in.next = state->in.next + input.pos;
        state->in.avail -= (guint)input.pos;

        state->out.next = output.dst;
        state->out.avail = (guint)output.pos;

        if (ret == 0) {
            /* End of frame; look for a new compression header next time. */
            state->last_compression = state->compression;
            state->compression = UNKNOWN;
        }
    }
#endif
#ifdef USE_LZ4
    else if (state->compression == LZ4) {
        ws_assert(state->out.avail == 0);

        if (state->in.avail == 0 && fill_in_buffer(state) == -1)
            return -1;

        size_t outBufSize = state->size << 1;
        size_t inBufSize = state->in.avail;
        const size_t ret = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL);
        if (LZ4F_isError(ret)) {
            state->err = WTAP_ERR_DECOMPRESS;
            state->err_info = LZ4F_getErrorName(ret);
            return -1;
        }

        /*
         * We assume LZ4F_decompress() will not set inBufSize to a
         * value > state->in.avail.
         */
        state->in.next = state->in.next + inBufSize;
        state->in.avail -= (guint)inBufSize;

        state->out.next = state->out.buf;
        state->out.avail = (guint)outBufSize;

        if (ret == 0) {
            /* End of frame; look for a new compression header next time. */
            state->last_compression = state->compression;
            state->compression = UNKNOWN;
        }
    }
#endif
    return 0;
}
/*
 * Skip forward len bytes of uncompressed data, or stop at end of file,
 * whichever comes first.  Returns 0 on success, -1 on error.
 */
static int
gz_skip(FILE_T state, gint64 len)
{
    guint n;

    /* skip over len bytes or reach end-of-file, whichever comes first */
    while (len) {
        if (state->out.avail != 0) {
            /* We have stuff in the output buffer; skip over
               it. */
            n = (gint64)state->out.avail > len ? (unsigned)len : state->out.avail;
            state->out.avail -= n;
            state->out.next += n;
            state->pos += n;
            len -= n;
        } else if (state->err != 0) {
            /* We have nothing in the output buffer, and
               we have an error that may not have been
               reported yet; that means we can't generate
               any more data into the output buffer, so
               return an error indication. */
            return -1;
        } else if (state->eof && state->in.avail == 0) {
            /* We have nothing in the output buffer, and
               we're at the end of the input; just return. */
            break;
        } else {
            /* We have nothing in the output buffer, and
               we can generate more data; get more output,
               looking for header if required. */
            if (fill_out_buffer(state) == -1)
                return -1;
        }
    }
    return 0;
}
/*
 * Reset the stream state so the file can be (re)read from the start:
 * discards any buffered input/output, clears EOF/error/seek state, and
 * arranges for the next read to look for a compression header again.
 * Does not touch the underlying file descriptor position.
 */
static void
gz_reset(FILE_T state)
{
    buf_reset(&state->out);             /* no output data available */
    state->eof = FALSE;                 /* not at end of file */
    state->compression = UNKNOWN;       /* look for compression header */
    state->seek_pending = FALSE;        /* no seek request pending */
    state->err = 0;                     /* clear error */
    state->err_info = NULL;
    state->pos = 0;                     /* no uncompressed data yet */
    buf_reset(&state->in);              /* no input data yet */
}
FILE_T
2012-06-01 08:05:12 +00:00
file_fdopen ( int fd )
2011-04-08 00:28:37 +00:00
{
2019-03-10 18:11:32 +00:00
/*
* XXX - we now check whether we have st_blksize in struct stat ;
* it ' s not available on all platforms .
*
* I ' m not sure why we ' re testing _STATBUF_ST_BLKSIZE ; it ' s not
* set on all platforms that have st_blksize in struct stat .
* ( Not all platforms have st_blksize in struct stat . )
*
* Is there some reason * not * to make the buffer size the maximum
* of GBUFSIZE and st_blksize ? On most UN * Xes , the standard I / O
* library does I / O with st_blksize as the buffer size ; on others ,
* and on Windows , it ' s a 4 K buffer size . If st_blksize is bigger
* than GBUFSIZE ( which is currently 4 KB ) , that ' s probably a
* hint that reading in st_blksize chunks is considered a good
* idea ( e . g . , an 8 K / 1 K Berkeley fast file system with st_blksize
* being 8 K , or APFS , where st_blksize is big on at least some
* versions of macOS ) .
*/
# ifdef _STATBUF_ST_BLKSIZE
2014-09-10 16:07:04 +00:00
ws_statb64 st ;
2011-04-08 00:28:37 +00:00
# endif
2021-09-02 08:36:29 +00:00
# ifdef HAVE_ZSTD
size_t zstd_buf_size ;
# endif
guint want = GZBUFSIZE ;
2014-09-10 16:07:04 +00:00
FILE_T state ;
2021-09-07 19:40:44 +00:00
# ifdef USE_LZ4
2021-08-19 12:48:52 +00:00
size_t ret ;
2021-09-02 08:39:06 +00:00
# endif
2011-04-08 00:28:37 +00:00
2014-09-10 16:07:04 +00:00
if ( fd = = - 1 )
return NULL ;
2011-04-08 00:28:37 +00:00
2014-09-10 16:07:04 +00:00
/* allocate FILE_T structure to return */
2018-02-22 11:34:01 +00:00
state = ( FILE_T ) g_try_malloc0 ( sizeof * state ) ;
2014-09-10 16:07:04 +00:00
if ( state = = NULL )
return NULL ;
2011-04-08 00:28:37 +00:00
2014-09-10 16:07:04 +00:00
state - > fast_seek_cur = NULL ;
state - > fast_seek = NULL ;
2011-04-12 02:40:14 +00:00
2014-09-10 16:07:04 +00:00
/* open the file with the appropriate mode (or just use fd) */
state - > fd = fd ;
2011-04-08 00:28:37 +00:00
2014-09-10 16:07:04 +00:00
/* we don't yet know whether it's compressed */
state - > is_compressed = FALSE ;
2021-09-11 00:46:54 +00:00
state - > last_compression = UNKNOWN ;
2012-05-24 05:05:29 +00:00
2014-09-10 16:07:04 +00:00
/* save the current position for rewinding (only if reading) */
state - > start = ws_lseek64 ( state - > fd , 0 , SEEK_CUR ) ;
if ( state - > start = = - 1 ) state - > start = 0 ;
state - > raw_pos = state - > start ;
2011-04-08 00:28:37 +00:00
2014-09-10 16:07:04 +00:00
/* initialize stream */
gz_reset ( state ) ;
2011-04-08 00:28:37 +00:00
# ifdef _STATBUF_ST_BLKSIZE
2021-09-02 08:36:29 +00:00
/*
* See what I / O size the file system recommends using , and if
* it ' s bigger than what we ' re using and isn ' t too big , use
* it .
*/
2014-09-10 16:07:04 +00:00
if ( ws_fstat64 ( fd , & st ) > = 0 ) {
/*
* Yes , st_blksize can be bigger than an int ; apparently ,
* it ' s a long on LP64 Linux , for example .
*
2021-09-02 08:36:29 +00:00
* If the value is too big to fit into a guint ,
* just use the maximum read buffer size .
2021-10-18 23:22:33 +00:00
*
* On top of that , the Single UNIX Speification says that
* st_blksize is of type blksize_t , which is a * signed *
* integer type , and , at minimum , macOS 11.6 and Linux 5.14 .11 ' s
* include / uapi / asm - generic / stat . h define it as such .
*
* However , other OSes might make it unsigned , and older versions
* of OSes that currently make it signed might make it unsigned ,
* so we try to avoid warnings from that .
*
* We cast MAX_READ_BUF_SIZE to long in order to avoid the
* warning , although it might introduce warnings on platforms
* where st_blocksize is unsigned ; we ' ll deal with that if
* it ever shows up as an issue .
*
* MAX_READ_BUF_SIZE is < the largest * signed * 32 - bt integer ,
* so casting it to long won ' t turn it into a negative number .
* ( We only support 32 - bit and 64 - bit 2 ' s - complement platforms . )
2014-09-10 16:07:04 +00:00
*/
2021-10-18 16:39:07 +00:00
if ( st . st_blksize < = ( long ) MAX_READ_BUF_SIZE )
2021-09-02 08:36:29 +00:00
want = ( guint ) st . st_blksize ;
else
want = MAX_READ_BUF_SIZE ;
2014-09-10 16:07:04 +00:00
/* XXX, verify result? */
}
2011-04-08 00:28:37 +00:00
# endif
2021-08-19 12:48:52 +00:00
# ifdef HAVE_ZSTD
/* we should have separate input and output buf sizes */
2021-09-02 08:36:29 +00:00
zstd_buf_size = ZSTD_DStreamInSize ( ) ;
if ( zstd_buf_size > want ) {
if ( zstd_buf_size < = MAX_READ_BUF_SIZE )
want = ( guint ) zstd_buf_size ;
else
want = MAX_READ_BUF_SIZE ;
}
zstd_buf_size = ZSTD_DStreamOutSize ( ) ;
if ( zstd_buf_size > want ) {
if ( zstd_buf_size < = MAX_READ_BUF_SIZE )
want = ( guint ) zstd_buf_size ;
else
want = MAX_READ_BUF_SIZE ;
}
2021-08-19 12:48:52 +00:00
# endif
2014-09-10 16:07:04 +00:00
/* allocate buffers */
2021-09-02 08:36:29 +00:00
state - > in . buf = ( unsigned char * ) g_try_malloc ( want ) ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
state - > in . next = state - > in . buf ;
state - > in . avail = 0 ;
2021-09-02 08:36:29 +00:00
state - > out . buf = ( unsigned char * ) g_try_malloc ( want < < 1 ) ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
state - > out . next = state - > out . buf ;
state - > out . avail = 0 ;
2014-09-10 16:07:04 +00:00
state - > size = want ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
if ( state - > in . buf = = NULL | | state - > out . buf = = NULL ) {
2021-08-19 12:48:52 +00:00
goto err ;
2014-09-10 16:07:04 +00:00
}
2006-11-05 22:46:44 +00:00
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2014-09-10 16:07:04 +00:00
/* allocate inflate memory */
state - > strm . zalloc = Z_NULL ;
state - > strm . zfree = Z_NULL ;
state - > strm . opaque = Z_NULL ;
state - > strm . avail_in = 0 ;
state - > strm . next_in = Z_NULL ;
if ( inflateInit2 ( & ( state - > strm ) , - 15 ) ! = Z_OK ) { /* raw inflate */
2021-08-19 12:48:52 +00:00
goto err ;
2014-09-10 16:07:04 +00:00
}
/* for now, assume we should check the crc */
state - > dont_check_crc = FALSE ;
2011-04-08 00:28:37 +00:00
# endif
2021-08-19 12:48:52 +00:00
# ifdef HAVE_ZSTD
state - > zstd_dctx = ZSTD_createDCtx ( ) ;
if ( state - > zstd_dctx = = NULL ) {
goto err ;
}
# endif
2021-09-07 19:40:44 +00:00
# ifdef USE_LZ4
2021-08-19 12:48:52 +00:00
ret = LZ4F_createDecompressionContext ( & state - > lz4_dctx , LZ4F_VERSION ) ;
if ( LZ4F_isError ( ret ) ) {
goto err ;
}
# endif
2014-09-10 16:07:04 +00:00
/* return stream */
return state ;
2021-08-19 12:48:52 +00:00
err :
# ifdef HAVE_ZLIB
inflateEnd ( & state - > strm ) ;
# endif
# ifdef HAVE_ZSTD
ZSTD_freeDCtx ( state - > zstd_dctx ) ;
# endif
2021-09-07 19:40:44 +00:00
# ifdef USE_LZ4
2021-08-19 12:48:52 +00:00
LZ4F_freeDecompressionContext ( state - > lz4_dctx ) ;
# endif
g_free ( state - > out . buf ) ;
g_free ( state - > in . buf ) ;
g_free ( state ) ;
errno = ENOMEM ;
return NULL ;
2011-04-08 00:28:37 +00:00
}
2006-11-05 22:46:44 +00:00
2005-11-06 22:43:25 +00:00
FILE_T
2011-04-06 07:09:56 +00:00
file_open ( const char * path )
2005-11-06 22:43:25 +00:00
{
2014-09-10 16:07:04 +00:00
int fd ;
FILE_T ft ;
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2014-09-10 16:07:04 +00:00
const char * suffixp ;
2011-05-09 03:48:41 +00:00
# endif
2005-11-06 22:43:25 +00:00
2014-09-10 16:07:04 +00:00
/* open file and do correct filename conversions.
XXX - do we need O_LARGEFILE ? On UN * X , if we need to do
something special to get large file support , the configure
script should have set us up with the appropriate # defines ,
so we should be getting a large - file - enabled file descriptor
here . Pre - Large File Summit UN * Xes , and possibly even some
post - LFS UN * Xes , might require O_LARGEFILE here , though .
If so , we should probably handle that in ws_open ( ) . */
if ( ( fd = ws_open ( path , O_RDONLY | O_BINARY , 0000 ) ) = = - 1 )
return NULL ;
/* open file handle */
ft = file_fdopen ( fd ) ;
if ( ft = = NULL ) {
ws_close ( fd ) ;
return NULL ;
}
2005-11-06 22:43:25 +00:00
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2014-09-10 16:07:04 +00:00
/*
* If this file ' s name ends in " .caz " , it ' s probably a compressed
* Windows Sniffer file . The compression is gzip , but if we
* process the CRC as specified by RFC 1952 , the computed CRC
* doesn ' t match the stored CRC .
*
* Compressed Windows Sniffer files don ' t all have the same CRC
* value ; is it just random crap , or are they running the CRC on
* a different set of data than you ' re supposed to ( e . g . , not
* CRCing some of the data ) , or something such as that ?
*
* For now , we just set a flag to ignore CRC errors .
*/
suffixp = strrchr ( path , ' . ' ) ;
if ( suffixp ! = NULL ) {
if ( g_ascii_strcasecmp ( suffixp , " .caz " ) = = 0 )
ft - > dont_check_crc = TRUE ;
}
2011-05-09 03:48:41 +00:00
# endif
2014-09-10 16:07:04 +00:00
return ft ;
2005-11-06 22:43:25 +00:00
}
2012-12-05 16:19:12 +00:00
/*
 * Attach a fast-seek point table to the stream; seek points are added
 * during the sequential pass and consulted by later random-access seeks.
 * random_flag is currently unused.
 */
void
file_set_random_access(FILE_T stream, gboolean random_flag _U_, GPtrArray *seek)
{
    stream->fast_seek = seek;
}
2006-11-06 00:24:24 +00:00
gint64
2011-04-08 00:28:37 +00:00
file_seek ( FILE_T file , gint64 offset , int whence , int * err )
2000-01-13 07:09:20 +00:00
{
2014-09-10 16:07:04 +00:00
struct fast_seek_point * here ;
guint n ;
2011-04-08 00:28:37 +00:00
2014-10-16 11:58:58 +00:00
if ( whence ! = SEEK_SET & & whence ! = SEEK_CUR & & whence ! = SEEK_END ) {
2021-05-23 23:46:43 +00:00
ws_assert_not_reached ( ) ;
2011-04-08 00:28:37 +00:00
/*
2014-09-10 16:07:04 +00:00
* err = EINVAL ;
return - 1 ;
*/
}
2014-10-16 11:58:58 +00:00
/* Normalize offset to a SEEK_CUR specification */
if ( whence = = SEEK_END ) {
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
/* Seek relative to the end of the file; given that we might be
reading from a compressed file , we do that by seeking to the
end of the file , making an offset relative to the end of
the file an offset relative to the current position .
XXX - we don ' t actually use this yet , but , for uncompressed
files , we could optimize it , if desired , by directly using
ws_lseek64 ( ) . */
2014-10-16 11:58:58 +00:00
if ( gz_skip ( file , G_MAXINT64 ) = = - 1 ) {
* err = file - > err ;
return - 1 ;
}
if ( offset = = 0 ) {
/* We are done */
return file - > pos ;
}
} else if ( whence = = SEEK_SET )
2014-09-10 16:07:04 +00:00
offset - = file - > pos ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
else if ( file - > seek_pending ) {
/* There's a forward-skip pending, so file->pos doesn't reflect
the actual file position , it represents the position from
which we ' re skipping ; update the offset to include that . */
2014-09-10 16:07:04 +00:00
offset + = file - > skip ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
}
2014-09-10 16:07:04 +00:00
file - > seek_pending = FALSE ;
2014-10-25 23:08:02 +00:00
/*
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
* Are we moving at all ?
2014-10-25 23:08:02 +00:00
*/
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
if ( offset = = 0 ) {
/* No. Just return the current position. */
return file - > pos ;
}
2014-10-25 23:08:02 +00:00
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
/*
2018-01-18 22:12:42 +00:00
* Are we seeking backwards ?
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
*/
2018-01-18 22:12:42 +00:00
if ( offset < 0 ) {
2014-10-25 23:08:02 +00:00
/*
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
* Yes .
2018-01-18 22:12:42 +00:00
*
* Do we have enough data before the current position in the
* buffer that we can seek backwards within the buffer ?
2014-10-25 23:08:02 +00:00
*/
2018-01-18 22:12:42 +00:00
if ( - offset < = offset_in_buffer ( & file - > out ) ) {
2014-09-10 16:07:04 +00:00
/*
2018-01-18 22:12:42 +00:00
* Yes . Adjust appropriately .
*
* offset is negative , so - offset is non - negative , and
* - offset is < = an unsigned and thus fits in an unsigned .
* Get that value and adjust appropriately .
2014-10-25 23:08:02 +00:00
*
2018-01-18 22:12:42 +00:00
* ( Casting offset to unsigned makes it positive , which
* is not what we would want , so we cast - offset instead . )
*
* XXX - this won ' t work with - offset = 2 ^ 63 , as its
* negative isn ' t a valid 64 - bit integer , but we are
* not at all likely to see files big enough to ever
* see a negative offset that large .
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
*/
2018-01-18 22:12:42 +00:00
guint adjustment = ( unsigned ) ( - offset ) ;
file - > out . avail + = adjustment ;
file - > out . next - = adjustment ;
file - > pos - = adjustment ;
return file - > pos ;
}
} else {
/*
* No . Offset is positive ; we ' re seeking forwards .
*
* Do we have enough data after the current position in the
* buffer that we can seek forwards within the buffer ?
*/
if ( offset < file - > out . avail ) {
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
/*
2018-01-18 22:12:42 +00:00
* Yes . Adjust appropriately .
2014-09-10 16:07:04 +00:00
*
2018-01-18 22:12:42 +00:00
* offset is < an unsigned and thus fits in an unsigned ,
* so we can cast it to guint safely .
2014-09-10 16:07:04 +00:00
*/
2018-01-18 22:12:42 +00:00
file - > out . avail - = ( guint ) offset ;
file - > out . next + = offset ;
file - > pos + = offset ;
return file - > pos ;
2014-09-10 16:07:04 +00:00
}
}
2014-10-25 23:08:02 +00:00
/*
2018-01-18 22:12:42 +00:00
* We ' re not seeking within the buffer . Do we have " fast seek " data
* for the location to which we will be seeking , and is the offset
* outside the span for compressed files or is this an uncompressed
* file ?
2014-10-25 23:08:02 +00:00
*
* XXX , profile
*/
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
if ( ( here = fast_seek_find ( file , file - > pos + offset ) ) & &
( offset < 0 | | offset > SPAN | | here - > compression = = UNCOMPRESSED ) ) {
2014-09-10 16:07:04 +00:00
gint64 off , off2 ;
2011-04-12 02:40:14 +00:00
2014-10-25 23:08:02 +00:00
/*
* Yes . Use that data to do the seek .
* Note that this will be true only if file_set_random_access ( )
* has been called on this file , which should never be the case
* for a pipe .
*/
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2014-09-10 16:07:04 +00:00
if ( here - > compression = = ZLIB ) {
2011-04-29 07:49:55 +00:00
# ifdef HAVE_INFLATEPRIME
2014-09-10 16:07:04 +00:00
off = here - > in - ( here - > data . zlib . bits ? 1 : 0 ) ;
2011-04-29 07:49:55 +00:00
# else
2014-09-10 16:07:04 +00:00
off = here - > in ;
2011-04-29 07:49:55 +00:00
# endif
2014-09-10 16:07:04 +00:00
off2 = here - > out ;
} else if ( here - > compression = = GZIP_AFTER_HEADER ) {
off = here - > in ;
off2 = here - > out ;
} else
2011-04-12 02:40:14 +00:00
# endif
2014-09-10 16:07:04 +00:00
{
off2 = ( file - > pos + offset ) ;
off = here - > in + ( off2 - here - > out ) ;
}
if ( ws_lseek64 ( file - > fd , off , SEEK_SET ) = = - 1 ) {
* err = errno ;
return - 1 ;
}
fast_seek_reset ( file ) ;
file - > raw_pos = off ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
buf_reset ( & file - > out ) ;
2014-09-10 16:07:04 +00:00
file - > eof = FALSE ;
file - > seek_pending = FALSE ;
file - > err = 0 ;
file - > err_info = NULL ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
buf_reset ( & file - > in ) ;
2011-04-12 02:40:14 +00:00
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2014-09-10 16:07:04 +00:00
if ( here - > compression = = ZLIB ) {
z_stream * strm = & file - > strm ;
2011-04-12 02:40:14 +00:00
2014-09-10 16:07:04 +00:00
inflateReset ( strm ) ;
strm - > adler = here - > data . zlib . adler ;
strm - > total_out = here - > data . zlib . total_out ;
2011-04-29 07:49:55 +00:00
# ifdef HAVE_INFLATEPRIME
2014-09-10 16:07:04 +00:00
if ( here - > data . zlib . bits ) {
FILE_T state = file ;
int ret = GZ_GETC ( ) ;
if ( ret = = - 1 ) {
if ( state - > err = = 0 ) {
/* EOF */
* err = WTAP_ERR_SHORT_READ ;
} else
* err = state - > err ;
return - 1 ;
}
( void ) inflatePrime ( strm , here - > data . zlib . bits , ret > > ( 8 - here - > data . zlib . bits ) ) ;
}
2011-04-29 07:49:55 +00:00
# endif
2014-09-10 16:07:04 +00:00
( void ) inflateSetDictionary ( strm , here - > data . zlib . window , ZLIB_WINSIZE ) ;
file - > compression = ZLIB ;
} else if ( here - > compression = = GZIP_AFTER_HEADER ) {
z_stream * strm = & file - > strm ;
inflateReset ( strm ) ;
strm - > adler = crc32 ( 0L , Z_NULL , 0 ) ;
file - > compression = ZLIB ;
} else
2011-04-12 02:40:14 +00:00
# endif
2014-09-10 16:07:04 +00:00
file - > compression = here - > compression ;
offset = ( file - > pos + offset ) - off2 ;
file - > pos = off2 ;
/* g_print("OK! %ld\n", offset); */
if ( offset ) {
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
/* Don't skip forward yet, wait until we want to read from
the file ; that way , if we do multiple seeks in a row ,
all involving forward skips , they will be combined . */
2014-09-10 16:07:04 +00:00
file - > seek_pending = TRUE ;
file - > skip = offset ;
}
return file - > pos + offset ;
}
2014-10-25 23:08:02 +00:00
/*
* Is this an uncompressed file , are we within the raw area ,
* are we either seeking backwards or seeking past the end
* of the buffer , and are we set up for random access with
* file_set_random_access ( ) ?
*
* Again , note that this will never be true on a pipe , as
* file_set_random_access ( ) should never be called if we ' re
* reading from a pipe .
*/
2014-09-10 16:07:04 +00:00
if ( file - > compression = = UNCOMPRESSED & & file - > pos + offset > = file - > raw
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
& & ( offset < 0 | | offset > = file - > out . avail )
& & ( file - > fast_seek ! = NULL ) )
2014-09-10 16:07:04 +00:00
{
2014-10-25 23:08:02 +00:00
/*
* Yes . Just seek there within the file .
*/
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
if ( ws_lseek64 ( file - > fd , offset - file - > out . avail , SEEK_CUR ) = = - 1 ) {
2014-09-10 16:07:04 +00:00
* err = errno ;
return - 1 ;
}
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
file - > raw_pos + = ( offset - file - > out . avail ) ;
buf_reset ( & file - > out ) ;
2014-09-10 16:07:04 +00:00
file - > eof = FALSE ;
file - > seek_pending = FALSE ;
file - > err = 0 ;
file - > err_info = NULL ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
buf_reset ( & file - > in ) ;
2014-09-10 16:07:04 +00:00
file - > pos + = offset ;
return file - > pos ;
}
2014-10-25 23:08:02 +00:00
/*
* Are we seeking backwards ?
*/
2014-09-10 16:07:04 +00:00
if ( offset < 0 ) {
2014-10-25 23:08:02 +00:00
/*
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
* Yes . We have no fast seek data , so we have to rewind and
* seek forward .
* XXX - true only for compressed files .
2014-10-25 23:08:02 +00:00
*
* Calculate the amount to skip forward after rewinding .
*/
2014-09-10 16:07:04 +00:00
offset + = file - > pos ;
if ( offset < 0 ) { /* before start of file! */
* err = EINVAL ;
return - 1 ;
}
/* rewind, then skip to offset */
/* back up and start over */
if ( ws_lseek64 ( file - > fd , file - > start , SEEK_SET ) = = - 1 ) {
* err = errno ;
return - 1 ;
}
fast_seek_reset ( file ) ;
file - > raw_pos = file - > start ;
gz_reset ( file ) ;
}
2014-10-25 23:08:02 +00:00
/*
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
* Either we ' re seeking backwards , but have rewound and now need to
* skip forwards , or we ' re seeking forwards .
2014-10-25 23:08:02 +00:00
*
* Skip what ' s in output buffer ( one less gzgetc ( ) check ) .
*/
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
n = ( gint64 ) file - > out . avail > offset ? ( unsigned ) offset : file - > out . avail ;
file - > out . avail - = n ;
file - > out . next + = n ;
2014-09-10 16:07:04 +00:00
file - > pos + = n ;
offset - = n ;
/* request skip (if not zero) */
if ( offset ) {
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
/* Don't skip forward yet, wait until we want to read from
the file ; that way , if we do multiple seeks in a row ,
all involving forward skips , they will be combined . */
2014-09-10 16:07:04 +00:00
file - > seek_pending = TRUE ;
file - > skip = offset ;
}
return file - > pos + offset ;
2000-01-25 04:49:55 +00:00
}
2006-11-05 22:46:44 +00:00
gint64
2011-04-08 00:28:37 +00:00
file_tell ( FILE_T stream )
2000-01-25 04:49:55 +00:00
{
2014-09-10 16:07:04 +00:00
/* return position */
return stream - > pos + ( stream - > seek_pending ? stream - > skip : 0 ) ;
2000-01-13 07:09:20 +00:00
}
2011-05-09 08:12:26 +00:00
gint64
file_tell_raw(FILE_T stream)
{
    /*
     * Report the raw position, i.e. the offset within the underlying
     * (possibly compressed) file, as opposed to the logical position
     * within the uncompressed data stream.
     */
    return stream->raw_pos;
}
2011-05-09 08:12:26 +00:00
int
file_fstat(FILE_T stream, ws_statb64 *statb, int *err)
{
    /*
     * Fill in *statb with information about the underlying file
     * descriptor.
     *
     * Returns 0 on success.  On failure, returns -1 and, if err is
     * non-null, stores the errno value from ws_fstat64() through it.
     */
    int ret = 0;

    if (ws_fstat64(stream->fd, statb) == -1) {
        if (err != NULL)
            *err = errno;
        ret = -1;
    }
    return ret;
}
2012-05-24 05:05:29 +00:00
gboolean
file_iscompressed(FILE_T stream)
{
    /*
     * TRUE if this file was determined to be compressed when it was
     * opened; the flag does not change over the life of the handle.
     */
    return stream->is_compressed;
}
2021-09-11 00:46:54 +00:00
/* Map our internal compression state to a wtap compression type.
 *
 * Uncompressed files always report WTAP_UNCOMPRESSED.  For compressed
 * files, if the current state is temporarily UNKNOWN (we are in the
 * middle of rereading compression headers), fall back to the last
 * known compression type rather than guessing.
 */
static wtap_compression_type
file_get_compression_type(FILE_T stream)
{
    if (!stream->is_compressed)
        return WTAP_UNCOMPRESSED;

    switch ((stream->compression == UNKNOWN) ? stream->last_compression
                                             : stream->compression) {

    case ZLIB:
    case GZIP_AFTER_HEADER:
        return WTAP_GZIP_COMPRESSED;

    case ZSTD:
        return WTAP_ZSTD_COMPRESSED;

    case LZ4:
        return WTAP_LZ4_COMPRESSED;

    case UNCOMPRESSED:
        return WTAP_UNCOMPRESSED;

    default:
        /* UNKNOWN here would mean is_compressed was set without a
         * last known compression type - that should be impossible. */
        ws_assert_not_reached();
        return WTAP_UNCOMPRESSED;
    }
}
2012-12-05 16:19:12 +00:00
int
2011-04-08 00:28:37 +00:00
file_read ( void * buf , unsigned int len , FILE_T file )
2000-01-13 07:09:20 +00:00
{
2014-09-10 16:07:04 +00:00
guint got , n ;
/* if len is zero, avoid unnecessary operations */
if ( len = = 0 )
return 0 ;
/* process a skip request */
if ( file - > seek_pending ) {
file - > seek_pending = FALSE ;
if ( gz_skip ( file , file - > skip ) = = - 1 )
return - 1 ;
}
2016-09-28 23:45:23 +00:00
/*
* Get len bytes to buf , or less than len if at the end ;
* if buf is null , just throw the bytes away .
*/
2014-09-10 16:07:04 +00:00
got = 0 ;
do {
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
if ( file - > out . avail ! = 0 ) {
2014-09-10 16:07:04 +00:00
/* We have stuff in the output buffer; copy
what we have . */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
n = file - > out . avail > len ? len : file - > out . avail ;
2016-09-28 23:45:23 +00:00
if ( buf ! = NULL ) {
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
memcpy ( buf , file - > out . next , n ) ;
2016-09-28 23:45:23 +00:00
buf = ( char * ) buf + n ;
}
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
file - > out . next + = n ;
file - > out . avail - = n ;
2016-09-28 23:45:23 +00:00
len - = n ;
got + = n ;
file - > pos + = n ;
2018-01-15 00:05:32 +00:00
} else if ( file - > err ! = 0 ) {
2014-09-10 16:07:04 +00:00
/* We have nothing in the output buffer, and
we have an error that may not have been
reported yet ; that means we can ' t generate
any more data into the output buffer , so
return an error indication . */
return - 1 ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
} else if ( file - > eof & & file - > in . avail = = 0 ) {
2014-09-10 16:07:04 +00:00
/* We have nothing in the output buffer, and
we ' re at the end of the input ; just return
with what we ' ve gotten so far . */
break ;
} else {
/* We have nothing in the output buffer, and
we can generate more data ; get more output ,
looking for header if required , and
keep looping to process the new stuff
in the output buffer . */
if ( fill_out_buffer ( file ) = = - 1 )
return - 1 ;
}
} while ( len ) ;
return ( int ) got ;
2000-01-13 07:09:20 +00:00
}
2006-11-05 22:46:44 +00:00
2014-03-18 17:21:51 +00:00
/*
 * Peek at - do not consume - the next byte of the stream.
 * Returns that byte, or -1 on error or end of file.
 *
 * XXX - this *peeks* at the next byte, not a character.
 *
 * Fix: the original declared "int ret = 0" whose only use was a
 * return statement placed after an infinite loop, i.e. unreachable
 * dead code; both the variable and the dead return are removed.
 */
int
file_peekc(FILE_T file)
{
    /* check that we're reading and that there's no error */
    if (file->err != 0)
        return -1;

    /* try output buffer (no need to check for skip request) */
    if (file->out.avail != 0)
        return *(file->out.next);

    /* process a skip request */
    if (file->seek_pending) {
        file->seek_pending = FALSE;
        if (gz_skip(file, file->skip) == -1)
            return -1;
    }

    /* if we processed a skip request, there may be data in the buffer,
     * or an error could have occurred; likewise if we didn't do a seek
     * but now call fill_out_buffer, the errors can occur.  So we check
     * before and after each refill - this is basically the logic from
     * file_read() but only for peeking, not consuming, a byte.
     */
    for (;;) {
        if (file->out.avail != 0)
            return *(file->out.next);
        if (file->err != 0)
            return -1;
        if (file->eof && file->in.avail == 0)
            return -1;
        if (fill_out_buffer(file) == -1)
            return -1;
    }
    /* never reached: every path out of the loop returns */
}
2013-01-06 20:36:33 +00:00
/*
* XXX - this gets a byte , not a character .
*/
2011-04-08 00:28:37 +00:00
int
file_getc ( FILE_T file )
{
2014-09-10 16:07:04 +00:00
unsigned char buf [ 1 ] ;
int ret ;
/* check that we're reading and that there's no error */
2018-01-15 00:05:32 +00:00
if ( file - > err ! = 0 )
2014-09-10 16:07:04 +00:00
return - 1 ;
/* try output buffer (no need to check for skip request) */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
if ( file - > out . avail ! = 0 ) {
file - > out . avail - - ;
2014-09-10 16:07:04 +00:00
file - > pos + + ;
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
return * ( file - > out . next ) + + ;
2014-09-10 16:07:04 +00:00
}
ret = file_read ( buf , 1 , file ) ;
return ret < 1 ? - 1 : buf [ 0 ] ;
2011-04-08 00:28:37 +00:00
}
2018-04-07 22:00:46 +00:00
/* Like file_gets, but returns a pointer to the terminating NUL. */
2011-04-08 00:28:37 +00:00
char *
2018-04-07 22:00:46 +00:00
file_getsp ( char * buf , int len , FILE_T file )
2006-11-05 22:46:44 +00:00
{
2014-09-10 16:07:04 +00:00
guint left , n ;
char * str ;
unsigned char * eol ;
/* check parameters */
if ( buf = = NULL | | len < 1 )
return NULL ;
/* check that there's no error */
2018-01-15 00:05:32 +00:00
if ( file - > err ! = 0 )
2014-09-10 16:07:04 +00:00
return NULL ;
/* process a skip request */
if ( file - > seek_pending ) {
file - > seek_pending = FALSE ;
if ( gz_skip ( file , file - > skip ) = = - 1 )
return NULL ;
}
/* copy output bytes up to new line or len - 1, whichever comes first --
append a terminating zero to the string ( we don ' t check for a zero in
the contents , let the user worry about that ) */
str = buf ;
left = ( unsigned ) len - 1 ;
if ( left ) do {
/* assure that something is in the output buffer */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
if ( file - > out . avail = = 0 ) {
2014-09-10 16:07:04 +00:00
/* We have nothing in the output buffer. */
2018-01-15 00:05:32 +00:00
if ( file - > err ! = 0 ) {
2014-09-10 16:07:04 +00:00
/* We have an error that may not have
been reported yet ; that means we
can ' t generate any more data into
the output buffer , so return an
error indication . */
return NULL ;
}
if ( fill_out_buffer ( file ) = = - 1 )
return NULL ; /* error */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
if ( file - > out . avail = = 0 ) { /* end of file */
2014-09-10 16:07:04 +00:00
if ( buf = = str ) /* got bupkus */
return NULL ;
break ; /* got something -- return it */
}
}
/* look for end-of-line in current output buffer */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
n = file - > out . avail > left ? left : file - > out . avail ;
eol = ( unsigned char * ) memchr ( file - > out . next , ' \n ' , n ) ;
2014-09-10 16:07:04 +00:00
if ( eol ! = NULL )
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
n = ( unsigned ) ( eol - file - > out . next ) + 1 ;
2014-09-10 16:07:04 +00:00
/* copy through end-of-line, or remainder if not found */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
memcpy ( buf , file - > out . next , n ) ;
file - > out . avail - = n ;
file - > out . next + = n ;
2014-09-10 16:07:04 +00:00
file - > pos + = n ;
left - = n ;
buf + = n ;
} while ( left & & eol = = NULL ) ;
2018-04-07 22:00:46 +00:00
/* found end-of-line or out of space -- add a terminator and return
a pointer to it */
2014-09-10 16:07:04 +00:00
buf [ 0 ] = 0 ;
2018-04-07 22:00:46 +00:00
return buf ;
}
/* Read a line into buf, up to len-1 bytes plus a terminating NUL.
   Thin wrapper around file_getsp() with the classic fgets() contract:
   returns the caller's buffer on success, NULL on error or on EOF with
   no data read. */
char *
file_gets(char *buf, int len, FILE_T file)
{
    return (file_getsp(buf, len, file) != NULL) ? buf : NULL;
}
2012-12-05 16:19:12 +00:00
int
2011-04-08 00:28:37 +00:00
file_eof ( FILE_T file )
2006-11-05 22:46:44 +00:00
{
2014-09-10 16:07:04 +00:00
/* return end-of-file state */
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
return ( file - > eof & & file - > in . avail = = 0 & & file - > out . avail = = 0 ) ;
2006-11-05 22:46:44 +00:00
}
2011-04-08 00:28:37 +00:00
/*
* Routine to return a Wiretap error code ( 0 for no error , an errno
* for a file error , or a WTAP_ERR_ code for other errors ) for an
2011-04-21 09:41:52 +00:00
* I / O stream . Also returns an error string for some errors .
2011-04-08 00:28:37 +00:00
*/
2000-01-13 07:09:20 +00:00
int
2011-04-21 09:41:52 +00:00
file_error ( FILE_T fh , gchar * * err_info )
2000-01-13 07:09:20 +00:00
{
2014-09-10 16:07:04 +00:00
if ( fh - > err ! = 0 & & err_info ) {
/* g_strdup() returns NULL for NULL argument */
* err_info = g_strdup ( fh - > err_info ) ;
2014-10-08 20:44:46 +00:00
}
2014-09-10 16:07:04 +00:00
return fh - > err ;
2011-04-08 00:28:37 +00:00
}
/* Reset the stream to a usable state after an error or EOF:
   clear the EOF flag, the error code and the extra error text. */
void
file_clearerr(FILE_T stream)
{
    stream->eof = FALSE;
    stream->err = 0;
    stream->err_info = NULL;
}
2012-06-01 08:05:12 +00:00
/* Close the underlying descriptor without freeing the FILE_T state;
   the descriptor is marked invalid (-1) so a later file_close() won't
   try to close it a second time. */
void
file_fdclose(FILE_T file)
{
    ws_close(file->fd);
    file->fd = -1;
}
/* Reattach the stream to a freshly opened descriptor for path.
   Returns FALSE (leaving the old descriptor in place) if the open
   fails, TRUE on success. */
gboolean
file_fdreopen(FILE_T file, const char *path)
{
    int new_fd;

    new_fd = ws_open(path, O_RDONLY | O_BINARY, 0000);
    if (new_fd == -1)
        return FALSE;
    file->fd = new_fd;
    return TRUE;
}
void
2011-04-08 00:28:37 +00:00
file_close ( FILE_T file )
{
2014-09-10 16:07:04 +00:00
int fd = file - > fd ;
2011-04-08 00:28:37 +00:00
2014-09-10 16:07:04 +00:00
/* free memory and close file */
if ( file - > size ) {
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2014-09-10 16:07:04 +00:00
inflateEnd ( & ( file - > strm ) ) ;
2021-09-08 11:33:31 +00:00
# endif
# ifdef HAVE_ZSTD
ZSTD_freeDCtx ( file - > zstd_dctx ) ;
# endif
# ifdef USE_LZ4
LZ4F_freeDecompressionContext ( file - > lz4_dctx ) ;
2011-04-08 00:28:37 +00:00
# endif
Don't insist on reading a full buffer from the input file.
Don't loop trying to read a full buffer from the input file.
If you're reading from a file, on UN*X or Windows, you should get the
entire read count unless you're fewer than buffer-size bytes from the
end of the file, in which case you should get what remains in the file.
If you're reading from a pipe, however, that could cause you to block
longer than necessary waiting for a full buffer rather than just for the
next chunk of data from the pipe - which might not be a bufferful, if
the program writing to the file is itself writing less-than-bufferful
chunks, as may be the case in, for example, a pipeline coming from a
live capture and with the intent that TShark display the packets as they
arrive.
While we're at it, if we're trying to do a seek and the seek takes place
within the buffer of uncompressed data, just adjust the position within
that buffer for forward seeks as well as backward seeks; this
substantially reduces the number of ws_lseek64() calls when making a
sequential pass through the file in Wireshark (e.g., running a tap or
filtering the display) and, as we purge the buffer after the
ws_lseek64(), substantically reduces the number of ws_read() calls in
that situation as well.
Have a data structure for a file data buffer, and use it for both the
"input" (compressed data) and "output" (uncompressed data) buffers.
Rename raw_read() to buf_read(), as it reads into a buffer.
Change-Id: I7982b3499a7613a993913a6db887054730764160
Ping-Bug: 14345
Reviewed-on: https://code.wireshark.org/review/25358
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2018-01-18 04:02:56 +00:00
g_free ( file - > out . buf ) ;
g_free ( file - > in . buf ) ;
2014-09-10 16:07:04 +00:00
}
g_free ( file - > fast_seek_cur ) ;
file - > err = 0 ;
file - > err_info = NULL ;
g_free ( file ) ;
/*
* If fd is - 1 , somebody ' s done a file_closefd ( ) on us , so
* we don ' t need to close the FD itself , and shouldn ' t do
* so .
*/
if ( fd ! = - 1 )
ws_close ( fd ) ;
2000-01-13 07:09:20 +00:00
}
2006-11-05 22:46:44 +00:00
2016-04-01 00:32:56 +00:00
# ifdef HAVE_ZLIB
2011-04-11 21:33:22 +00:00
/* internal gzip file state data structure for writing */
struct wtap_writer {
int fd ; /* file descriptor */
gint64 pos ; /* current position in uncompressed data */
2020-10-14 01:48:46 +00:00
guint size ; /* buffer size, zero if not allocated yet */
guint want ; /* requested buffer size, default is GZBUFSIZE */
2011-04-11 21:33:22 +00:00
unsigned char * in ; /* input buffer */
unsigned char * out ; /* output buffer (double-sized when reading) */
unsigned char * next ; /* next output data to deliver or write */
int level ; /* compression level */
int strategy ; /* compression strategy */
int err ; /* error code */
2020-10-14 01:48:46 +00:00
const char * err_info ; /* additional error information string for some errors */
2014-09-10 16:07:04 +00:00
/* zlib deflate stream */
2011-04-11 21:33:22 +00:00
z_stream strm ; /* stream structure in-place (not a pointer) */
} ;
GZWFILE_T
gzwfile_open ( const char * path )
{
int fd ;
GZWFILE_T state ;
int save_errno ;
2011-04-11 22:19:01 +00:00
fd = ws_open ( path , O_BINARY | O_WRONLY | O_CREAT | O_TRUNC , 0666 ) ;
2011-04-11 21:33:22 +00:00
if ( fd = = - 1 )
return NULL ;
state = gzwfile_fdopen ( fd ) ;
if ( state = = NULL ) {
save_errno = errno ;
2015-11-08 10:31:01 +00:00
ws_close ( fd ) ;
2011-04-12 16:31:16 +00:00
errno = save_errno ;
2011-04-11 21:33:22 +00:00
}
return state ;
}
GZWFILE_T
gzwfile_fdopen ( int fd )
{
GZWFILE_T state ;
/* allocate wtap_writer structure to return */
2012-06-02 14:13:14 +00:00
state = ( GZWFILE_T ) g_try_malloc ( sizeof * state ) ;
2011-04-11 21:33:22 +00:00
if ( state = = NULL )
return NULL ;
state - > fd = fd ;
state - > size = 0 ; /* no buffers allocated yet */
state - > want = GZBUFSIZE ; /* requested buffer size */
state - > level = Z_DEFAULT_COMPRESSION ;
state - > strategy = Z_DEFAULT_STRATEGY ;
/* initialize stream */
state - > err = Z_OK ; /* clear error */
2020-10-14 01:48:46 +00:00
state - > err_info = NULL ; /* clear additional error information */
2011-04-11 21:33:22 +00:00
state - > pos = 0 ; /* no uncompressed data yet */
state - > strm . avail_in = 0 ; /* no input data yet */
/* return stream */
return state ;
}
/* Initialize state for writing a gzip file. Mark initialization by setting
2020-10-14 01:48:46 +00:00
state - > size to non - zero . Return - 1 , and set state - > err and possibly
state - > err_info , on failure ; return 0 on success . */
2011-04-11 21:33:22 +00:00
static int
gz_init ( GZWFILE_T state )
{
int ret ;
z_streamp strm = & ( state - > strm ) ;
/* allocate input and output buffers */
2012-06-02 15:32:34 +00:00
state - > in = ( unsigned char * ) g_try_malloc ( state - > want ) ;
state - > out = ( unsigned char * ) g_try_malloc ( state - > want ) ;
2011-04-11 21:33:22 +00:00
if ( state - > in = = NULL | | state - > out = = NULL ) {
2011-04-12 16:31:16 +00:00
g_free ( state - > out ) ;
g_free ( state - > in ) ;
2011-04-15 07:27:03 +00:00
state - > err = ENOMEM ;
2011-04-11 21:33:22 +00:00
return - 1 ;
}
/* allocate deflate memory, set up for gzip compression */
strm - > zalloc = Z_NULL ;
strm - > zfree = Z_NULL ;
strm - > opaque = Z_NULL ;
ret = deflateInit2 ( strm , state - > level , Z_DEFLATED ,
15 + 16 , 8 , state - > strategy ) ;
if ( ret ! = Z_OK ) {
2011-04-12 16:31:16 +00:00
g_free ( state - > out ) ;
2011-04-11 21:33:22 +00:00
g_free ( state - > in ) ;
2011-04-21 17:51:19 +00:00
if ( ret = = Z_MEM_ERROR ) {
2014-09-10 16:07:04 +00:00
/* This means "not enough memory". */
state - > err = ENOMEM ;
2011-04-21 17:51:19 +00:00
} else {
2014-09-10 16:07:04 +00:00
/* This "shouldn't happen". */
state - > err = WTAP_ERR_INTERNAL ;
2020-10-14 01:48:46 +00:00
state - > err_info = " Unknown error from deflateInit2() " ;
2011-04-21 17:51:19 +00:00
}
2011-04-11 21:33:22 +00:00
return - 1 ;
}
/* mark state as initialized */
state - > size = state - > want ;
/* initialize write buffer */
strm - > avail_out = state - > size ;
strm - > next_out = state - > out ;
state - > next = strm - > next_out ;
return 0 ;
}
/* Compress whatever is at avail_in and next_in and write to the output file.
   Return -1, and set state->err and possibly state->err_info, if there is
   an error writing to the output file; return 0 on success.
   flush is assumed to be a valid deflate() flush value.  If flush is Z_FINISH,
   then the deflate() state is reset to start a new gzip stream. */
static int
gz_comp(GZWFILE_T state, int flush)
{
    int ret;
    ssize_t got;
    ptrdiff_t have;
    z_streamp strm = &(state->strm);

    /* Allocate memory if this is the first time through. */
    if (state->size == 0 && gz_init(state) == -1)
        return -1;

    /* Run deflate() on the provided input until it produces no more
       output. */
    ret = Z_OK;
    do {
        /* Write out the current buffer contents if full, or if flushing;
           but when doing Z_FINISH, don't write until we get to
           Z_STREAM_END. */
        if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
            (flush != Z_FINISH || ret == Z_STREAM_END))) {
            have = strm->next_out - state->next;
            if (have) {
                got = ws_write(state->fd, state->next, (unsigned int)have);
                if (got < 0) {
                    state->err = errno;
                    return -1;
                }
                if ((ptrdiff_t)got != have) {
                    state->err = WTAP_ERR_SHORT_WRITE;
                    return -1;
                }
            }
            if (strm->avail_out == 0) {
                strm->avail_out = state->size;
                strm->next_out = state->out;
            }
            state->next = strm->next_out;
        }

        /* Compress. */
        have = strm->avail_out;
        ret = deflate(strm, flush);
        if (ret == Z_STREAM_ERROR) {
            /* This "shouldn't happen". */
            state->err = WTAP_ERR_INTERNAL;
            state->err_info = "Z_STREAM_ERROR from deflate()";
            return -1;
        }
        have -= strm->avail_out;
    } while (have);    /* loop while deflate() keeps producing output */

    /* If that completed a deflate stream, allow another to start. */
    if (flush == Z_FINISH)
        deflateReset(strm);

    /* All done, no errors. */
    return 0;
}
/* Write out len bytes from buf. Return 0, and set state->err, on
failure or on an attempt to write 0 bytes ( in which case state - > err
is Z_OK ) ; return the number of bytes written on success . */
unsigned
2013-01-04 05:22:43 +00:00
gzwfile_write ( GZWFILE_T state , const void * buf , guint len )
2011-04-11 21:33:22 +00:00
{
2013-01-04 05:22:43 +00:00
guint put = len ;
guint n ;
2011-04-11 21:33:22 +00:00
z_streamp strm ;
strm = & ( state - > strm ) ;
/* check that there's no error */
if ( state - > err ! = Z_OK )
return 0 ;
/* if len is zero, avoid unnecessary operations */
if ( len = = 0 )
return 0 ;
/* allocate memory if this is the first time through */
if ( state - > size = = 0 & & gz_init ( state ) = = - 1 )
return 0 ;
/* for small len, copy to input buffer, otherwise compress directly */
if ( len < state - > size ) {
/* copy to input buffer, compress when full */
do {
if ( strm - > avail_in = = 0 )
strm - > next_in = state - > in ;
n = state - > size - strm - > avail_in ;
if ( n > len )
n = len ;
2015-12-16 09:37:00 +00:00
# ifdef z_const
DIAG_OFF ( cast - qual )
memcpy ( ( Bytef * ) strm - > next_in + strm - > avail_in , buf , n ) ;
DIAG_ON ( cast - qual )
# else
2015-12-16 08:59:01 +00:00
memcpy ( strm - > next_in + strm - > avail_in , buf , n ) ;
2015-12-16 09:37:00 +00:00
# endif
2011-04-11 21:33:22 +00:00
strm - > avail_in + = n ;
state - > pos + = n ;
2013-07-23 23:34:45 +00:00
buf = ( const char * ) buf + n ;
2011-04-11 21:33:22 +00:00
len - = n ;
if ( len & & gz_comp ( state , Z_NO_FLUSH ) = = - 1 )
return 0 ;
} while ( len ) ;
}
else {
/* consume whatever's left in the input buffer */
2018-01-15 00:05:32 +00:00
if ( strm - > avail_in ! = 0 & & gz_comp ( state , Z_NO_FLUSH ) = = - 1 )
2011-04-11 21:33:22 +00:00
return 0 ;
/* directly compress user buffer to file */
strm - > avail_in = len ;
2015-12-16 09:37:00 +00:00
# ifdef z_const
2014-07-23 10:26:05 +00:00
strm - > next_in = ( z_const Bytef * ) buf ;
2015-12-16 08:59:01 +00:00
# else
2015-12-16 09:37:00 +00:00
DIAG_OFF ( cast - qual )
2015-12-16 08:59:01 +00:00
strm - > next_in = ( Bytef * ) buf ;
2015-12-16 09:37:00 +00:00
DIAG_ON ( cast - qual )
2015-12-16 08:59:01 +00:00
# endif
2011-04-11 21:33:22 +00:00
state - > pos + = len ;
if ( gz_comp ( state , Z_NO_FLUSH ) = = - 1 )
return 0 ;
}
/* input was all buffered or compressed (put will fit in int) */
return ( int ) put ;
}
/* Flush out what we've written so far.  Returns -1, and sets state->err,
   on failure; returns 0 on success. */
int
gzwfile_flush(GZWFILE_T state)
{
    /* Refuse to flush a stream that is already in an error state. */
    if (state->err != Z_OK)
        return -1;
    /* Compress the remaining buffered data with Z_SYNC_FLUSH; any
       failure is reported through state->err. */
    gz_comp(state, Z_SYNC_FLUSH);
    return (state->err != Z_OK) ? -1 : 0;
}
/* Flush out all data written, and close the file. Returns a Wiretap
2016-12-03 22:17:08 +00:00
error on failure ; returns 0 on success . */
2011-04-11 21:33:22 +00:00
int
2016-12-03 22:17:08 +00:00
gzwfile_close ( GZWFILE_T state )
2011-04-11 21:33:22 +00:00
{
int ret = 0 ;
/* flush, free memory, and close file */
2022-11-20 21:18:22 +00:00
if ( gz_comp ( state , Z_FINISH ) = = - 1 )
2011-04-11 21:33:22 +00:00
ret = state - > err ;
( void ) deflateEnd ( & ( state - > strm ) ) ;
g_free ( state - > out ) ;
g_free ( state - > in ) ;
state - > err = Z_OK ;
2016-12-03 22:17:08 +00:00
if ( ws_close ( state - > fd ) = = - 1 & & ret = = 0 )
ret = errno ;
2011-04-11 21:33:22 +00:00
g_free ( state ) ;
return ret ;
}
/* Return the stream's current error code (Z_OK if no error). */
int
gzwfile_geterr(GZWFILE_T state)
{
    return state->err;
}
# endif
2014-09-10 16:07:04 +00:00
/*
2019-07-26 18:43:17 +00:00
* Editor modelines - https : //www.wireshark.org/tools/modelines.html
2014-09-10 16:07:04 +00:00
*
* Local variables :
* c - basic - offset : 4
* tab - width : 8
* indent - tabs - mode : nil
* End :
*
* vi : set shiftwidth = 4 tabstop = 8 expandtab :
* : indentSize = 4 : tabSize = 8 : noTabs = true :
*/