Add iso8601_to_nstime() for editcap and nettrace

This adds a function to parse a string date-time in ISO 8601 format into
a `nstime_t` structure. It's based on code from epan/tvbuff.c and
wiretap/nettrace_3gpp_32_423.c and meant to eventually replace both.
(Currently only replaces the latter.)

Since most of Wireshark expects ISO 8601 date-times to fit a fairly
strict pattern, iso8601_to_nstime() currently rejects date-times without
separators between the components, even though ISO 8601 actually permits
this. This could be revisited later.

Also uses iso8601_to_nstime in editcap to parse the -A/-B options,
thus allowing the user to specify a time zone if desired. (See #17110)
This commit is contained in:
David Perry 2021-01-08 09:18:39 +00:00 committed by AndersBroman
parent 73cef353bf
commit b758fdaede
8 changed files with 280 additions and 195 deletions

View File

@ -104,6 +104,7 @@ libwsutil.so.0 libwsutil0 #MINVER#
init_report_message@Base 2.3.0
is_default_profile@Base 1.12.0~rc1
isdigit_string@Base 1.10.0
iso8601_to_nstime@Base 3.5.0
isprint_string@Base 1.10.0
isprint_utf8_string@Base 2.6.1
json_decode_string_inplace@Base 2.9.0
@ -181,6 +182,7 @@ libwsutil.so.0 libwsutil0 #MINVER#
started_with_special_privs@Base 1.10.0
test_for_directory@Base 1.12.0~rc1
test_for_fifo@Base 1.12.0~rc1
tm_is_valid@Base 3.5.0
type_util_gdouble_to_guint64@Base 1.10.0
type_util_guint64_to_gdouble@Base 1.10.0
ulaw2linear@Base 1.12.0~rc1

View File

@ -101,14 +101,20 @@ strings that include spaces.
=item -A E<lt>start timeE<gt>
Reads only the packets whose timestamp is on or after start time.
The time is given in the following format YYYY-MM-DD HH:MM:SS[.nnnnnnnnn]
(the decimal and fractional seconds are optional).
The time is given in ISO 8601 format, either
YYYY-MM-DD HH:MM:SS[.nnnnnnnnn][Z|±hh:mm] or
YYYY-MM-DDTHH:MM:SS[.nnnnnnnnn][Z|±hh:mm] .
The fractional seconds are optional, as is the time zone offset from UTC
(if the offset is omitted, local time is assumed).
=item -B E<lt>stop timeE<gt>
Reads only the packets whose timestamp is before stop time.
The time is given in the following format YYYY-MM-DD HH:MM:SS[.nnnnnnnnn]
(the decimal and fractional seconds are optional).
The time is given in ISO 8601 format, either
YYYY-MM-DD HH:MM:SS[.nnnnnnnnn][Z|±hh:mm] or
YYYY-MM-DDTHH:MM:SS[.nnnnnnnnn][Z|±hh:mm] .
The fractional seconds are optional, as is the time zone offset from UTC
(if the offset is omitted, local time is assumed).
=item -c E<lt>packets per fileE<gt>

View File

@ -759,9 +759,11 @@ print_usage(FILE *output)
fprintf(output, "Packet selection:\n");
fprintf(output, " -r keep the selected packets; default is to delete them.\n");
fprintf(output, " -A <start time> only read packets whose timestamp is after (or equal\n");
fprintf(output, " to) the given time (format as YYYY-MM-DD hh:mm:ss[.nnnnnnnnn]).\n");
fprintf(output, " to) the given time.\n");
fprintf(output, " -B <stop time> only read packets whose timestamp is before the\n");
fprintf(output, " given time (format as YYYY-MM-DD hh:mm:ss[.nnnnnnnnn]).\n");
fprintf(output, " given time.\n");
fprintf(output, " Time format for -A/-B options is\n");
fprintf(output, " YYYY-MM-DDThh:mm:ss[.nnnnnnnnn][Z|+-hh:mm]\n");
fprintf(output, "\n");
fprintf(output, "Duplicate packet removal:\n");
fprintf(output, " --novlan remove vlan info from packets before checking for duplicates.\n");
@ -1279,77 +1281,25 @@ main(int argc, char *argv[])
case 'A':
case 'B':
{
#define NSEC_MAXLEN 9
struct tm st_tm;
guint32 nsec = 0;
char *och;
memset(&st_tm,0,sizeof(struct tm));
if (!(och=strptime(optarg,"%Y-%m-%d %T", &st_tm))) {
goto invalid_time;
}
/* Sub-second support: see if the time is followed by a '.' */
if (och != NULL && *och != '\0') {
char *c;
char subsec[NSEC_MAXLEN+1] = "";
int nchars;
if (*och != '.') {
goto invalid_time;
}
och++;
c = subsec;
/* Ensure that only 1-9 digits follow the '.' */
for (nchars = 0; *och != '\0' && nchars < NSEC_MAXLEN; nchars++) {
if (!g_ascii_isdigit(*och)) {
goto invalid_time;
}
*c++ = *och++;
}
if (*och != '\0') {
goto invalid_time;
}
/* Right-pad what we do have, so eg. 5 = 500,000,000 ns */
for (; nchars < NSEC_MAXLEN; nchars++) {
*c++ = '0';
}
*c = '\0';
if (!ws_strtou32(subsec, NULL, &nsec) || nsec >= NANOSECS_PER_SEC) {
goto invalid_time;
}
}
nstime_t in_time;
check_startstop = TRUE;
st_tm.tm_isdst = -1;
/*
* XXX - this will normalize invalid dates rather than
* returning an error, so you could specify, for example,
* 2020-10-40 (to quote the macOS and probably *BSD manual
* page for ctime()/localtime()/mktime()/etc., "October 40
* is changed into November 9").
*
* Is that a bug or a feature?
*/
if (opt == 'A') {
starttime.secs = mktime(&st_tm);
starttime.nsecs = nsec;
have_starttime = TRUE;
} else {
stoptime.secs = mktime(&st_tm);
stoptime.nsecs = nsec;
have_stoptime = TRUE;
if (0 < iso8601_to_nstime(&in_time, optarg)) {
if (opt == 'A') {
nstime_copy(&starttime, &in_time);
have_starttime = TRUE;
} else {
nstime_copy(&stoptime, &in_time);
have_stoptime = TRUE;
}
break;
}
else {
fprintf(stderr, "editcap: \"%s\" isn't a valid date and time\n\n",
optarg);
ret = INVALID_OPTION;
goto clean_exit;
}
break;
invalid_time:
fprintf(stderr, "editcap: \"%s\" isn't a valid date and time\n\n",
optarg);
ret = INVALID_OPTION;
goto clean_exit;
}
case 'c':

View File

@ -762,125 +762,6 @@ nettrace_close(wtap *wth)
}
}
/* This attribute specification contains a timestamp that refers to the start of the
 * first trace data that is stored in this file.
 *
 * It is a complete timestamp including day, time and delta UTC hour. E.g.
 * "2001-09-11T09:30:47-05:00".
 */
#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))

/* Parse the begin-time attribute value starting at curr_pos.
 *
 * curr_pos: start of the attribute value (expected "YYYY-MM-DDThh:mm:ss",
 *           optionally followed by ".fraction"/",fraction" and "Z"/"+hh:mm"/"-hh:mm")
 * n:        number of bytes available at curr_pos for locating the closing "\"/>"
 * ts:       receives the parsed time; it is marked unset first and only
 *           filled in when the date and time are parsed and validated
 *
 * Returns:
 *  - curr_pos unchanged when the closing "\"/>" is not found within n bytes,
 *    when the text does not match the expected format, or when a field is
 *    out of range;
 *  - the position just past the closing "\"/>" when the value is shorter
 *    than two characters;
 *  - otherwise curr_pos advanced past the date/time and any fractional
 *    seconds (the UTC offset, if any, is consumed but not skipped).
 */
static char*
nettrace_parse_begin_time(char *curr_pos, size_t n, nstime_t *ts)
{
    /* Time vars */
    guint year, month, day, hour, minute, second, frac;
    int UTCdiffh, UTCdiffm = 0;
    int time_length = 0;
    int scan_found;
    static const guint days_in_month[12] = {
        31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
    };
    struct tm tm;
    char *end_pos;
    int length;

    nstime_set_unset(ts); /* mark time as invalid, until successfully converted */

    end_pos = g_strstr_len(curr_pos, n, "\"/>");
    if (end_pos == NULL) {
        /* No terminator in the buffer: doing arithmetic on a NULL pointer
         * would be undefined, so report "nothing parsed" instead. */
        return curr_pos;
    }
    length = (int)(end_pos - curr_pos);
    if (length < 2) {
        return end_pos + 3;
    }

    /* Scan for this format: 2001-09-11T09:30:47 Then we will parse any fractions and UTC offset */
    scan_found = sscanf(curr_pos, "%4u-%2u-%2uT%2u:%2u:%2u%n",
                        &year, &month, &day, &hour, &minute, &second, &time_length);
    if (scan_found != 6 || time_length != 19) {
        /* Not in the expected format; ts stays unset */
        return curr_pos;
    }

    /* Validate each field and fill in the broken-down time */
    tm.tm_year = year - 1900;
    if (month < 1 || month > 12) {
        return curr_pos;
    }
    tm.tm_mon = month - 1; /* Zero count */
    if (day < 1 ||
        day > ((month == 2 && isleap(year)) ? 29 : days_in_month[month - 1])) {
        /* Day 0 is as invalid as a day past the end of the month */
        return curr_pos;
    }
    tm.tm_mday = day;
    if (hour > 23) {
        return curr_pos;
    }
    tm.tm_hour = hour;
    if (minute > 59) {
        return curr_pos;
    }
    tm.tm_min = minute;
    if (second > 60) {
        /*
         * Yes, 60, for leap seconds - POSIX's and Windows'
         * refusal to believe in them notwithstanding.
         */
        return curr_pos;
    }
    tm.tm_sec = second;
    tm.tm_isdst = -1; /* daylight saving time info not known */

    /* Move curr_pos to end of parsed object and get that character 2019-01-10T10:14:56 */
    curr_pos += time_length;

    if (*curr_pos == '.' || *curr_pos == ',') {
        /* We have fractions */
        curr_pos++;
        if (1 == sscanf(curr_pos, "%u%n", &frac, &time_length)) {
            if ((frac >= 1000000000) || (frac == 0)) {
                ts->nsecs = 0;
            } else {
                /* Scale by the number of digits read so that e.g. ".5"
                 * becomes 500,000,000 ns */
                switch (time_length) { /* including leading zeros */
                case 1: ts->nsecs = frac * 100000000; break;
                case 2: ts->nsecs = frac * 10000000; break;
                case 3: ts->nsecs = frac * 1000000; break;
                case 4: ts->nsecs = frac * 100000; break;
                case 5: ts->nsecs = frac * 10000; break;
                case 6: ts->nsecs = frac * 1000; break;
                case 7: ts->nsecs = frac * 100; break;
                case 8: ts->nsecs = frac * 10; break;
                default: ts->nsecs = frac;
                }
            }
            curr_pos += time_length;
        }
    }

    if (*curr_pos == '-' || *curr_pos == '+' || *curr_pos == 'Z') {
        /* We have UTC offset. Take the sign from the text itself: with
         * "-00:30" the parsed hour is 0, which carries no sign. */
        gboolean negative_offset = (*curr_pos == '-');

        if (1 <= sscanf(curr_pos, "%3d:%2d", &UTCdiffh, &UTCdiffm)) {
            /* adjust for timezone */
            tm.tm_hour -= UTCdiffh;
            tm.tm_min -= negative_offset ? -UTCdiffm : UTCdiffm;
        } /* else 'Z' for Zero time */

        /* convert to UTC time */
#ifdef _WIN32
        ts->secs = _mkgmtime(&tm);
#else
        ts->secs = timegm(&tm);
#endif
    } else {
        /* no UTC offset means localtime in ISO 8601 */
        ts->secs = mktime(&tm);
    }

    return curr_pos;
}
/* Test the current file to see if it's one we can read.
* Set in file_access.c as the function to be called for this file type.
*/
@ -929,7 +810,7 @@ nettrace_3gpp_32_423_file_open(wtap *wth, int *err, gchar **err_info)
/* Ok it's our file. From here we'll need to free memory */
file_info = g_new0(nettrace_3gpp_32_423_file_info_t, 1);
curr_pos = nettrace_parse_begin_time(curr_pos, (guint)(bytes_read - (curr_pos - magic_buf)), &file_info->start_time);
curr_pos += iso8601_to_nstime(&file_info->start_time, curr_pos);
file_info->start_offset = start_offset + (curr_pos - magic_buf);
file_info->buffer = g_byte_array_sized_new(RINGBUFFER_START_SIZE);
g_byte_array_append(file_info->buffer, curr_pos, (guint)(bytes_read - (curr_pos - magic_buf)));

View File

@ -10,9 +10,12 @@
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include <stdio.h>
#include <string.h>
#include <glib.h>
#include "nstime.h"
#include "epochs.h"
#include "time_util.h"
/* this is #defined so that we can clearly see that we have the right number of
zeros, rather than as a guard against the number of nanoseconds in a second
@ -264,6 +267,192 @@ nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime)
return common_filetime_to_nstime(nstime, ftsecs, nsecs);
}
/*
* function: iso8601_to_nstime
* parses a character string for a date and time given in
* ISO 8601 date-time format (eg: 2014-04-07T05:41:56.782+00:00)
* and converts to an nstime_t
* returns number of chars parsed on success, or 0 on failure
*
* NB. ISO 8601 is actually a lot more flexible than the above format,
* much to a developer's chagrin. The -/T/: separators are technically
* optional.
* Code is here to allow for that, but short-circuited for now since
* our callers assume they're there.
*
* Future improvements could parse other ISO 8601 formats, such as
* YYYY-Www-D, YYYY-DDD, etc. For a relatively easy introduction to
* these formats, see wikipedia: https://en.wikipedia.org/wiki/ISO_8601
*/
guint8
iso8601_to_nstime(nstime_t *nstime, const char *ptr)
{
struct tm tm;
gint n_scanned = 0;
gint n_chars = 0;
guint frac = 0;
gint off_hr = 0;
gint off_min = 0;
guint8 ret_val = 0;
const char *start = ptr;
gboolean has_separator = FALSE;
gboolean have_offset = FALSE;
memset(&tm, 0, sizeof(tm));
tm.tm_isdst = -1;
nstime_set_unset(nstime);
/* There may be 2 or 0 dashes between the date parts */
has_separator = (*(ptr+4) == '-');
/* For now we require the separator to remove ambiguity */
if (!has_separator) return 0;
/* Note: sscanf is known to be inconsistent across platforms with respect
to whether a %n is counted as a return value or not, so we use '<'/'>='
*/
n_scanned = sscanf(ptr, has_separator ? "%4u-%2u-%2u%n" : "%4u%2u%2u%n",
&tm.tm_year,
&tm.tm_mon,
&tm.tm_mday,
&n_chars);
if (n_scanned >= 3) {
/* Got year, month, and day */
tm.tm_mon--; /* struct tm expects 0-based month */
tm.tm_year -= 1900; /* struct tm expects number of years since 1900 */
ptr += n_chars;
}
else {
return 0;
}
if (*ptr == 'T' || *ptr == ' ') {
/* The 'T' between date and time is optional if the meaning is
unambiguous. We also allow for ' ' here to support formats
such as editcap's -A/-B options */
ptr++;
}
else {
/* For now we require the separator to remove ambiguity;
remove this entire 'else' when we wish to change that */
return 0;
}
/* Now we're on to the time part. We'll require a minimum of hours and
minutes.
Test for a possible ':' */
has_separator = (*(ptr+2) == ':');
if (!has_separator) return 0;
n_scanned = sscanf(ptr, has_separator ? "%2u:%2u%n" : "%2u%2u%n",
&tm.tm_hour,
&tm.tm_min,
&n_chars);
if (n_scanned >= 2) {
ptr += n_chars;
}
else {
/* didn't get hours and minutes */
return 0;
}
/* Test for (whole) seconds */
if ((has_separator && *ptr == ':') ||
(!has_separator && g_ascii_isdigit(*ptr))) {
/* Looks like we should have them */
if (1 > sscanf(ptr, has_separator ? ":%2u%n" : "%2u%n",
&tm.tm_sec, &n_chars)) {
/* Couldn't get them */
return 0;
}
ptr += n_chars;
/* Now let's test for fractional seconds */
if (*ptr == '.' || *ptr == ',') {
/* Get fractional seconds */
ptr++;
if (1 <= sscanf(ptr, "%u%n", &frac, &n_chars)) {
/* normalize frac to nanoseconds */
if ((frac >= 1000000000) || (frac == 0)) {
frac = 0;
} else {
switch (n_chars) { /* including leading zeros */
case 1: frac *= 100000000; break;
case 2: frac *= 10000000; break;
case 3: frac *= 1000000; break;
case 4: frac *= 100000; break;
case 5: frac *= 10000; break;
case 6: frac *= 1000; break;
case 7: frac *= 100; break;
case 8: frac *= 10; break;
default: break;
}
}
ptr += n_chars;
}
/* If we didn't get frac, it's still its default of 0 */
}
}
else {
tm.tm_sec = 0;
}
/* Validate what we got so far. mktime() doesn't care about strange
values (and we use this to our advantage when calculating the
time zone offset) but we should at least start with something valid */
if (!tm_is_valid(&tm)) {
return 0;
}
/* Check for a time zone offset */
if (*ptr == '-' || *ptr == '+' || *ptr == 'Z') {
/* We have a UTC-relative offset */
if (*ptr == 'Z') {
off_hr = off_min = n_scanned = 0;
have_offset = TRUE;
ptr++;
}
else {
has_separator = (*(ptr+3) == ':');
if (!has_separator) return 0;
n_scanned = sscanf(ptr, has_separator ? "%3d:%2d%n" : "%3d%2d%n",
&off_hr,
&off_min,
&n_chars);
if (n_scanned >= 1) {
/* Definitely got hours */
have_offset = TRUE;
if (n_scanned >= 2) {
/* Got minutes too */
ptr += n_chars;
}
else {
/* Only got hours, just move ptr past the +hh or whatever */
off_min = 0;
ptr += 3;
}
}
else {
/* Didn't get a valid offset, treat as if there's none at all */
off_hr = off_min = n_scanned = 0;
have_offset = FALSE;
}
}
}
if (have_offset) {
tm.tm_hour -= off_hr;
tm.tm_min -= (off_hr < 0 ? -off_min : off_min);
nstime->secs = mktime_utc(&tm);
}
else {
/* No UTC offset given; ISO 8601 says this means localtime */
nstime->secs = mktime(&tm);
}
nstime->nsecs = frac;
ret_val = (guint)(ptr-start);
return ret_val;
}
/*
* Editor modelines
*

View File

@ -11,6 +11,7 @@
#ifndef __NSTIME_H__
#define __NSTIME_H__
#include <glib.h>
#include <time.h>
#include "ws_symbol_export.h"
@ -122,6 +123,11 @@ WS_DLL_PUBLIC gboolean filetime_to_nstime(nstime_t *nstime, guint64 filetime);
FALSE on failure */
WS_DLL_PUBLIC gboolean nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime);
/** parse an ISO 8601 format datetime string to nstime, returns number of
chars parsed on success, 0 on failure.
Note that nstime is set to unset in the case of failure */
WS_DLL_PUBLIC guint8 iso8601_to_nstime(nstime_t *nstime, const char *ptr);
#ifdef __cplusplus
}
#endif /* __cplusplus */

View File

@ -23,6 +23,9 @@
#include <windows.h>
#endif
/* Test if the given year is a leap year */
#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
/* converts a broken down date representation, relative to UTC,
* to a timestamp; it uses timegm() if it's available.
* Copied from Glib source gtimer.c
@ -47,7 +50,7 @@ mktime_utc(struct tm *tm)
/* count number of leap years */
yr = tm->tm_year + 1900;
if (tm->tm_mon + 1 < 3 && (yr % 4) == 0 && ((yr % 100) != 0 || (yr % 400) == 0))
if (tm->tm_mon + 1 < 3 && isleap(yr))
yr--;
retval += (((yr / 4) - (yr / 100) + (yr / 400)) - 477); /* 477 = ((1970 / 4) - (1970 / 100) + (1970 / 400)) */
@ -61,6 +64,42 @@ mktime_utc(struct tm *tm)
#endif /* !HAVE_TIMEGM */
}
/* Validate the values in a struct tm
 * Currently checks tm_mon, tm_mday, tm_hour, tm_min, and tm_sec
 * (tm_year is consulted only to decide whether February has 29 days);
 * disregards tm_wday, tm_yday, and tm_isdst.
 * Use this in situations where you wish to return an error rather than
 * normalizing invalid dates; otherwise you could specify, for example,
 * 2020-10-40 (to quote the macOS and probably *BSD manual
 * page for ctime()/localtime()/mktime()/etc., "October 40
 * is changed into November 9").
 *
 * Returns TRUE if every checked field is within its calendar range,
 * FALSE otherwise.
 */
gboolean
tm_is_valid(struct tm *tm)
{
    static const gint8 days_in_month[12] = {
        31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
    };
    int last_day;

    /* Check the month first: it indexes days_in_month below */
    if (tm->tm_mon < 0 || tm->tm_mon > 11) {
        return FALSE;
    }

    /* tm_year counts years since 1900, so convert it to the calendar year
     * before the leap-year test; tm_mon == 1 is February (0-based months) */
    if (tm->tm_mon == 1 && isleap(tm->tm_year + 1900)) {
        last_day = 29;
    } else {
        last_day = days_in_month[tm->tm_mon];
    }
    /* Days of the month are 1-based, so 0 is not a valid day */
    if (tm->tm_mday < 1 || tm->tm_mday > last_day) {
        return FALSE;
    }

    if (tm->tm_hour < 0 || tm->tm_hour > 23) {
        return FALSE;
    }
    if (tm->tm_min < 0 || tm->tm_min > 59) {
        return FALSE;
    }
    if (tm->tm_sec < 0 || tm->tm_sec > 60) {
        /* 60, not 59, to account for leap seconds */
        return FALSE;
    }
    return TRUE;
}
void get_resource_usage(double *user_time, double *sys_time) {
#ifndef _WIN32
struct rusage ru;

View File

@ -18,9 +18,21 @@ extern "C" {
#include <time.h>
/** Converts a broken down date representation, relative to UTC,
* to a timestamp
*/
WS_DLL_PUBLIC
time_t mktime_utc(struct tm *tm);
/** Validate the values in a struct tm.
* Currently checks tm_mon, tm_mday, tm_hour, tm_min, and tm_sec
* (tm_year is consulted only for the leap-year test);
* disregards tm_wday, tm_yday, and tm_isdst.
*
* @param tm The struct tm to validate.
*/
WS_DLL_PUBLIC
gboolean tm_is_valid(struct tm *tm);
/** Fetch the process CPU time.
*
* Fetch the current process user and system CPU times, convert them to