nstime: Support ISO 8601 basic format

Add support in iso8601_to_nstime for the ISO 8601 Basic date/time
format that lacks the - and : separators.
This commit is contained in:
John Thacker 2021-11-30 01:04:07 -05:00
parent 261d223a30
commit cf0ecc4fe1
6 changed files with 66 additions and 29 deletions

View File

@ -1340,7 +1340,7 @@ main(int argc, char *argv[])
nstime_t in_time;
check_startstop = TRUE;
if ((0 < iso8601_to_nstime(&in_time, ws_optarg)) || (0 < unix_epoch_to_nstime(&in_time, ws_optarg))) {
if ((0 < iso8601_to_nstime(&in_time, ws_optarg, ISO8601_DATETIME)) || (0 < unix_epoch_to_nstime(&in_time, ws_optarg))) {
if (opt == 'A') {
nstime_copy(&starttime, &in_time);
have_starttime = TRUE;

View File

@ -1792,7 +1792,7 @@ tvb_get_string_time(tvbuff_t *tvb, const gint offset, const gint length,
if (*ptr) {
if ((encoding & ENC_ISO_8601_DATE_TIME) == ENC_ISO_8601_DATE_TIME) {
if ((num_chars = iso8601_to_nstime(ns, ptr))) {
if ((num_chars = iso8601_to_nstime(ns, ptr, ISO8601_DATETIME))) {
errno = 0;
end = ptr + num_chars;
}

View File

@ -47,7 +47,7 @@ static gboolean eri_enb_log_get_packet(FILE_T fh, wtap_rec* rec,
length = length - 1;
}
if (0 < iso8601_to_nstime(&packet_time, line+1)) {
if (0 < iso8601_to_nstime(&packet_time, line+1, ISO8601_DATETIME)) {
rec->ts.secs = packet_time.secs;
rec->ts.nsecs = packet_time.nsecs;
rec->presence_flags |= WTAP_HAS_TS;

View File

@ -807,7 +807,7 @@ nettrace_3gpp_32_423_file_open(wtap *wth, int *err, gchar **err_info)
/* Ok it's our file. From here we'll need to free memory */
file_info = g_new0(nettrace_3gpp_32_423_file_info_t, 1);
curr_pos += iso8601_to_nstime(&file_info->start_time, curr_pos);
curr_pos += iso8601_to_nstime(&file_info->start_time, curr_pos, ISO8601_DATETIME);
file_info->start_offset = start_offset + (curr_pos - magic_buf);
file_info->buffer = g_byte_array_sized_new(RINGBUFFER_START_SIZE);
g_byte_array_append(file_info->buffer, curr_pos, (guint)(bytes_read - (curr_pos - magic_buf)));

View File

@ -279,17 +279,27 @@ nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime)
* returns number of chars parsed on success, or 0 on failure
*
* NB. ISO 8601 is actually a lot more flexible than the above format,
* much to a developer's chagrin. The -/T/: separators are technically
* optional.
* Code is here to allow for that, but short-circuited for now since
* our callers assume they're there.
* much to a developer's chagrin. The "basic format" is distinguished from
* the "extended format" by lacking the - and : separators. This function
* supports both the basic and extended format (as well as both simultaneously)
* with several common options and extensions. Time resolution is supported
* up to nanoseconds (9 fractional digits) or down to whole minutes (omitting
* the seconds component in the latter case). The T separator can be replaced
* by a space in either format (a common extension not in ISO 8601 but found
* in, e.g., RFC 3339) or omitted entirely in the basic format.
*
* Many standards that use ISO 8601 implement profiles with additional
* constraints, such as requiring that the seconds field be present, only
* allowing "." as the decimal separator, or limiting the number of fractional
* digits. Callers that wish to check constraints not yet enforced by a
* profile supported by the function must do so themselves.
*
* Future improvements could parse other ISO 8601 formats, such as
* YYYY-Www-D, YYYY-DDD, etc. For a relatively easy introduction to
* these formats, see wikipedia: https://en.wikipedia.org/wiki/ISO_8601
*/
guint8
iso8601_to_nstime(nstime_t *nstime, const char *ptr)
iso8601_to_nstime(nstime_t *nstime, const char *ptr, iso8601_fmt_e format)
{
struct tm tm;
gint n_scanned = 0;
@ -307,10 +317,7 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
tm.tm_isdst = -1;
nstime_set_unset(nstime);
/* The ISO 8901 Basic format lacks the - and : separators, while the
* Extended format has them. (Both formats have the 'T' separator
* between date and time, which may be omitted by mutual agreement.)
* Verify that we start with a four digit year and then look for the
/* Verify that we start with a four digit year and then look for the
* separator. */
for (n_scanned = 0; n_scanned < 4; n_scanned++) {
if (!g_ascii_isdigit(*ptr)) {
@ -320,27 +327,42 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
tm.tm_year += *ptr++ - '0';
}
if (*ptr == '-') {
has_separator = TRUE;
switch (format) {
case ISO8601_DATETIME_BASIC:
return 0;
case ISO8601_DATETIME:
case ISO8601_DATETIME_AUTO:
default:
has_separator = TRUE;
ptr++;
};
} else if (g_ascii_isdigit(*ptr)) {
has_separator = FALSE;
switch (format) {
case ISO8601_DATETIME:
return 0;
case ISO8601_DATETIME_BASIC:
case ISO8601_DATETIME_AUTO:
default:
has_separator = FALSE;
};
} else {
return 0;
}
/* For now we require the separator to remove ambiguity */
if (!has_separator) return 0;
tm.tm_year -= 1900; /* struct tm expects number of years since 1900 */
ptr++;
/* Note: sscanf is known to be inconsistent across platforms with respect
to whether a %n is counted as a return value or not, so we use '<'/'>='
to whether a %n is counted as a return value or not (XXX: Is this
still true, despite the express comments of C99 §7.19.6.2 12?), so we
use '<'/'>='
*/
/* XXX: sscanf allows an optional sign indicator before each integer
* converted (whether with %d or %u), so this will convert some bogus
* strings. Either checking afterwards or doing the whole thing by hand
* as with the year above is the only correct way. (strptime certainly
* can't handle the no separator "Basic" format.)
* can't handle the basic format.)
*/
n_scanned = sscanf(ptr, has_separator ? "%2u-%2u%n" : "%2u%2u%n",
&tm.tm_mon,
@ -357,13 +379,14 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
if (*ptr == 'T' || *ptr == ' ') {
/* The 'T' between date and time is optional if the meaning is
unambiguous. We also allow for ' ' here to support formats
such as editcap's -A/-B options */
unambiguous. We also allow for ' ' here per RFC 3339 to support
formats such as editcap's -A/-B options. */
ptr++;
}
else {
/* For now we require the separator to remove ambiguity;
remove this entire 'else' when we wish to change that */
else if (has_separator) {
/* Allow no separator between date and time iff we have no
separator between units. (Some extended formats may negotiate
no separator here, so this could be changed.) */
return 0;
}
@ -420,12 +443,14 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
}
}
else {
/* No seconds. ISO 8601 allows decimal fractions of a minute here,
* but that's pretty rare in practice. Could be added later if needed.
*/
tm.tm_sec = 0;
}
/* Validate what we got so far. mktime() doesn't care about strange
values (and we use this to our advantage when calculating the
time zone offset) but we should at least start with something valid */
values but we should at least start with something valid */
if (!tm_is_valid(&tm)) {
return 0;
}
@ -471,6 +496,12 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
if (sign == '+') {
nstime->secs += (off_hr * 3600) + (off_min * 60);
} else if (sign == '-') {
/* -00:00 is illegal according to ISO 8601, but RFC 3339 allows
* it under a convention where -00:00 means "time in UTC is known,
* local timezone is unknown." This has the same value as an
* offset of Z or +00:00, but semantically implies that UTC is
* not the preferred time zone, which is immaterial to us.
*/
nstime->secs -= ((-off_hr) * 3600) + (off_min * 60);
}
}

View File

@ -123,10 +123,16 @@ WS_DLL_PUBLIC gboolean filetime_to_nstime(nstime_t *nstime, guint64 filetime);
FALSE on failure */
WS_DLL_PUBLIC gboolean nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime);
/** Selects which ISO 8601 date/time layouts iso8601_to_nstime() accepts. */
typedef enum {
    ISO8601_DATETIME       = 0, /**< Extended format, e.g. 2014-07-04T12:34:56.789+00:00 */
    ISO8601_DATETIME_BASIC = 1, /**< Basic format, i.e. no - : separators */
    ISO8601_DATETIME_AUTO  = 2  /**< Autodetect the presence of separators */
} iso8601_fmt_e;
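/** Parse an ISO 8601 format datetime string to nstime; returns the number
of chars parsed on success, 0 on failure.
The format argument restricts what may follow the four-digit year:
ISO8601_DATETIME requires a '-' separator there, ISO8601_DATETIME_BASIC
requires that the next character be a digit (no separator), and
ISO8601_DATETIME_AUTO accepts either.
Note that nstime is set to unset in the case of failure */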
WS_DLL_PUBLIC guint8 iso8601_to_nstime(nstime_t *nstime, const char *ptr);
WS_DLL_PUBLIC guint8 iso8601_to_nstime(nstime_t *nstime, const char *ptr, iso8601_fmt_e format);
/** parse an Unix epoch timestamp format datetime string to nstime, returns
number of chars parsed on success, 0 on failure.