nstime: Support ISO 8601 basic format

Add support in iso8601_to_nstime for the ISO 8601 basic date/time
format, which omits the '-' and ':' separators.
This commit is contained in:
John Thacker 2021-11-30 01:04:07 -05:00
parent 261d223a30
commit cf0ecc4fe1
6 changed files with 66 additions and 29 deletions

View File

@ -1340,7 +1340,7 @@ main(int argc, char *argv[])
nstime_t in_time; nstime_t in_time;
check_startstop = TRUE; check_startstop = TRUE;
if ((0 < iso8601_to_nstime(&in_time, ws_optarg)) || (0 < unix_epoch_to_nstime(&in_time, ws_optarg))) { if ((0 < iso8601_to_nstime(&in_time, ws_optarg, ISO8601_DATETIME)) || (0 < unix_epoch_to_nstime(&in_time, ws_optarg))) {
if (opt == 'A') { if (opt == 'A') {
nstime_copy(&starttime, &in_time); nstime_copy(&starttime, &in_time);
have_starttime = TRUE; have_starttime = TRUE;

View File

@ -1792,7 +1792,7 @@ tvb_get_string_time(tvbuff_t *tvb, const gint offset, const gint length,
if (*ptr) { if (*ptr) {
if ((encoding & ENC_ISO_8601_DATE_TIME) == ENC_ISO_8601_DATE_TIME) { if ((encoding & ENC_ISO_8601_DATE_TIME) == ENC_ISO_8601_DATE_TIME) {
if ((num_chars = iso8601_to_nstime(ns, ptr))) { if ((num_chars = iso8601_to_nstime(ns, ptr, ISO8601_DATETIME))) {
errno = 0; errno = 0;
end = ptr + num_chars; end = ptr + num_chars;
} }

View File

@ -47,7 +47,7 @@ static gboolean eri_enb_log_get_packet(FILE_T fh, wtap_rec* rec,
length = length - 1; length = length - 1;
} }
if (0 < iso8601_to_nstime(&packet_time, line+1)) { if (0 < iso8601_to_nstime(&packet_time, line+1, ISO8601_DATETIME)) {
rec->ts.secs = packet_time.secs; rec->ts.secs = packet_time.secs;
rec->ts.nsecs = packet_time.nsecs; rec->ts.nsecs = packet_time.nsecs;
rec->presence_flags |= WTAP_HAS_TS; rec->presence_flags |= WTAP_HAS_TS;

View File

@ -807,7 +807,7 @@ nettrace_3gpp_32_423_file_open(wtap *wth, int *err, gchar **err_info)
/* Ok it's our file. From here we'll need to free memory */ /* Ok it's our file. From here we'll need to free memory */
file_info = g_new0(nettrace_3gpp_32_423_file_info_t, 1); file_info = g_new0(nettrace_3gpp_32_423_file_info_t, 1);
curr_pos += iso8601_to_nstime(&file_info->start_time, curr_pos); curr_pos += iso8601_to_nstime(&file_info->start_time, curr_pos, ISO8601_DATETIME);
file_info->start_offset = start_offset + (curr_pos - magic_buf); file_info->start_offset = start_offset + (curr_pos - magic_buf);
file_info->buffer = g_byte_array_sized_new(RINGBUFFER_START_SIZE); file_info->buffer = g_byte_array_sized_new(RINGBUFFER_START_SIZE);
g_byte_array_append(file_info->buffer, curr_pos, (guint)(bytes_read - (curr_pos - magic_buf))); g_byte_array_append(file_info->buffer, curr_pos, (guint)(bytes_read - (curr_pos - magic_buf)));

View File

@ -279,17 +279,27 @@ nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime)
* returns number of chars parsed on success, or 0 on failure * returns number of chars parsed on success, or 0 on failure
* *
* NB. ISO 8601 is actually a lot more flexible than the above format, * NB. ISO 8601 is actually a lot more flexible than the above format,
* much to a developer's chagrin. The -/T/: separators are technically * much to a developer's chagrin. The "basic format" is distinguished from
* optional. * the "extended format" by lacking the - and : separators. This function
* Code is here to allow for that, but short-circuited for now since * supports both the basic and extended format (as well as both simultaneously)
* our callers assume they're there. * with several common options and extensions. Time resolution is supported
* up to nanoseconds (9 fractional digits) or down to whole minutes (omitting
* the seconds component in the latter case). The T separator can be replaced
* by a space in either format (a common extension not in ISO 8601 but found
* in, e.g., RFC 3339) or omitted entirely in the basic format.
*
* Many standards that use ISO 8601 implement profiles with additional
* constraints, such as requiring that the seconds field be present, only
* allowing "." as the decimal separator, or limiting the number of fractional
* digits. Callers that wish to check constraints not yet enforced by a
* profile supported by the function must do so themselves.
* *
* Future improvements could parse other ISO 8601 formats, such as * Future improvements could parse other ISO 8601 formats, such as
* YYYY-Www-D, YYYY-DDD, etc. For a relatively easy introduction to * YYYY-Www-D, YYYY-DDD, etc. For a relatively easy introduction to
* these formats, see wikipedia: https://en.wikipedia.org/wiki/ISO_8601 * these formats, see wikipedia: https://en.wikipedia.org/wiki/ISO_8601
*/ */
guint8 guint8
iso8601_to_nstime(nstime_t *nstime, const char *ptr) iso8601_to_nstime(nstime_t *nstime, const char *ptr, iso8601_fmt_e format)
{ {
struct tm tm; struct tm tm;
gint n_scanned = 0; gint n_scanned = 0;
@ -307,10 +317,7 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
tm.tm_isdst = -1; tm.tm_isdst = -1;
nstime_set_unset(nstime); nstime_set_unset(nstime);
/* The ISO 8901 Basic format lacks the - and : separators, while the /* Verify that we start with a four digit year and then look for the
* Extended format has them. (Both formats have the 'T' separator
* between date and time, which may be omitted by mutual agreement.)
* Verify that we start with a four digit year and then look for the
* separator. */ * separator. */
for (n_scanned = 0; n_scanned < 4; n_scanned++) { for (n_scanned = 0; n_scanned < 4; n_scanned++) {
if (!g_ascii_isdigit(*ptr)) { if (!g_ascii_isdigit(*ptr)) {
@ -320,27 +327,42 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
tm.tm_year += *ptr++ - '0'; tm.tm_year += *ptr++ - '0';
} }
if (*ptr == '-') { if (*ptr == '-') {
switch (format) {
case ISO8601_DATETIME_BASIC:
return 0;
case ISO8601_DATETIME:
case ISO8601_DATETIME_AUTO:
default:
has_separator = TRUE; has_separator = TRUE;
ptr++;
};
} else if (g_ascii_isdigit(*ptr)) { } else if (g_ascii_isdigit(*ptr)) {
switch (format) {
case ISO8601_DATETIME:
return 0;
case ISO8601_DATETIME_BASIC:
case ISO8601_DATETIME_AUTO:
default:
has_separator = FALSE; has_separator = FALSE;
};
} else { } else {
return 0; return 0;
} }
/* For now we require the separator to remove ambiguity */
if (!has_separator) return 0;
tm.tm_year -= 1900; /* struct tm expects number of years since 1900 */ tm.tm_year -= 1900; /* struct tm expects number of years since 1900 */
ptr++;
/* Note: sscanf is known to be inconsistent across platforms with respect /* Note: sscanf is known to be inconsistent across platforms with respect
to whether a %n is counted as a return value or not, so we use '<'/'>=' to whether a %n is counted as a return value or not (XXX: Is this
still true, despite the express comments of C99 §7.19.6.2 12?), so we
use '<'/'>='
*/ */
/* XXX: sscanf allows an optional sign indicator before each integer /* XXX: sscanf allows an optional sign indicator before each integer
* converted (whether with %d or %u), so this will convert some bogus * converted (whether with %d or %u), so this will convert some bogus
* strings. Either checking afterwards or doing the whole thing by hand * strings. Either checking afterwards or doing the whole thing by hand
* as with the year above is the only correct way. (strptime certainly * as with the year above is the only correct way. (strptime certainly
* can't handle the no separator "Basic" format.) * can't handle the basic format.)
*/ */
n_scanned = sscanf(ptr, has_separator ? "%2u-%2u%n" : "%2u%2u%n", n_scanned = sscanf(ptr, has_separator ? "%2u-%2u%n" : "%2u%2u%n",
&tm.tm_mon, &tm.tm_mon,
@ -357,13 +379,14 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
if (*ptr == 'T' || *ptr == ' ') { if (*ptr == 'T' || *ptr == ' ') {
/* The 'T' between date and time is optional if the meaning is /* The 'T' between date and time is optional if the meaning is
unambiguous. We also allow for ' ' here to support formats unambiguous. We also allow for ' ' here per RFC 3339 to support
such as editcap's -A/-B options */ formats such as editcap's -A/-B options. */
ptr++; ptr++;
} }
else { else if (has_separator) {
/* For now we require the separator to remove ambiguity; /* Allow no separator between date and time iff we have no
remove this entire 'else' when we wish to change that */ separator between units. (Some extended formats may negotiate
no separator here, so this could be changed.) */
return 0; return 0;
} }
@ -420,12 +443,14 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
} }
} }
else { else {
/* No seconds. ISO 8601 allows decimal fractions of a minute here,
* but that's pretty rare in practice. Could be added later if needed.
*/
tm.tm_sec = 0; tm.tm_sec = 0;
} }
/* Validate what we got so far. mktime() doesn't care about strange /* Validate what we got so far. mktime() doesn't care about strange
values (and we use this to our advantage when calculating the values but we should at least start with something valid */
time zone offset) but we should at least start with something valid */
if (!tm_is_valid(&tm)) { if (!tm_is_valid(&tm)) {
return 0; return 0;
} }
@ -471,6 +496,12 @@ iso8601_to_nstime(nstime_t *nstime, const char *ptr)
if (sign == '+') { if (sign == '+') {
nstime->secs += (off_hr * 3600) + (off_min * 60); nstime->secs += (off_hr * 3600) + (off_min * 60);
} else if (sign == '-') { } else if (sign == '-') {
/* -00:00 is illegal according to ISO 8601, but RFC 3339 allows
* it under a convention where -00:00 means "time in UTC is known,
* local timezone is unknown." This has the same value as an
* offset of Z or +00:00, but semantically implies that UTC is
* not the preferred time zone, which is immaterial to us.
*/
nstime->secs -= ((-off_hr) * 3600) + (off_min * 60); nstime->secs -= ((-off_hr) * 3600) + (off_min * 60);
} }
} }

View File

@ -123,10 +123,16 @@ WS_DLL_PUBLIC gboolean filetime_to_nstime(nstime_t *nstime, guint64 filetime);
FALSE on failure */ FALSE on failure */
WS_DLL_PUBLIC gboolean nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime); WS_DLL_PUBLIC gboolean nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime);
/**
 * Selects which ISO 8601 date/time layout iso8601_to_nstime() accepts.
 */
typedef enum {
ISO8601_DATETIME, /**< Extended format only, e.g. 2014-07-04T12:34:56.789+00:00;
                       input with no - after the year is rejected */
ISO8601_DATETIME_BASIC, /**< ISO 8601 basic format only, i.e. no - : separators;
                             input with a - after the year is rejected */
ISO8601_DATETIME_AUTO, /**< Autodetect the presence of separators */
} iso8601_fmt_e;
/** parse an ISO 8601 format datetime string to nstime, returns number of /** parse an ISO 8601 format datetime string to nstime, returns number of
chars parsed on success, 0 on failure. chars parsed on success, 0 on failure.
Note that nstime is set to unset in the case of failure */ Note that nstime is set to unset in the case of failure */
WS_DLL_PUBLIC guint8 iso8601_to_nstime(nstime_t *nstime, const char *ptr); WS_DLL_PUBLIC guint8 iso8601_to_nstime(nstime_t *nstime, const char *ptr, iso8601_fmt_e format);
/** parse an Unix epoch timestamp format datetime string to nstime, returns /** parse an Unix epoch timestamp format datetime string to nstime, returns
number of chars parsed on success, 0 on failure. number of chars parsed on success, 0 on failure.