aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2024-05-24 23:56:20 +0200
committerGitHub <noreply@github.com>2024-05-24 23:56:20 +0200
commitfb22c59d1b3b99be4e9f1774201174fa4b039cb5 (patch)
treeaee0aed3c0f480a6a6e081f0ceaec33cff58dc76
parente1c4b9b06a3beea4ce0e76f937f58cb6c121c5d8 (diff)
parentb945e3e708b41b456bec99b48c4b5aeab16ea53e (diff)
Merge pull request #3625 from Kelimion/iso8061
Add dedicated ISO 8601 parser.
-rw-r--r--core/time/iso8061.odin113
-rw-r--r--tests/core/time/test_core_time.odin71
2 files changed, 180 insertions, 4 deletions
diff --git a/core/time/iso8061.odin b/core/time/iso8061.odin
new file mode 100644
index 000000000..528e0b00a
--- /dev/null
+++ b/core/time/iso8061.odin
@@ -0,0 +1,113 @@
+package time
+// Parsing ISO 8601 date/time strings into time.Time.
+
+import dt "core:time/datetime"
+
+// Parses an ISO 8601 date/time string and returns the moment normalized to UTC:
+// the UTC offset found in the string is subtracted from the parsed time.
+// Only 4-digit years are accepted.
+// If `is_leap` is not nil it is forwarded to `iso8601_to_time_and_offset`, which
+// stores `true` in it when the input denoted a leap second.
+// Leap seconds are smeared into 23:59:59.
+// `consumed` is the number of bytes parsed; it is 0 when parsing failed.
+iso8601_to_time_utc :: proc(iso_datetime: string, is_leap: ^bool = nil) -> (res: Time, consumed: int) {
+	offset_minutes: int
+	res, offset_minutes, consumed = iso8601_to_time_and_offset(iso_datetime, is_leap)
+
+	// A positive offset means the written time is ahead of UTC, so shift it back.
+	res._nsec -= i64(offset_minutes) * i64(Minute)
+	return
+}
+
+// Parses an ISO 8601 string and returns the Time as written (the offset is NOT applied),
+// together with the UTC offset in minutes.
+// e.g. 1985-04-12T23:20:50.52Z
+// Note: Only 4-digit years are accepted.
+// If `is_leap` is not nil, it is set to whether the moment was a leap second.
+// Leap seconds are smeared into 23:59:59.
+// On failure all results are zero; in particular `consumed` is 0.
+iso8601_to_time_and_offset :: proc(iso_datetime: string, is_leap: ^bool = nil) -> (res: Time, utc_offset: int, consumed: int) {
+	components, offset_minutes, was_leap, n := iso8601_to_components(iso_datetime)
+	if n == 0 {
+		// Nothing was parsed; `is_leap` is left untouched.
+		return
+	}
+
+	if is_leap != nil {
+		is_leap^ = was_leap
+	}
+
+	t, ok := datetime_to_time(components.year, components.month, components.day, components.hour, components.minute, components.second, components.nano)
+	if !ok {
+		return {}, 0, 0
+	}
+	return t, offset_minutes, n
+}
+
+// Parses an ISO 8601 string into a datetime.DateTime and a UTC offset in minutes.
+// e.g. 1985-04-12T23:20:50.52Z
+// `is_leap` reports whether the input denoted a leap second, and `consumed` is the
+// number of bytes parsed. When the underlying parser reports failure, every result
+// is zero (so `consumed == 0` signals an error).
+// NOTE(review): the original comment stated that no component validation is performed
+// (e.g. hour = 25 would be returned as-is), but the helper fails whenever
+// `dt.components_to_datetime` reports an error - confirm which behaviour is intended.
+iso8601_to_components :: proc(iso_datetime: string) -> (res: dt.DateTime, utc_offset: int, is_leap: bool, consumed: int) {
+	ok: bool
+	// The private helper orders its results differently: consumed comes before is_leap.
+	res, utc_offset, consumed, is_leap, ok = _iso8601_to_components(iso_datetime)
+	if !ok {
+		return {}, 0, false, 0
+	}
+	return
+}
+
+// Parses an ISO 8601 string of the shape
+//     YYYY-MM-DD(T|t| )hh:mm:ss[.fraction][Z|z|+hh:mm|-hh:mm]
+// into a datetime.DateTime and a UTC offset in minutes.
+//
+// Results:
+// - `res`:        the parsed date and time as written; the offset is not applied.
+// - `utc_offset`: offset in minutes, negative for `-hh:mm`; 0 for `Z`/`z` or when absent.
+// - `consumed`:   number of bytes parsed.
+// - `is_leap`:    `true` if the input had second 60 at minute 59; the second is stored as 59.
+// - `ok`:         `false` if the string is too short, a field fails to scan, or
+//                 `dt.components_to_datetime` reports an error.
+//
+// Fractional digits beyond nanosecond precision are consumed and truncated.
+@(private)
+_iso8601_to_components :: proc(iso_datetime: string) -> (res: dt.DateTime, utc_offset: int, consumed: int, is_leap: bool, ok: bool) {
+	// Both the fraction and the UTC designator are optional, so the shortest
+	// accepted input is the 19-character YYYY-MM-DDThh:mm:ss
+	(len(iso_datetime) >= 19) or_return
+
+	// Scan and eat YYYY-MM-DD[Tt ], then scan and eat HH:MM:SS, leave separator
+	year := scan_digits(iso_datetime[0:], "-", 4) or_return
+	month := scan_digits(iso_datetime[5:], "-", 2) or_return
+	day := scan_digits(iso_datetime[8:], "Tt ", 2) or_return
+	hour := scan_digits(iso_datetime[11:], ":", 2) or_return
+	minute := scan_digits(iso_datetime[14:], ":", 2) or_return
+	second := scan_digits(iso_datetime[17:], "", 2) or_return
+	nanos := 0
+	count := 19
+
+	// Scan fractional seconds. The bounds check matters now that a bare
+	// 19-character timestamp is accepted.
+	if count < len(iso_datetime) && iso_datetime[count] == '.' {
+		count += 1 // consume '.'
+		multiplier := 100_000_000
+		for digit in iso_datetime[count:] {
+			if digit < '0' || digit > '9' {
+				break
+			}
+			// After nine digits `multiplier` is 0, so further digits are still
+			// consumed but contribute nothing. Stopping early instead would leave
+			// them in front of the UTC offset, which would then be silently ignored.
+			nanos += int(digit - '0') * multiplier
+			multiplier /= 10
+			count += 1
+		}
+	}
+
+	// Leap second handling
+	if minute == 59 && second == 60 {
+		second = 59
+		is_leap = true
+	}
+
+	err: dt.Error
+	if res, err = dt.components_to_datetime(year, month, day, hour, minute, second, nanos); err != .None {
+		return {}, 0, 0, false, false
+	}
+
+	// No UTC designator: report the time as written with a zero offset.
+	if count == len(iso_datetime) {
+		return res, utc_offset, count, is_leap, true
+	}
+
+	// Scan UTC offset. Any other trailing character is left unconsumed.
+	switch iso_datetime[count] {
+	case 'Z', 'z':
+		utc_offset = 0
+		count += 1
+	case '+', '-':
+		// Sign, two hour digits, ':', two minute digits.
+		(len(iso_datetime[count:]) >= 6) or_return
+		offset_hour := scan_digits(iso_datetime[count+1:], ":", 2) or_return
+		offset_minute := scan_digits(iso_datetime[count+4:], "", 2) or_return
+
+		utc_offset = 60 * offset_hour + offset_minute
+		utc_offset *= -1 if iso_datetime[count] == '-' else 1
+		count += 6
+	}
+	return res, utc_offset, count, is_leap, true
+} \ No newline at end of file
diff --git a/tests/core/time/test_core_time.odin b/tests/core/time/test_core_time.odin
index 1f936e4a7..c6c6869a7 100644
--- a/tests/core/time/test_core_time.odin
+++ b/tests/core/time/test_core_time.odin
@@ -42,6 +42,7 @@ main :: proc() {
test_ordinal_date_roundtrip(&t)
test_component_to_time_roundtrip(&t)
test_parse_rfc3339_string(&t)
+ test_parse_iso8601_string(&t)
for _, leak in track.allocation_map {
expect(&t, false, fmt.tprintf("%v leaked %m\n", leak.location, leak.size))
@@ -91,14 +92,49 @@ RFC3339_Test :: struct{
// These are based on RFC 3339's examples, see https://www.rfc-editor.org/rfc/rfc3339#page-10
rfc3339_tests :: []RFC3339_Test{
// This represents 20 minutes and 50.52 seconds after the 23rd hour of April 12th, 1985 in UTC.
- {"1985-04-12T23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
- {"1985-04-12t23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
{"1985-04-12 23:20:50.52Z", {482196050520000000}, true, 0, 23, false},
// Same, but lowercase z
{"1985-04-12 23:20:50.52z", {482196050520000000}, true, 0, 23, false},
// This represents 39 minutes and 57 seconds after the 16th hour of December 19th, 1996 with an offset of -08:00 from UTC (Pacific Standard Time).
// Note that this is equivalent to 1996-12-20T00:39:57Z in UTC.
+ {"1996-12-19 16:39:57-08:00", {851013597000000000}, false, -480, 25, false},
+ {"1996-12-19 16:39:57-08:00", {851042397000000000}, true, 0, 25, false},
+ {"1996-12-20 00:39:57Z", {851042397000000000}, false, 0, 20, false},
+
+ // This represents the leap second inserted at the end of 1990.
+ // It'll be represented as 1990-12-31 23:59:59 UTC after parsing, and `is_leap` will be set to `true`.
+ {"1990-12-31 23:59:60Z", {662687999000000000}, true, 0, 20, true},
+
+ // This represents the same leap second in Pacific Standard Time, 8 hours behind UTC.
+ {"1990-12-31 15:59:60-08:00", {662687999000000000}, true, 0, 25, true},
+
+ // This represents the same instant of time as noon, January 1, 1937, Netherlands time.
+ // Standard time in the Netherlands was exactly 19 minutes and 32.13 seconds ahead of UTC by law
+ // from 1909-05-01 through 1937-06-30. This time zone cannot be represented exactly using the
+ // HH:MM format, and this timestamp uses the closest representable UTC offset.
+ {"1937-01-01 12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
+ {"1937-01-01 12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
+}
+
+// One table entry for `test_parse_iso8601_string`: an input string plus the results
+// the ISO 8601 parser is expected to produce for it.
+ISO8601_Test :: struct{
+ // Input string handed to the parser.
+ iso_8601: string,
+ // Expected resulting time.
+ datetime: time.Time,
+ // If true the entry is parsed with `time.iso8601_to_time_utc`,
+ // otherwise with `time.iso8601_to_time_and_offset`.
+ apply_offset: bool,
+ // Expected UTC offset; only compared when `apply_offset` is false.
+ utc_offset: int,
+ // Expected `consumed` value returned by the parser.
+ consumed: int,
+ // Expected value of the leap second flag.
+ is_leap: bool,
+}
+
+// These are based on RFC 3339's examples, see https://www.rfc-editor.org/rfc/rfc3339#page-10
+iso8601_tests :: []ISO8601_Test{
+ // This represents 20 minutes and .003362 seconds after the 23rd hour of April 12th, 1985 in UTC.
+ {"1985-04-12T23:20:50.003362", {482196050003362000}, true, 0, 26, false},
+ {"1985-04-12t23:20:50.003362", {482196050003362000}, true, 0, 26, false},
+ {"1985-04-12 23:20:50.003362", {482196050003362000}, true, 0, 26, false},
+
+ // This represents 39 minutes and 57 seconds after the 16th hour of December 19th, 1996 with an offset of -08:00 from UTC (Pacific Standard Time).
+ // Note that this is equivalent to 1996-12-20T00:39:57Z in UTC.
{"1996-12-19T16:39:57-08:00", {851013597000000000}, false, -480, 25, false},
{"1996-12-19T16:39:57-08:00", {851042397000000000}, true, 0, 25, false},
{"1996-12-20T00:39:57Z", {851042397000000000}, false, 0, 20, false},
@@ -114,8 +150,8 @@ rfc3339_tests :: []RFC3339_Test{
// Standard time in the Netherlands was exactly 19 minutes and 32.13 seconds ahead of UTC by law
// from 1909-05-01 through 1937-06-30. This time zone cannot be represented exactly using the
// HH:MM format, and this timestamp uses the closest representable UTC offset.
- {"1937-01-01T12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
- {"1937-01-01T12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
+ {"1937-01-01 12:00:27.87+00:20", {-1041335972130000000}, false, 20, 28, false},
+ {"1937-01-01 12:00:27.87+00:20", {-1041337172130000000}, true, 0, 28, false},
}
@test
@@ -145,6 +181,33 @@ test_parse_rfc3339_string :: proc(t: ^testing.T) {
}
}
+// Runs every entry of `iso8601_tests` through the ISO 8601 parser.
+// Entries with `apply_offset` set go through `time.iso8601_to_time_utc`; the others go
+// through `time.iso8601_to_time_and_offset`, which additionally has its UTC offset checked.
+// Time, offset and leap flag are only compared once the consumed count matches,
+// because a mismatch there already means parsing went wrong.
+@test
+test_parse_iso8601_string :: proc(t: ^testing.T) {
+	for test in iso8601_tests {
+		is_leap := false
+		if test.apply_offset {
+			res, consumed := time.iso8601_to_time_utc(test.iso_8601, &is_leap)
+			msg := fmt.tprintf("[apply offset] Parsing failed: %v -> %v (nsec: %v). Expected %v consumed, got %v", test.iso_8601, res, res._nsec, test.consumed, consumed)
+			expect(t, test.consumed == consumed, msg)
+
+			if test.consumed == consumed {
+				expect(t, test.datetime == res, fmt.tprintf("Time didn't match. Expected %v (%v), got %v (%v)", test.datetime, test.datetime._nsec, res, res._nsec))
+				expect(t, test.is_leap == is_leap, fmt.tprintf("Leap second flag didn't match. Expected %v, got %v", test.is_leap, is_leap))
+			}
+		} else {
+			// Pass `&is_leap` here as well; without it the leap check below would
+			// compare against the initial `false` instead of the parser's result.
+			res, offset, consumed := time.iso8601_to_time_and_offset(test.iso_8601, &is_leap)
+			msg := fmt.tprintf("Parsing failed: %v -> %v (nsec: %v), offset: %v. Expected %v consumed, got %v", test.iso_8601, res, res._nsec, offset, test.consumed, consumed)
+			expect(t, test.consumed == consumed, msg)
+
+			if test.consumed == consumed {
+				expect(t, test.datetime == res, fmt.tprintf("Time didn't match. Expected %v (%v), got %v (%v)", test.datetime, test.datetime._nsec, res, res._nsec))
+				expect(t, test.utc_offset == offset, fmt.tprintf("UTC offset didn't match. Expected %v, got %v", test.utc_offset, offset))
+				expect(t, test.is_leap == is_leap, fmt.tprintf("Leap second flag didn't match. Expected %v, got %v", test.is_leap, is_leap))
+			}
+		}
+	}
+}
+
MONTH_DAYS := []int{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
YEAR_START :: 1900
YEAR_END :: 2024