Skip to content

Commit

Permalink
RavenDB-23346 Fixing LazyStringParser.TryParseDateTime to take into a…
Browse files Browse the repository at this point in the history
…ccount that trailing zeros can be dropped when using .FFFFFFFK / .FFFK format. Adding support for r format specifier
  • Loading branch information
arekpalinski committed Dec 17, 2024
1 parent 76149a7 commit 2d22e4e
Show file tree
Hide file tree
Showing 3 changed files with 293 additions and 12 deletions.
77 changes: 77 additions & 0 deletions bench/Micro.Benchmark/Benchmarks/Parsing/DateTimeParsing.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
using System.Linq;
using System.Text;
using BenchmarkDotNet.Analysers;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Columns;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Exporters;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Validators;
using Sparrow.Json;

namespace Micro.Benchmark.Benchmarks.Parsing;

/// <summary>
/// Micro-benchmark that repeatedly invokes <c>LazyStringParser.TryParseDateTime</c> on a fixed set of
/// UTF-8 encoded date strings of varying length and shape (with/without fractions, 'Z' suffix, offset).
/// </summary>
[Config(typeof(Config))]
public class DateTimeParsing
{
    /// <summary>
    /// Benchmark configuration: a single job plus exporters, statistic columns, validators and an analyser.
    /// </summary>
    private class Config : ManualConfig
    {
        public Config()
        {
            var job = new Job();
            job.Environment.Runtime = CoreRuntime.Core80;
            job.Environment.Platform = Platform.X64;
            job.Environment.Jit = Jit.RyuJit;
            AddJob(job);

            // Exporters for data
            var dataExporters = GetExporters().ToArray();
            AddExporter(dataExporters);

            // Generate plots using R if %R_HOME% is correctly set
            AddExporter(RPlotExporter.Default);

            AddColumn(StatisticColumn.AllStatistics);

            AddValidator(BaselineValidator.FailOnError);
            AddValidator(JitOptimizationsValidator.FailOnError);

            AddAnalyser(EnvironmentAnalyser.Default);
        }
    }

    // Number of parse calls performed per benchmark invocation (also reported via OperationsPerInvoke).
    private const int NumberOfOperations = 10000;

    // UTF-8 bytes of the sample date strings; populated once in Setup().
    private byte[][] _dates;

    /// <summary>
    /// Encodes the sample date strings as UTF-8 byte arrays before the benchmark runs.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _dates = new[]
        {
            "2016-10-05T21:07:32.2082285Z"u8.ToArray(),
            "2016-10-05T21:07:32.2082285+03:00"u8.ToArray(),
            "2024-12-13T02:38:42.786481Z"u8.ToArray(),
            "2016-10-05T21:07:32"u8.ToArray(),
            "2021-12-12T10:34:23.838"u8.ToArray(),
            "2021-12-12T10:34:23.838Z"u8.ToArray(),
            "2015-10-17T13:28:17-05:00"u8.ToArray()
        };
    }

    /// <summary>
    /// Pins every sample buffer and parses them round-robin, <see cref="NumberOfOperations"/> times in total.
    /// The parse results are intentionally discarded.
    /// </summary>
    [Benchmark(OperationsPerInvoke = NumberOfOperations)]
    public unsafe void TryParseDateTimeBenchmark()
    {
        fixed (byte* p0 = _dates[0])
        fixed (byte* p1 = _dates[1])
        fixed (byte* p2 = _dates[2])
        fixed (byte* p3 = _dates[3])
        fixed (byte* p4 = _dates[4])
        fixed (byte* p5 = _dates[5])
        fixed (byte* p6 = _dates[6])
        {
            byte*[] pinned = { p0, p1, p2, p3, p4, p5, p6 };

            for (int iteration = 0; iteration < NumberOfOperations; iteration++)
            {
                // Cycle through the samples so every shape is exercised equally often.
                int slot = iteration % pinned.Length;

                LazyStringParser.TryParseDateTime(pinned[slot], _dates[slot].Length, out _, out _, properlyParseThreeDigitsMilliseconds: true);
            }
        }
    }
}
99 changes: 87 additions & 12 deletions src/Sparrow/Json/LazyStringParser.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
using System;
using System.Diagnostics.CodeAnalysis;
using System.Diagnostics.Contracts;
using System.Runtime.CompilerServices;
using System.Text;
using Sparrow.Utils;

namespace Sparrow.Json
Expand Down Expand Up @@ -683,7 +685,17 @@ public static Result TryParseDateTime(byte* buffer, int len, out DateTime dt, ou

if (buffer[4] != '-' || buffer[7] != '-' || buffer[10] != 'T' ||
buffer[13] != ':' || buffer[16] != ':' || buffer[16] != ':')
goto Failed;
{
if (len != 29 || buffer[3] != ',' || buffer[19] != ':' || buffer[22] != ':')
goto Failed;

// ddd, dd MMM yyyy HH':'mm':'ss 'GMT' - "r" format specifier
if (DateTimeOffset.TryParse(Encoding.UTF8.GetString(buffer, len), out dto))
{
dt = default;
return Result.DateTimeOffset;
}
}

return TryParseDateTimeInternal(buffer, len, out dt, out dto, properlyParseThreeDigitsMilliseconds);

Expand Down Expand Up @@ -727,22 +739,48 @@ private static Result TryParseDateTimeInternal(byte* buffer, int len, out DateTi
goto case 19;
case 19: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss",
goto Finished_DT;
case 24: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff'Z'",
if (buffer[23] != 'Z')
case 24: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff'Z'" OR "yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'ffff'",
if (buffer[23] == 'Z')
{
kind = DateTimeKind.Utc;
goto case 23;
}
if (buffer[19] != '.')
goto Failed;
kind = DateTimeKind.Utc;
goto case 23;
case 23: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff",
if (TryParseNumber4(buffer, 20, out fractions) == false)
goto Failed;
if (properlyParseThreeDigitsMilliseconds)
fractions *= 1000;
goto Finished_DT;
case 23: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff" OR "yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'ffZ",
if (buffer[22] == 'Z')
{
kind = DateTimeKind.Utc;
goto case 22;
}
if (buffer[19] != '.')
goto Failed;
if (TryParseNumber3(buffer, 20, out fractions) == false)
goto Failed;
if (properlyParseThreeDigitsMilliseconds)
fractions *= 10000;
goto Finished_DT;
case 25: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss'+'dd':'dd'",
case 25: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss'+'dd':'dd'" OR "yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fffff" OR "yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'ffffZ"
if (buffer[22] != ':' || (buffer[19] != '+' && buffer[19] != '-'))
goto Failed;
{
if (buffer[24] == 'Z')
{
kind = DateTimeKind.Utc;
goto case 24;
}
if (buffer[19] != '.')
goto Failed;
if (TryParseNumber(buffer + 20, 5, out fractions) == false)
goto Failed;

fractions *= 100;
goto Finished_DT;
}

if (TryParseNumber2(buffer, 20, out int offsetHour) == false)
goto Failed;
Expand All @@ -762,7 +800,12 @@ private static Result TryParseDateTimeInternal(byte* buffer, int len, out DateTi
goto Failed;
kind = DateTimeKind.Utc;
goto case 27;
case 27: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss.fffffff"
case 27: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss.fffffff" OR "yyyy'-'MM'-'dd'T'HH':'mm':'ss.ffffffZ"
if (buffer[26] == 'Z')
{
kind = DateTimeKind.Utc;
goto case 26;
}
if (buffer[19] != '.')
goto Failed;
if (TryParseNumber(buffer + 20, 7, out fractions) == false)
Expand All @@ -788,16 +831,48 @@ private static Result TryParseDateTimeInternal(byte* buffer, int len, out DateTi
dto = new DateTimeOffset(DateToTicks(year, month, day, hour, minute, second, fractions), offset);
result = Result.DateTimeOffset;
goto Finished;
case 22: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'ff" OR //"yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fZ",
if (buffer[21] == 'Z')
{
kind = DateTimeKind.Utc;
goto case 21;
}
if (buffer[19] != '.')
goto Failed;
if (TryParseNumber2(buffer, 20, out fractions) == false)
goto Failed;
fractions *= 100000;
goto Finished_DT;
case 21: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'f",
if (buffer[19] != '.')
goto Failed;
if (TryParseNumber(buffer + 20, 1, out fractions) == false)
goto Failed;
fractions *= 1000000;
goto Finished_DT;
case 26: //"yyyy'-'MM'-'dd'T'HH':'mm':'ss.ffffff" OR //"yyyy'-'MM'-'dd'T'HH':'mm':'ss.fffffZ"
if (buffer[25] == 'Z')
{
kind = DateTimeKind.Utc;
goto case 25;
}
if (buffer[19] != '.')
goto Failed;
if (TryParseNumber(buffer + 20, 6, out fractions) == false)
goto Failed;

fractions *= 10;
goto Finished_DT;
}

Finished_DT:
Finished_DT:
dt = new DateTime(DateToTicks(year, month, day, hour, minute, second, fractions), kind);
dto = default(DateTimeOffset);

Finished:
Finished:
return result;

Failed:
Failed:
dt = default(DateTime);
dto = default(DateTimeOffset);
result = Result.Failed;
Expand Down
129 changes: 129 additions & 0 deletions test/FastTests/Utils/TimeParsing.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using FastTests.Voron.FixedSize;
using Sparrow;
using Sparrow.Json;
using Tests.Infrastructure;
using Xunit;
using Xunit.Abstractions;

Expand Down Expand Up @@ -151,7 +154,133 @@ public void TimeOnly(string date, int hh, int mm, int ss, int ms)
}
}

/// <summary>
/// Verifies that date strings with 0 to 7 fractional digits (with or without a trailing 'Z') are parsed by
/// <c>LazyStringParser.TryParseDateTime</c> to the same value and kind as <c>DateTime.ParseExact</c>.
/// </summary>
[RavenTheory(RavenTestCategory.Core)]
[InlineData("2024-12-13T02:38:42.786481Z")] // 27
[InlineData("2024-12-13T02:38:42.7864811")] // 27
[InlineData("2024-12-13T02:38:42.78648Z")] //26
[InlineData("2024-12-13T02:38:42.786488")] //26
[InlineData("2024-12-13T02:38:42.7864Z")] //25
[InlineData("2024-12-13T02:38:42.78644")] //25
[InlineData("2024-12-13T02:38:42.786Z")] // 24
[InlineData("2024-12-13T02:38:42.7868")] // 24
[InlineData("2024-12-13T02:38:42.78Z")] //23
[InlineData("2024-12-13T02:38:42.788")] //23
[InlineData("2024-12-13T02:38:42.7Z")] // 22
[InlineData("2024-12-13T02:38:42.77")] // 22
[InlineData("2024-12-13T02:38:42.7")] // 21
[InlineData("2024-12-13T02:38:42Z")]
public void CanParseValidDatesWithTrailingZerosInMillisecondsPart(string dt)
{
    // Reference value produced by the framework parser using the formats the product reads.
    DateTime reference = DateTime.ParseExact(
        dt,
        DefaultFormat.DateTimeFormatsToRead,
        CultureInfo.InvariantCulture,
        DateTimeStyles.RoundtripKind);

    byte[] utf8 = Encoding.UTF8.GetBytes(dt);
    fixed (byte* ptr = utf8)
    {
        var outcome = LazyStringParser.TryParseDateTime(ptr, utf8.Length, out DateTime parsed, out _, properlyParseThreeDigitsMilliseconds: true);

        Assert.Equal(LazyStringParser.Result.DateTime, outcome);

        // Kind is checked separately from the value.
        Assert.Equal(reference.Kind, parsed.Kind);
        Assert.Equal(reference, parsed);
    }
}

/// <summary>
/// For each UTC input whose fraction ends in zeros, re-formats the value with several of the read formats
/// and verifies that <c>LazyStringParser.TryParseDateTime</c> parses the formatted text to the same value as
/// <c>DateTime.ParseExact</c> does.
/// </summary>
[RavenTheory(RavenTestCategory.Core)]
[InlineData("2024-12-13T02:38:42.7864810Z")]
[InlineData("2024-12-13T02:38:42.7864800Z")]
[InlineData("2024-12-13T02:38:42.7864000Z")]
[InlineData("2024-12-13T02:38:42.7860000Z")]
[InlineData("2024-12-13T02:38:42.7800000Z")]
[InlineData("2024-12-13T02:38:42.7000000Z")]
[InlineData("2024-12-13T02:38:42.0000000Z")]
public void CanParseValidUtcDatesWithTrailingZerosInMillisecondsPart_DifferentFormats(string dt)
{
    var expected = DateTime.ParseExact(dt, DefaultFormat.DateTimeFormatsToRead, CultureInfo.InvariantCulture,
        DateTimeStyles.RoundtripKind);

    // Only the keys (format strings) are used below; the values record the kind each format is paired with.
    var formatsToRead = new Dictionary<string, DateTimeKind>
    {
        {DefaultFormat.DateTimeFormatsToRead[0], DateTimeKind.Utc},
        {DefaultFormat.DateTimeFormatsToRead[3], DateTimeKind.Utc},
        {DefaultFormat.DateTimeFormatsToRead[5], DateTimeKind.Utc},
        {DefaultFormat.DateTimeFormatsToRead[6], DateTimeKind.Utc},
    };

    foreach (var dateTimeFormat in formatsToRead)
    {
        // Format with the invariant culture so the text under test does not depend on the machine's
        // current culture (calendar, time separator); the reference parse below is invariant as well.
        string tested = expected.ToString(dateTimeFormat.Key, CultureInfo.InvariantCulture);

        var expectedAfterFormatting = DateTime.ParseExact(tested, DefaultFormat.DateTimeFormatsToRead, CultureInfo.InvariantCulture,
            DateTimeStyles.RoundtripKind);

        var bytes = Encoding.UTF8.GetBytes(tested);
        fixed (byte* buffer = bytes)
        {
            Assert.Equal(LazyStringParser.Result.DateTime,
                LazyStringParser.TryParseDateTime(buffer, bytes.Length, out DateTime time, out _, properlyParseThreeDigitsMilliseconds: true));
            Assert.Equal(expectedAfterFormatting, time);
        }
    }
}

/// <summary>
/// Formats a pseudo-random date with every entry of <c>DefaultFormat.DateTimeFormatsToRead</c> and verifies
/// that <c>LazyStringParser.TryParseDateTime</c> succeeds and that the parsed value formats back to the
/// exact same text.
/// </summary>
/// <param name="seed">Seed for the random generator, so a failing case can be reproduced.</param>
[RavenTheory(RavenTestCategory.Core)]
[InlineDataWithRandomSeed]
public void CanParseValidRandomDate(int seed)
{
    var r = new Random(seed);

    var dt = GetRandomDate(r);

    var formatsToRead = new Dictionary<string, DateTimeKind>
    {
        {DefaultFormat.DateTimeFormatsToRead[0], DateTimeKind.Utc},
        {DefaultFormat.DateTimeFormatsToRead[1], DateTimeKind.Unspecified},
        {DefaultFormat.DateTimeFormatsToRead[2], DateTimeKind.Local},
        {DefaultFormat.DateTimeFormatsToRead[3], DateTimeKind.Utc},
        {DefaultFormat.DateTimeFormatsToRead[4], DateTimeKind.Unspecified},
        {DefaultFormat.DateTimeFormatsToRead[5], DateTimeKind.Utc},
        {DefaultFormat.DateTimeFormatsToRead[6], DateTimeKind.Utc},
    };

    // Guards against a format being added to DateTimeFormatsToRead without being covered here.
    Assert.Equal(formatsToRead.Count, DefaultFormat.DateTimeFormatsToRead.Length);

    foreach (var dateTimeFormat in formatsToRead)
    {
        // Always format with the invariant culture: the default ToString(format) overload uses the current
        // thread culture, which can change the calendar or separators and make the test machine-dependent.
        string tested = dt.ToString(dateTimeFormat.Key, CultureInfo.InvariantCulture);

        var bytes = Encoding.UTF8.GetBytes(tested);
        fixed (byte* buffer = bytes)
        {
            var parseResult = LazyStringParser.TryParseDateTime(buffer, bytes.Length, out var dateTime, out var dateTimeOffset, properlyParseThreeDigitsMilliseconds: true);

            Assert.True(parseResult != LazyStringParser.Result.Failed, $"parseResult: {parseResult}, tested value: {tested}");

            // Round-trip check: whichever value the parser produced must format back to the input text.
            switch (parseResult)
            {
                case LazyStringParser.Result.DateTime:

                    Assert.Equal(tested, dateTime.ToString(dateTimeFormat.Key, CultureInfo.InvariantCulture));
                    break;
                case LazyStringParser.Result.DateTimeOffset:

                    Assert.Equal(tested, dateTimeOffset.ToString(dateTimeFormat.Key, CultureInfo.InvariantCulture));
                    break;
            }
        }
    }
}

/// <summary>
/// Builds a pseudo-random <see cref="DateTime"/> (Kind is Unspecified) from the supplied generator.
/// </summary>
/// <param name="random">Source of randomness; the result is deterministic for a given generator state.</param>
/// <param name="minYear">Lowest year that can be produced (inclusive).</param>
/// <param name="maxYear">Highest year that can be produced (inclusive).</param>
/// <returns>
/// A date within [<paramref name="minYear"/>, <paramref name="maxYear"/>] at midnight, shifted forward by a
/// random number of whole milliseconds in the range [0, 9999999).
/// </returns>
private static DateTime GetRandomDate(Random random, int minYear = 1900, int maxYear = 2099)
{
    // Random.Next(min, max) excludes max, so each upper bound is bumped by one; otherwise maxYear,
    // December and the last day of every month could never be generated.
    var year = random.Next(minYear, maxYear + 1);
    var month = random.Next(1, 13);
    var noOfDaysInMonth = DateTime.DaysInMonth(year, month);
    var day = random.Next(1, noOfDaysInMonth + 1);

    DateTime randomDate = new DateTime(year, month, day);

    // NOTE(review): the offset is in whole milliseconds, so sub-millisecond digits of the result are
    // always zero - confirm whether finer-grained ticks were intended here.
    randomDate = randomDate.AddMilliseconds(random.Next(0, 9999999));
    return randomDate;
}
}
}

0 comments on commit 2d22e4e

Please sign in to comment.