From 332c8dcf855144f05589f2b8e0d9ad687d76121b Mon Sep 17 00:00:00 2001 From: Mark Pflug Date: Tue, 23 Apr 2024 14:25:37 -0700 Subject: [PATCH] Add field access validation. (#252) --- docs/Csv/Sylvan.Data.Csv.Releases.md | 5 ++ .../CsvDataReaderTests.cs | 45 ++++++++++++++ source/Sylvan.Data.Csv/CsvDataReader.cs | 59 ++++++++++++++----- source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj | 2 +- 4 files changed, 96 insertions(+), 15 deletions(-) diff --git a/docs/Csv/Sylvan.Data.Csv.Releases.md b/docs/Csv/Sylvan.Data.Csv.Releases.md index 7197cbf..a402989 100644 --- a/docs/Csv/Sylvan.Data.Csv.Releases.md +++ b/docs/Csv/Sylvan.Data.Csv.Releases.md @@ -1,5 +1,10 @@ # Sylvan.Data.Csv Release Notes +_1.3.8_ +- Fixes an issue with detecting MacOS-style ('\r') line ends in some scenarios. +- Field accessors now throw `InvalidOperationException` when called at inappropriate times. + This might be a behavior breaking change for code that relied on invalid access patterns. + _1.3.7_ - Adds `CsvDataWriterOptions.QuoteStrings` which now allows quoting empty, non-empty, or all strings. This is useful when string data might otherwise be interpreted as a number. This obsoletes QuoteEmptyString option. diff --git a/source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs b/source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs index 2869c16..4b937db 100644 --- a/source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs +++ b/source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs @@ -1911,6 +1911,51 @@ public void FinalCharInCellIsEscapeError() Assert.Equal("\\\n", value0); } + [Fact] + public void FieldAccessInvalidState() + { + var data = "a,b\n1,2\n"; + var r = CsvDataReader.Create(new StringReader(data)); + + Assert.Throws(() => r.GetString(0)); + Assert.True(r.Read()); + Assert.Equal("1", r.GetString(0)); + Assert.False(r.Read()); + Assert.Throws(() => r.GetString(0)); + } + + [Fact] + public void FieldAccessInvalidStateAccessors() + { + var data = "a,b\n1,2\n"; + var r = CsvDataReader.Create(new StringReader(data)); + + Assert.Throws(() => r.GetString(0)); + Assert.Throws(() => r.GetInt16(0)); + Assert.Throws(() => r.GetInt32(0)); + Assert.Throws(() => r.GetInt64(0)); + Assert.Throws(() => r.GetFloat(0)); + Assert.Throws(() => r.GetDouble(0)); + Assert.Throws(() => r.GetDateTime(0)); + Assert.Throws(() => r.GetDateTimeOffset(0)); + Assert.Throws(() => r.GetByte(0)); + Assert.Throws(() => r.GetChar(0)); + } + + [Fact] + public void MacOSLineEndDetect() + { + var data = "a,b\r1,2\r"; // weird line ends + var opts = new CsvDataReaderOptions { Delimiter = ',' }; + var r = CsvDataReader.Create(new StringReader(data), opts); + Assert.Equal(2, r.FieldCount); + Assert.True(r.Read()); + Assert.Equal("1", r[0]); + Assert.Equal("2", r[1]); + Assert.False(r.Read()); + } + + [Theory] // These test cases were copied from the Sep parser library. Thanks, Nietras. [InlineData("a", true, "a")] diff --git a/source/Sylvan.Data.Csv/CsvDataReader.cs b/source/Sylvan.Data.Csv/CsvDataReader.cs index fcbe35f..b037862 100644 --- a/source/Sylvan.Data.Csv/CsvDataReader.cs +++ b/source/Sylvan.Data.Csv/CsvDataReader.cs @@ -88,6 +88,14 @@ enum State Closed, } + void ValidateState() + { + if (this.state != State.Open) + { + throw new InvalidOperationException(); + } + } + enum ReadResult { False, @@ -319,7 +327,7 @@ public void Initialize() this.fieldCount = count; for (int i = 0; i < count; i++) { - var name = hasHeaders ? GetString(i) : null; + var name = hasHeaders ? GetStringRaw(i) : null; var columnSchema = schema?.GetColumn(name, i); columns[i] = new CsvColumn(name, i, columnSchema); @@ -1027,6 +1035,7 @@ ReadResult ConsumeLineEnd(char[] buffer, ref int idx) /// public override bool GetBoolean(int ordinal) { + ValidateState(); // four cases: // true and false both not null. Any other value raises error. // true not null, false null. True string true, anything else false. @@ -1088,6 +1097,7 @@ public override bool GetBoolean(int ordinal) /// public override byte GetByte(int ordinal) { + ValidateState(); #if SPAN return byte.Parse(this.GetFieldSpan(ordinal), provider: culture); #else @@ -1098,6 +1108,7 @@ public override byte GetByte(int ordinal) /// public override long GetBytes(int ordinal, long dataOffset, byte[]? buffer, int bufferOffset, int length) { + ValidateState(); if (buffer == null) { return GetBinaryLength(ordinal); @@ -1264,6 +1275,7 @@ static void FromBase64Chars(char[] chars, int charsOffset, int charsLen, byte[] /// public override char GetChar(int ordinal) { + ValidateState(); var s = GetField(ordinal); if (s.length == 1) { @@ -1275,6 +1287,7 @@ public override char GetChar(int ordinal) /// public override long GetChars(int ordinal, long dataOffset, char[]? buffer, int bufferOffset, int length) { + ValidateState(); if (buffer == null) { return this.GetCharLength(ordinal); @@ -1294,18 +1307,18 @@ public override long GetChars(int ordinal, long dataOffset, char[]? buffer, int /// public TimeSpan GetTimeSpan(int ordinal) { + ValidateState(); var format = columns[ordinal].Format; - var style = TimeSpanStyles.None; #if SPAN var span = this.GetFieldSpan(ordinal); - if (format != null && TimeSpan.TryParseExact(span, format, culture, style, out var value)) + if (format != null && TimeSpan.TryParseExact(span, format, culture, TimeSpanStyles.None, out var value)) { return value; } return TimeSpan.Parse(span, culture); #else var str = this.GetString(ordinal); - if (format != null && TimeSpan.TryParseExact(str, format, culture, style, out var value)) + if (format != null && TimeSpan.TryParseExact(str, format, culture, TimeSpanStyles.None, out var value)) { return value; } @@ -1318,8 +1331,8 @@ public TimeSpan GetTimeSpan(int ordinal) /// public DateTimeOffset GetDateTimeOffset(int ordinal) { + ValidateState(); var format = columns[ordinal].Format ?? this.dateTimeFormat; - var style = DateTimeStyles.RoundtripKind; DateTimeOffset value; #if SPAN var span = this.GetFieldSpan(ordinal); @@ -1327,25 +1340,25 @@ public DateTimeOffset GetDateTimeOffset(int ordinal) if (IsoDate.TryParse(span, out value)) return value; - if (format != null && DateTimeOffset.TryParseExact(span, format, culture, style, out value)) + if (format != null && DateTimeOffset.TryParseExact(span, format, culture, DateTimeStyles.RoundtripKind, out value)) { return value; } - return DateTimeOffset.Parse(span, culture, style); + return DateTimeOffset.Parse(span, culture, DateTimeStyles.RoundtripKind); #else var str = this.GetString(ordinal); - if (format != null && DateTimeOffset.TryParseExact(str, format, culture, style, out value)) + if (format != null && DateTimeOffset.TryParseExact(str, format, culture, DateTimeStyles.RoundtripKind, out value)) { return value; } - return DateTimeOffset.Parse(str, culture, style); + return DateTimeOffset.Parse(str, culture, DateTimeStyles.RoundtripKind); #endif } /// public override DateTime GetDateTime(int ordinal) { - var style = DateTimeStyles.RoundtripKind; + ValidateState(); DateTime value; #if SPAN var span = this.GetFieldSpan(ordinal); @@ -1353,26 +1366,27 @@ public override DateTime GetDateTime(int ordinal) return value; var format = columns[ordinal].Format ?? this.dateTimeFormat; - if (format != null && DateTime.TryParseExact(span, format, culture, style, out value)) + if (format != null && DateTime.TryParseExact(span, format, culture, DateTimeStyles.RoundtripKind, out value)) { return value; } - return DateTime.Parse(span, culture, style); + return DateTime.Parse(span, culture, DateTimeStyles.RoundtripKind); #else var dateStr = this.GetString(ordinal); var format = columns[ordinal].Format ?? this.dateTimeFormat ?? "O"; - if (format != null && DateTime.TryParseExact(dateStr, format, culture, style, out value)) + if (format != null && DateTime.TryParseExact(dateStr, format, culture, DateTimeStyles.RoundtripKind, out value)) { return value; } - return DateTime.Parse(dateStr, culture, style); + return DateTime.Parse(dateStr, culture, DateTimeStyles.RoundtripKind); #endif } /// public override decimal GetDecimal(int ordinal) { + ValidateState(); #if SPAN var field = this.GetField(ordinal); return @@ -1386,6 +1400,7 @@ public override decimal GetDecimal(int ordinal) /// public override double GetDouble(int ordinal) { + ValidateState(); #if SPAN var field = this.GetField(ordinal); return @@ -1418,6 +1433,7 @@ public override Type GetFieldType(int ordinal) /// public override float GetFloat(int ordinal) { + ValidateState(); #if SPAN var field = this.GetField(ordinal); return @@ -1431,6 +1447,7 @@ public override float GetFloat(int ordinal) /// public override Guid GetGuid(int ordinal) { + ValidateState(); #if SPAN return Guid.Parse(this.GetFieldSpan(ordinal)); #else @@ -1441,6 +1458,7 @@ public override Guid GetGuid(int ordinal) /// public override short GetInt16(int ordinal) { + ValidateState(); #if SPAN var field = this.GetField(ordinal); return @@ -1454,6 +1472,7 @@ public override short GetInt16(int ordinal) /// public override int GetInt32(int ordinal) { + ValidateState(); #if SPAN var field = this.GetField(ordinal); return @@ -1467,6 +1486,7 @@ public override int GetInt32(int ordinal) /// public override long GetInt64(int ordinal) { + ValidateState(); #if SPAN var field = this.GetField(ordinal); return @@ -1503,6 +1523,12 @@ public override int GetOrdinal(string name) /// public override string GetString(int ordinal) { + ValidateState(); + return GetStringRaw(ordinal); + } + + string GetStringRaw(int ordinal) + { if ((uint)ordinal < (uint)curFieldCount) { var s = GetFieldUnsafe(ordinal); @@ -1744,6 +1770,7 @@ CharSpan PrepareInvalidField(int offset, int len) /// public override object GetValue(int ordinal) { + ValidateState(); var max = Math.Max(this.fieldCount, this.curFieldCount); if (ordinal > max) @@ -1814,6 +1841,7 @@ int GetCharLength(int ordinal) /// public override int GetValues(object[] values) { + ValidateState(); var count = Math.Min(this.fieldCount, values.Length); for (int i = 0; i < count; i++) { @@ -1825,6 +1853,7 @@ public override int GetValues(object[] values) /// public override bool IsDBNull(int ordinal) { + ValidateState(); if (ordinal < 0) throw new ArgumentOutOfRangeException(nameof(ordinal)); if ((uint)ordinal < this.fieldCount) @@ -2012,6 +2041,7 @@ public override object? this[string property] /// A span containing the characters of the field. public ReadOnlySpan GetFieldSpan(int ordinal) { + ValidateState(); var s = GetField(ordinal); return s.ToSpan(); } @@ -2075,6 +2105,7 @@ static IFieldAccessor GetAccessor() /// public override T GetFieldValue(int ordinal) { + ValidateState(); var acc = Accessor.Instance; return acc.GetValue(this, ordinal); } diff --git a/source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj b/source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj index 12aef57..33fb643 100644 --- a/source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj +++ b/source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj @@ -2,7 +2,7 @@ net6.0;netstandard2.1;netstandard2.0 - 1.3.7 + 1.3.8 A .NET library for reading and writing delimited CSV data. csv;delimited;data;datareader;datawriter;simd enable