Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve XmlSerializationWriter.WriteTypedPrimitive #76436

Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0010a7b
Introduce TryFormats for almost all primitive types
TrayanZapryanov Sep 29, 2022
7a773e1
Use primitive char buffer in XmlSerializationWriter
TrayanZapryanov Sep 29, 2022
dcc6180
Fix char cast
TrayanZapryanov Sep 30, 2022
f0558e4
Add tests for different types
TrayanZapryanov Sep 30, 2022
0f1db0d
Add byte type
TrayanZapryanov Sep 30, 2022
99b2169
Address feedback
TrayanZapryanov Oct 24, 2022
6d5f5f6
Fix tests
TrayanZapryanov Oct 25, 2022
a51e952
remove using
TrayanZapryanov Oct 25, 2022
b7e9b9d
Increase duration char buffer size as it is not enough for TimeSpan.M…
TrayanZapryanov Oct 25, 2022
7a1a1dc
Address feedback
TrayanZapryanov Nov 2, 2022
2c90324
Merge branch 'dotnet:main' into improve_xmlserializatiowriter_writety…
TrayanZapryanov Nov 10, 2022
ed66817
Added assert if we cannot format primitive value to the suppiled buffer
TrayanZapryanov Nov 23, 2022
edef33e
Merge branch 'improve_xmlserializatiowriter_writetypedprimitive' of h…
TrayanZapryanov Nov 23, 2022
f3ca604
Lazy create primitives buffer
TrayanZapryanov Nov 24, 2022
7125e17
Merge branch 'dotnet:main' into improve_xmlserializatiowriter_writety…
TrayanZapryanov Dec 16, 2022
6a3b784
Address new feadback
TrayanZapryanov Jan 22, 2023
80f2007
Merge branch 'improve_xmlserializatiowriter_writetypedprimitive' of h…
TrayanZapryanov Jan 22, 2023
f4f5a9e
Resolve feedback
TrayanZapryanov Jan 24, 2023
2b9e4ee
Merge branch 'dotnet:main' into improve_xmlserializatiowriter_writety…
TrayanZapryanov Jan 24, 2023
b8582e1
Optimize float and double TryFormat
TrayanZapryanov Jan 25, 2023
12a51de
Replace ArrayPool renting with Interlocked. Fix Debug.Assert
TrayanZapryanov Feb 4, 2023
c5bf23b
Do not expect concurrency when using primitives buffer
TrayanZapryanov Mar 24, 2023
23bf387
Merge branch 'dotnet:main' into improve_xmlserializatiowriter_writety…
TrayanZapryanov Mar 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ private enum XsdDateTimeKind
private static readonly int[] DaysToMonth366 = {
0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366};

private const int CharStackBufferSize = 64;

/// <summary>
/// Constructs an XsdDateTime from a string using specific format.
/// </summary>
Expand Down Expand Up @@ -495,7 +497,17 @@ public static implicit operator DateTimeOffset(XsdDateTime xdt)
/// </summary>
public override string ToString()
{
var vsb = new ValueStringBuilder(stackalloc char[64]);
Span<char> destination = stackalloc char[CharStackBufferSize];
TrayanZapryanov marked this conversation as resolved.
Show resolved Hide resolved
bool success = TryFormat(destination, out int charsWritten);
Debug.Assert(success);

return destination.Slice(0, charsWritten).ToString();
}

public bool TryFormat(Span<char> destination, out int charsWritten)
{
var vsb = new ValueStringBuilder(destination);

switch (InternalTypeCode)
{
case DateTimeTypeCode.DateTime:
Expand Down Expand Up @@ -534,7 +546,9 @@ public override string ToString()
break;
}
PrintZone(ref vsb);
return vsb.ToString();

charsWritten = vsb.Length;
return destination.Length >= vsb.Length;
}

// Serialize year, month and day
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ internal struct XsdDuration
private uint _nanoseconds; // High bit is used to indicate whether duration is negative

private const uint NegativeBit = 0x80000000;
private const int CharStackBufferSize = 32;

private enum Parts
{
Expand Down Expand Up @@ -340,7 +341,16 @@ public override string ToString()
/// </summary>
internal string ToString(DurationType durationType)
{
var vsb = new ValueStringBuilder(stackalloc char[20]);
Span<char> destination = stackalloc char[CharStackBufferSize];
bool success = TryFormat(destination, out int charsWritten, durationType);
Debug.Assert(success);

return destination.Slice(0, charsWritten).ToString();
}

public bool TryFormat(Span<char> destination, out int charsWritten, DurationType durationType = DurationType.Duration)
{
var vsb = new ValueStringBuilder(destination);
int nanoseconds, digit, zeroIdx, len;

if (IsNegative)
Expand Down Expand Up @@ -410,7 +420,9 @@ internal string ToString(DurationType durationType)
}

vsb.EnsureCapacity(zeroIdx + 1);
vsb.Append(tmpSpan.Slice(0, zeroIdx - len + 1));
int nanoSpanLength = zeroIdx - len + 1;
bool successCopy = tmpSpan[..nanoSpanLength].TryCopyTo(vsb.AppendSpan(nanoSpanLength));
Debug.Assert(successCopy);
}
vsb.Append('S');
}
Expand All @@ -427,7 +439,8 @@ internal string ToString(DurationType durationType)
vsb.Append("0M");
}

return vsb.ToString();
charsWritten = vsb.Length;
return destination.Length >= vsb.Length;
}

internal static Exception? TryParse(string s, out XsdDuration result)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ public abstract class XmlSerializationWriter : XmlSerializationGeneratedCode
private bool _soap12;
private bool _escapeName = true;

//char buffer for serializing primitive values
private char[]? _primitivesBuffer;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As @mconnew mentioned, it should be safe to keep one buffer alongside the reference to a single instance of XmlWriter (aka, _w on line 27). Calling into here simultaneously would already be asking for trouble racing to the _w.Write*() calls at the end of the method.

Suggested change
private char[]? _primitivesBuffer;
private char[] _primitivesBuffer = new char[128];

And then use this buffer through the rest of WriteTypedPrimitive without the need for any interlocked exchange.

Copy link
Contributor Author

@TrayanZapryanov TrayanZapryanov Mar 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@StephenMolloy, @mconnew
I will remove it, but have two questions:

  • char[128] might be too big - do you see any primitive type that cannot fit in char[64], for example?
  • If you check the upcoming changes, we will need the same buffer in other classes that inherit from this one. Should we make it protected in this PR or later?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have the following safeguard in case a value cannot be formatted into this buffer:
image

Maybe this is enough to experiment with 64; or, if you prefer, I can replace the Debug.Assert with a thrown exception.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay - I set it to char[64]


// this method must be called before any generated serialization methods are called
internal void Init(XmlWriter w, XmlSerializerNamespaces? namespaces, string? encodingStyle, string? idBase)
{
Expand Down Expand Up @@ -120,6 +123,11 @@ protected static string FromDateTime(DateTime value)
return XmlCustomFormatter.FromDateTime(value);
}

/// <summary>
/// Formats <paramref name="value"/> into <paramref name="destination"/> by forwarding
/// the call unchanged to <c>XmlCustomFormatter.TryFormatDateTime</c>.
/// </summary>
/// <param name="value">The date/time value to format.</param>
/// <param name="destination">The character span that receives the formatted text.</param>
/// <param name="charsWritten">Set by the underlying formatter.</param>
/// <returns>The result reported by <c>XmlCustomFormatter.TryFormatDateTime</c>.</returns>
internal static bool TryFormatDateTime(DateTime value, Span<char> destination, out int charsWritten)
    => XmlCustomFormatter.TryFormatDateTime(value, destination, out charsWritten);

protected static string FromDate(DateTime value)
{
return XmlCustomFormatter.FromDate(value);
Expand Down Expand Up @@ -246,14 +254,17 @@ private XmlQualifiedName GetPrimitiveTypeName(Type type)
[RequiresUnreferencedCode(XmlSerializer.TrimSerializationWarning)]
protected void WriteTypedPrimitive(string? name, string? ns, object o, bool xsiType)
{
string? value;
string? value = null;
string type;
string typeNs = XmlSchema.Namespace;
bool writeRaw = true;
bool writeDirect = false;
Type t = o.GetType();
bool wroteStartElement = false;
bool? tryFormatResult = null;
int charsWritten = -1;

char[] buffer = _primitivesBuffer ??= new char[128];
switch (Type.GetTypeCode(t))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would ArrayPool<char>.Shared.Rent(128); and returning at the end be better? I think this class can be used by multiple threads simultaneously if you have more than 1 thread serializing concurrently using the same serializer instance. This shared buffer will become corrupted in that case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mconnew You are right for synchronization problems and I followed your suggestion and used ArrayPool.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mconnew Unfortunately, renting from the pool made performance even worse than before. Here are the results using the benchmark from here: dotnet/performance#2623

Before:

Method Mean Error StdDev Median Min Max Gen0 Allocated
AddPrimitives 1.878 us 0.0175 us 0.0163 us 1.876 us 1.857 us 1.908 us 0.0900 792 B

After:

Method Mean Error StdDev Median Min Max Gen0 Allocated
AddPrimitives 2.192 μs 0.0433 μs 0.0384 μs 2.177 μs 2.155 μs 2.275 μs 0.0400 360 B

There is definitely win in memory, but regression in execution.
I've looked at where XmlSerializationWriter is instantiated and found only here :

public void Serialize(XmlWriter xmlWriter, object? o, XmlSerializerNamespaces? namespaces, string? encodingStyle, string? id)
.
It looks like it always creates a new instance, and there is an "Init" method, which also implies that this class has state and cannot be used concurrently.
Do you remember where this class is used concurrently?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the same benchmark, but with previous implementation

Method Mean Error StdDev Median Min Max Gen0 Allocated
AddPrimitives 1.906 us 0.0209 us 0.0185 us 1.908 us 1.855 us 1.936 us 0.0400 360 B

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When using a pre-generated serializer, it provides an instance derived from XmlSerializationWriter which I think is cached and reused with multiple calls to Serialize. There's also the reflection based serialization used when it's using SOAP mapping which might not use a new instance each time. The allocation is small, what's the cost of allocating it fresh each time? You could also implement it in such a way that it's optimal for single usage. Eg have a byte[] field and use Interlocked.Exchange replacing it with null to "rent" the buffer. If another thread is using it, you will get a null back from the exchange so you create a new array locally. When you are done with it, use Interlocked.Exchange to put it back. This avoids the overhead of renting, will have zero allocation in the common case, and degrades to something no worse than it was originally.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mconnew I like your idea and pushed a new commit with it. Could you check again?
Also, the results are now better than before:

Method Mean Error StdDev Median Min Max Gen0 Allocated
AddPrimitives 1.969 us 0.0351 us 0.0293 us 1.962 us 1.942 us 2.047 us 0.0400 360 B

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mconnew One more question. I create char[128] which might be too big. Any suggestion here? When I execute tests from Performance repo I see increase of bytes allocated(maximum 8), but I really hoped to see some reduction :(.
Should I increase code complexity by starting with lower buffer like char[64] and then to inspect TryFormat methods and retry with bigger buffer, or we can leave this regression for small number of primitives, and have bigger wins when xml produces is much bigger ?
Here are results from XML performance comparison tests:

Statistics

Total: 68
Same: 75.00 %
Slower: 5.88 %
Faster: 10.29 %
Noise: 8.82 %
Unknown: 0.00 %

Statistics per Architecture

Architecture Same Slower Faster Noise Unknown
X64 75.00 % 5.88 % 10.29 % 8.82 % 0.00 %

Statistics per Operating System

Operating System Same Slower Faster Noise Unknown
Windows 10 75.00 % 5.88 % 10.29 % 8.82 % 0.00 %

Statistics per Namespace

Namespace Same Slower Faster Noise Unknown
MicroBenchmarks.Serializers 78.12 % 9.38 % 12.50 % 0.00 % 0.00 %
Microsoft.Extensions.Configuration.Xml 75.00 % 0.00 % 25.00 % 0.00 % 0.00 %
System.Xml.Linq 73.68 % 0.00 % 0.00 % 26.32 % 0.00 %
System.Xml.Tests 66.67 % 16.67 % 16.67 % 0.00 % 0.00 %
XmlDocumentTests.XmlDocumentTests 66.67 % 0.00 % 33.33 % 0.00 % 0.00 %
XmlDocumentTests.XmlNodeListTests 100.00 % 0.00 % 0.00 % 0.00 % 0.00 %
XmlDocumentTests.XmlNodeTests 50.00 % 0.00 % 0.00 % 50.00 % 0.00 %

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have thought of a way to eliminate the allocation. You could use stackalloc to allocate the memory on the stack. The only problem with this is that XmlWriter needs a char[] passed, which means you can't assign the stack allocated memory to a Span<char> initially; we need it as an array. That means working with a char* and marking the method as unsafe. WriteTypedPrimitive is a protected method on an existing public type. I don't know if adding the unsafe keyword is a breaking change. This is easily solved by moving the implementation to an inner private method which is marked unsafe and calling that from this method. So something like this:

protected void WriteTypedPrimitive(string? name, string? ns, object o, bool xsiType)
{
    UnsafeWriteTypedPrimitive(name, ns, o, xsiType);
}

private void UnsafeWriteTypedPrimitive(string? name, string? ns, object o, bool xsiType)
{
    char* buffer = stackalloc char[128];
    Span<char> bufferSpan = buffer;
    // rest of implementation
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mconnew Unfortunately I am not so good with pointer math.
How can I convert char* to char[]?
I cannot see any method of XmlWriter which accepts char*.
In XmlUtf8RawTextWriter I can see that there is iteration using char*, but that's all.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been thinking about this some more and realized multiple methods can't be called on different threads concurrently. The class wraps the XmlWriter (stored in the _w field) which means the thread usage must match that of XmlWriter. And as you can only have one thread at a time trying to write to an XmlWriter, only one thread at a time will be calling methods on this class. It's safe to have a single instance of the buffer which exists for the lifetime of this class.
I tried to work out a way to cast a char* to a char[] and there's no good clean way to do it.

{
case TypeCode.String:
Expand All @@ -262,60 +273,60 @@ protected void WriteTypedPrimitive(string? name, string? ns, object o, bool xsiT
writeRaw = false;
break;
case TypeCode.Int32:
value = XmlConvert.ToString((int)o);
tryFormatResult = XmlConvert.TryFormat((int)o, buffer, out charsWritten);
type = "int";
break;
case TypeCode.Boolean:
value = XmlConvert.ToString((bool)o);
tryFormatResult = XmlConvert.TryFormat((bool)o, buffer, out charsWritten);
type = "boolean";
break;
case TypeCode.Int16:
value = XmlConvert.ToString((short)o);
tryFormatResult = XmlConvert.TryFormat((short)o, buffer, out charsWritten);
type = "short";
break;
case TypeCode.Int64:
value = XmlConvert.ToString((long)o);
tryFormatResult = XmlConvert.TryFormat((long)o, buffer, out charsWritten);
type = "long";
break;
case TypeCode.Single:
value = XmlConvert.ToString((float)o);
tryFormatResult = XmlConvert.TryFormat((float)o, buffer, out charsWritten);
type = "float";
break;
case TypeCode.Double:
value = XmlConvert.ToString((double)o);
tryFormatResult = XmlConvert.TryFormat((double)o, buffer, out charsWritten);
type = "double";
break;
case TypeCode.Decimal:
value = XmlConvert.ToString((decimal)o);
tryFormatResult = XmlConvert.TryFormat((decimal)o, buffer, out charsWritten);
type = "decimal";
break;
case TypeCode.DateTime:
value = FromDateTime((DateTime)o);
tryFormatResult = TryFormatDateTime((DateTime)o, buffer, out charsWritten);
type = "dateTime";
break;
case TypeCode.Char:
value = FromChar((char)o);
tryFormatResult = XmlConvert.TryFormat((ushort)(char)o, buffer, out charsWritten);
type = "char";
typeNs = UrtTypes.Namespace;
break;
case TypeCode.Byte:
value = XmlConvert.ToString((byte)o);
tryFormatResult = XmlConvert.TryFormat((byte)o, buffer, out charsWritten);
type = "unsignedByte";
break;
case TypeCode.SByte:
value = XmlConvert.ToString((sbyte)o);
tryFormatResult = XmlConvert.TryFormat((sbyte)o, buffer, out charsWritten);
type = "byte";
break;
case TypeCode.UInt16:
value = XmlConvert.ToString((ushort)o);
tryFormatResult = XmlConvert.TryFormat((ushort)o, buffer, out charsWritten);
type = "unsignedShort";
break;
case TypeCode.UInt32:
value = XmlConvert.ToString((uint)o);
tryFormatResult = XmlConvert.TryFormat((uint)o, buffer, out charsWritten);
type = "unsignedInt";
break;
case TypeCode.UInt64:
value = XmlConvert.ToString((ulong)o);
tryFormatResult = XmlConvert.TryFormat((ulong)o, buffer, out charsWritten);
type = "unsignedLong";
break;

Expand All @@ -340,19 +351,19 @@ protected void WriteTypedPrimitive(string? name, string? ns, object o, bool xsiT
}
else if (t == typeof(Guid))
{
value = XmlConvert.ToString((Guid)o);
tryFormatResult = XmlConvert.TryFormat((Guid)o, buffer, out charsWritten);
type = "guid";
typeNs = UrtTypes.Namespace;
}
else if (t == typeof(TimeSpan))
{
value = XmlConvert.ToString((TimeSpan)o);
tryFormatResult = XmlConvert.TryFormat((TimeSpan)o, buffer, out charsWritten);
type = "TimeSpan";
typeNs = UrtTypes.Namespace;
}
else if (t == typeof(DateTimeOffset))
{
value = XmlConvert.ToString((DateTimeOffset)o);
tryFormatResult = XmlConvert.TryFormat((DateTimeOffset)o, buffer, out charsWritten);
type = "dateTimeOffset";
typeNs = UrtTypes.Namespace;
}
Expand Down Expand Up @@ -387,21 +398,29 @@ protected void WriteTypedPrimitive(string? name, string? ns, object o, bool xsiT

if (xsiType) WriteXsiType(type, typeNs);

if (value == null)
{
_w.WriteAttributeString("nil", XmlSchema.InstanceNamespace, "true");
}
else if (writeDirect)
if (writeDirect)
{
// only one type currently writes directly to XML stream
XmlCustomFormatter.WriteArrayBase64(_w, (byte[])o, 0, ((byte[])o).Length);
}
else if (writeRaw)
else if (tryFormatResult != null)
{
_w.WriteRaw(value);
Debug.Assert(tryFormatResult.Value, "Something goes wrong with formatting primitives to the buffer.");
//all the primitive types except string and XmlQualifiedName writes to the buffer
_w.WriteChars(buffer, 0, charsWritten);
}
else
_w.WriteString(value);
{
if (value == null)
_w.WriteAttributeString("nil", XmlSchema.InstanceNamespace, "true");
else if (writeRaw)
{
_w.WriteRaw(value);
}
else
_w.WriteString(value);
}

_w.WriteEndElement();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,17 @@ internal static string FromDateTime(DateTime value)
}
}

/// <summary>
/// Writes the XML text form of <paramref name="value"/> into <paramref name="destination"/>,
/// choosing the format from the current <c>Mode</c>.
/// </summary>
/// <param name="value">The date/time value to format.</param>
/// <param name="destination">The character span that receives the formatted text.</param>
/// <param name="charsWritten">Set by the selected <c>XmlConvert.TryFormat</c> overload.</param>
/// <returns>The result reported by the selected <c>XmlConvert.TryFormat</c> overload.</returns>
internal static bool TryFormatDateTime(DateTime value, Span<char> destination, out int charsWritten)
{
    bool useLocalFormat = Mode == DateTimeSerializationSection.DateTimeSerializationMode.Local;

    // Local mode uses an explicit format string with a time-zone offset; every other
    // mode (DateTimeSerializationMode.Roundtrip and DateTimeSerializationMode.Default)
    // is formatted with XmlDateTimeSerializationMode.RoundtripKind.
    return useLocalFormat
        ? XmlConvert.TryFormat(value, "yyyy-MM-ddTHH:mm:ss.fffffffzzzzzz", destination, out charsWritten)
        : XmlConvert.TryFormat(value, XmlDateTimeSerializationMode.RoundtripKind, destination, out charsWritten);
}

internal static string FromChar(char value)
{
return XmlConvert.ToString((ushort)value);
Expand Down
Loading