String and byte[] converters using partial reads/writes #112129

Draft · wants to merge 2 commits into base: main
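To illustrate the scenario this draft targets, here is a minimal sketch using only the existing public JsonSerializer API; the partial (segmented) reads and writes are internal behavior this PR adds for large string and byte[] values:

```csharp
using System;
using System.IO;
using System.Text.Json;

// A large binary payload: today the full base64 string is buffered in the writer
// before it can be flushed; with segmented writes the converter can emit it in
// chunks once the flush threshold is reached. Illustrative only; the public API
// surface is unchanged by this PR.
byte[] payload = new byte[16 * 1024 * 1024];
Random.Shared.NextBytes(payload);

using var stream = new MemoryStream();
await JsonSerializer.SerializeAsync(stream, new { Data = payload });
```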
2 changes: 2 additions & 0 deletions src/libraries/System.Text.Json/src/System.Text.Json.csproj
@@ -136,6 +136,7 @@ The System.Text.Json library is built-in as part of the shared framework in .NET
<Compile Include="System\Text\Json\Serialization\IJsonOnDeserializing.cs" />
<Compile Include="System\Text\Json\Serialization\IJsonOnSerialized.cs" />
<Compile Include="System\Text\Json\Serialization\IJsonOnSerializing.cs" />
<Compile Include="System\Text\Json\Serialization\JsonHybridResumableConverter.cs" />
<Compile Include="System\Text\Json\Serialization\JsonNumberEnumConverter.cs" />
<Compile Include="System\Text\Json\Serialization\JsonSerializer.Read.Document.cs" />
<Compile Include="System\Text\Json\Serialization\JsonSerializer.Read.Element.cs" />
@@ -158,6 +159,7 @@ The System.Text.Json library is built-in as part of the shared framework in .NET
<Compile Include="System\Text\Json\Serialization\Metadata\JsonTypeInfoResolverChain.cs" />
<Compile Include="System\Text\Json\Serialization\Metadata\JsonTypeInfoResolverWithAddedModifiers.cs" />
<Compile Include="System\Text\Json\Serialization\Metadata\PropertyRefCacheBuilder.cs" />
<Compile Include="System\Text\Json\Serialization\Metadata\StringTypeInfo.cs" />
<Compile Include="System\Text\Json\Serialization\PolymorphicSerializationState.cs" />
<Compile Include="System\Text\Json\StackHelper.cs" />
<Compile Include="System\Text\Json\ValueQueue.cs" />
@@ -18,7 +18,7 @@ internal sealed class JsonValuePrimitive<TValue> : JsonValue<TValue>
public JsonValuePrimitive(TValue value, JsonConverter<TValue> converter, JsonNodeOptions? options) : base(value, options)
{
Debug.Assert(TypeIsSupportedPrimitive, $"The type {typeof(TValue)} is not a supported primitive.");
Debug.Assert(converter is { IsInternalConverter: true, ConverterStrategy: ConverterStrategy.Value });
Debug.Assert(converter is { IsInternalConverter: true, ConverterStrategy: ConverterStrategy.SimpleValue or ConverterStrategy.SegmentableValue });

_converter = converter;
_valueKind = DetermineValueKind(value);
@@ -46,6 +46,8 @@ public ref partial struct Utf8JsonReader
private SequencePosition _currentPosition;
private readonly ReadOnlySequence<byte> _sequence;

internal bool _hasPartialStringValue;
Member:
Like with Utf8JsonWriter, couldn't this just use the same special JsonTokenType value to reflect segmented reads?


private readonly bool IsLastSpan => _isFinalBlock && (!_isMultiSegment || _isLastSegment);

internal readonly ReadOnlySequence<byte> OriginalSequence => _sequence;
@@ -1276,7 +1278,7 @@ private bool ConsumePropertyName()
return true;
}

private bool ConsumeString()
private bool ConsumeString(int offset = 0)
{
Debug.Assert(_buffer.Length >= _consumed + 1);
Debug.Assert(_buffer[_consumed] == JsonConstants.Quote);
@@ -1288,7 +1290,7 @@
// If the first found byte is a quote, we have reached an end of string, and
// can avoid validation.
// Otherwise, in the uncommon case, iterate one character at a time and validate.
int idx = localBuffer.IndexOfQuoteOrAnyControlOrBackSlash();
int idx = localBuffer.Slice(offset).IndexOfQuoteOrAnyControlOrBackSlash() + offset;

if (idx >= 0)
{
@@ -1300,6 +1302,7 @@
ValueIsEscaped = false;
_tokenType = JsonTokenType.String;
_consumed += idx + 2;
_hasPartialStringValue = false;
Member:
This flag introduces a new state for Utf8JsonReader that could break user-defined converters not expecting it. We should make absolutely sure that instances setting this flag never leak via public APIs. I would recommend adding a few debug asserts, e.g. in the Read() method, checking that the flag is always unset.
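A minimal sketch of the kind of assert being suggested; the placement and the surrounding Read() internals are paraphrased here and are not part of this diff:

```csharp
public bool Read()
{
    // Suggested guard: _hasPartialStringValue is bookkeeping for segmented string
    // reads and should never be observable from a user-initiated Read() call.
    Debug.Assert(!_hasPartialStringValue,
        "Partial string state must not leak through the public Read() API.");

    // ... existing single-/multi-segment read logic unchanged ...
    return _isMultiSegment ? ReadMultiSegment() : ReadSingleSegment();
}
```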

return true;
}
else
@@ -1314,10 +1317,19 @@
_bytePositionInLine += localBuffer.Length + 1; // Account for the start quote
ThrowHelper.ThrowJsonReaderException(ref this, ExceptionResource.EndOfStringNotFound);
}

ValueSpan = localBuffer;
ValueIsEscaped = false;
_hasPartialStringValue = true;
return false;
}
}

internal bool ContinueConsumeString()
{
return ConsumeString(ValueSpan.Length);
}

// Found a backslash or control characters which are considered invalid within a string.
// Search through the rest of the string one byte at a time.
// https://tools.ietf.org/html/rfc8259#section-7
@@ -1367,7 +1379,8 @@ private bool ConsumeStringAndValidate(ReadOnlySpan<byte> data, int idx)
else
{
// We found less than 4 hex digits. Check if there is more data to follow, otherwise throw.
idx = data.Length;
idx += 5;
Debug.Assert(idx > data.Length);
break;
}

@@ -1390,6 +1403,9 @@ private bool ConsumeStringAndValidate(ReadOnlySpan<byte> data, int idx)
}
_lineNumber = prevLineNumber;
_bytePositionInLine = prevLineBytePosition;
ValueSpan = idx > data.Length ? data.Slice(0, idx - 6) : data;
ValueIsEscaped = true;
_hasPartialStringValue = true;
return false;
}

@@ -1399,6 +1415,7 @@ private bool ConsumeStringAndValidate(ReadOnlySpan<byte> data, int idx)
ValueIsEscaped = true;
_tokenType = JsonTokenType.String;
_consumed += idx + 2;
_hasPartialStringValue = false;
return true;
}

@@ -25,7 +25,11 @@ internal enum ConverterStrategy : byte
/// <summary>
/// Simple values or user-provided custom converters.
/// </summary>
Value = 0x2,
SimpleValue = 0x2,
/// <summary>
/// Values that can participate in resumable serialization, for example by splitting the value and writing each split segment separately.
/// </summary>
SegmentableValue = 0x4,
/// <summary>
/// Enumerable collections except dictionaries.
/// </summary>
@@ -394,7 +394,7 @@ internal sealed override bool OnTryWrite(

if (!jsonPropertyInfo.GetMemberAndWriteJson(obj!, ref state, writer))
{
Debug.Assert(jsonPropertyInfo.EffectiveConverter.ConverterStrategy != ConverterStrategy.Value);
Debug.Assert(jsonPropertyInfo.EffectiveConverter.ConverterStrategy != ConverterStrategy.SimpleValue);
return false;
}

@@ -1,19 +1,23 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Buffers.Text;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text.Json.Schema;

namespace System.Text.Json.Serialization.Converters
{
internal sealed class ByteArrayConverter : JsonConverter<byte[]?>
internal sealed class ByteArrayConverter : JsonHybridResumableConverter<byte[]?>
{
public override byte[]? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
{
if (reader.TokenType == JsonTokenType.Null)
{
return null;
}

return reader.GetBytesFromBase64();
}

@@ -29,6 +33,176 @@ public override void Write(Utf8JsonWriter writer, byte[]? value, JsonSerializerO
}
}

internal override bool OnTryRead(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options, scoped ref ReadStack state, out byte[]? value)
{
if (state.Current.ObjectState < StackFrameObjectState.CreatedObject)
Member:
Like with existing resumable converters, there should be a fast path that runs when SupportContinuation is set to false. In this case we can assume that the string has been fully populated and therefore we can read it using the non-segmenting APIs.
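A rough sketch of the suggested fast path, following the shape of the existing resumable converters; the placement inside ByteArrayConverter.OnTryRead and the surrounding code are assumptions, not part of this diff:

```csharp
internal override bool OnTryRead(ref Utf8JsonReader reader, Type typeToConvert,
    JsonSerializerOptions options, scoped ref ReadStack state, out byte[]? value)
{
    if (!state.SupportContinuation)
    {
        // The whole payload is known to be buffered, so the string token is complete
        // and the simple (non-segmenting) Read path can be used directly.
        value = Read(ref reader, typeToConvert, options);
        return true;
    }

    // ... slow, segment-aware path as implemented in this PR ...
    value = null;
    return false;
}
```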

Member Author:
This is handled in JsonConverter.TryRead, where we check ConverterStrategy == ConverterStrategy.SegmentableValue && !state.IsContinuation && !reader._hasPartialStringValue. That guarantees that only partially populated buffers go through this path, since fully populated buffers would have reader._hasPartialStringValue == false. It's too late to do the check here because the stack has already been pushed at this point, so it needs to happen in JsonConverter.TryRead or earlier.

Member:
JsonConverter.TryRead is a hot-path method, and historically changes to it have caused a great deal of performance regressions. I would recommend moving as much of the logic as possible inside the resumable converters themselves. I would also recommend keeping as much consistency as possible with the existing resumable converters, since their code is extremely nontrivial and difficult to follow.
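For reference, a paraphrased sketch of the guard described in the author's reply above; the surrounding JsonConverter.TryRead code is simplified and is not shown as it actually exists:

```csharp
// Inside JsonConverter<T>.TryRead (simplified): a segmentable value whose string
// token is fully buffered takes the existing fast path and never pushes a frame.
if (ConverterStrategy == ConverterStrategy.SegmentableValue &&
    !state.IsContinuation &&
    !reader._hasPartialStringValue)
{
    value = Read(ref reader, typeToConvert, options);
    return true;
}

// Otherwise fall through to the resumable path, which pushes a stack frame and
// calls OnTryRead so the converter can accumulate string segments across buffers.
```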

{
// This is the first segment so it can't be the only/last segment since we are on the slow read path.
Debug.Assert(reader._hasPartialStringValue);

state.Current.ObjectState = StackFrameObjectState.ReadElements;

ReadSegment(ref reader, ref state);

value = null;
return false;
}

Debug.Assert(state.Current.ObjectState == StackFrameObjectState.ReadElements);

bool consumedEntireString = reader.ContinueConsumeString();
if (!consumedEntireString && reader.IsFinalBlock)
{
// TODO
throw new Exception();
}

ReadSegment(ref reader, ref state);

if (consumedEntireString)
{
value = GetStringFromChunks(reader, state);
return true;
}

value = null;
return false;

static byte[] GetStringFromChunks(Utf8JsonReader reader, ReadStack state)
{
Debug.Assert(reader.TokenType == JsonTokenType.String);

List<ArraySegment<byte>>? chunks = (List<ArraySegment<byte>>?)state.Current.ReturnValue!;
if (chunks == null)
{
// Nothing escaped, so just use the raw value.
return reader.ValueSpan.ToArray();
}

int totalSize = 0;
foreach (ArraySegment<byte> c in chunks)
{
totalSize += c.Count;
}

byte[] ret = new byte[totalSize];
int idx = 0;
foreach (ArraySegment<byte> c in chunks)
{
c.AsSpan().CopyTo(ret.AsSpan(idx, c.Count));
idx += c.Count;
ArrayPool<byte>.Shared.Return(c.Array!);
}

return ret;
}
}

private static void ReadSegment(ref Utf8JsonReader reader, scoped ref ReadStack state)
{
ReadOnlySpan<byte> newSegment = reader.ValueSpan.Slice(state.Current.PropertyIndex);

if (reader.ValueIsEscaped)
{
int idx = newSegment.IndexOf(JsonConstants.BackSlash);
if (idx >= 0)
{
ReadSegmentEscaped(ref reader, ref state, newSegment, idx);
return;
}
}

state.Current.PropertyIndex = reader.ValueSpan.Length;
}

private static void ReadSegmentEscaped(ref Utf8JsonReader reader, scoped ref ReadStack state, ReadOnlySpan<byte> newSegment, int indexOfFirstCharToEscape)
{
List<ArraySegment<byte>>? chunks = (List<ArraySegment<byte>>?)state.Current.ReturnValue;
if (chunks == null)
{
// First time we are encountering an escaped character.
chunks = new List<ArraySegment<byte>>();
state.Current.ReturnValue = chunks;

// The chunk must include all the segments skipped so far.
indexOfFirstCharToEscape += state.Current.PropertyIndex;
newSegment = reader.ValueSpan;
}

byte[] unescaped = ArrayPool<byte>.Shared.Rent(newSegment.Length);
JsonReaderHelper.Unescape(newSegment, unescaped, indexOfFirstCharToEscape, out int written);
state.Current.PropertyIndex = reader.ValueSpan.Length;

chunks.Add(new ArraySegment<byte>(unescaped, 0, written));
}

internal override bool WriteWithoutStackFrame(Utf8JsonWriter writer, byte[]? value, JsonSerializerOptions options, ref WriteStack state)
{
if (value == null)
{
writer.WriteNullValue();
return true;
}
else if (state.FlushThreshold == 0 || value.Length < state.FlushThreshold)
{
// Fast write for small strings. Note that previous unflushed data may still be in the
// writer but we can let the enclosing container handle the flushing in this case.
writer.WriteBase64StringValue(value);
return true;
}

return WriteWithStackFrame(writer, value, options, ref state);
}

internal override bool OnTryWrite(Utf8JsonWriter writer, byte[]? value, JsonSerializerOptions options, ref WriteStack state)
{
if (!state.Current.ProcessedStartToken)
{
state.Current.ProcessedStartToken = true;
if (ShouldFlush(ref state, writer))
{
return false;
}
}

Debug.Assert(value != null);
bool isFinal = GetNextWriteSegment(value, ref state, out int writeIndex, out int writeLength);
writer.WriteBase64StringSegment(value.AsSpan(writeIndex, writeLength), isFinal);
state.Current.EnumeratorIndex += writeLength;

// We either wrote the entire input or hit the flush threshold.
Debug.Assert(ShouldFlush(ref state, writer) || isFinal);

return isFinal;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool GetNextWriteSegment(byte[] value, ref WriteStack state, out int start, out int length)
{
Debug.Assert(state.Current.EnumeratorIndex >= 0);
Debug.Assert(state.Current.EnumeratorIndex < value.Length);

int writeIndex = state.Current.EnumeratorIndex;

// Write enough to guarantee a flush. Base64 encoding expands the data by 4/3, so we can write less and still hit the threshold,
// but we don't need to be exact because the threshold is set very conservatively.
int writeLength = state.FlushThreshold == 0 ? int.MaxValue : state.FlushThreshold + 1;

// If the input isn't large enough to hit the flush threshold, write the entire input as the final segment.
bool isFinal = false;
int remainingInputBytes = value.Length - state.Current.EnumeratorIndex;
if (remainingInputBytes <= writeLength)
{
writeLength = remainingInputBytes;
isFinal = true;
}

start = writeIndex;
length = writeLength;

return isFinal;
}

internal override JsonSchema? GetSchema(JsonNumberHandling _) => new() { Type = JsonSchemaType.String };
}
}
@@ -6,6 +6,7 @@

namespace System.Text.Json.Serialization.Converters
{
// TODO
internal sealed class ReadOnlyMemoryByteConverter : JsonConverter<ReadOnlyMemory<byte>>
{
public override bool HandleNull => true;