-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
String and byte[] converters using partial reads/writes #112129
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,6 +46,8 @@ public ref partial struct Utf8JsonReader | |
private SequencePosition _currentPosition; | ||
private readonly ReadOnlySequence<byte> _sequence; | ||
|
||
internal bool _hasPartialStringValue; | ||
|
||
private readonly bool IsLastSpan => _isFinalBlock && (!_isMultiSegment || _isLastSegment); | ||
|
||
internal readonly ReadOnlySequence<byte> OriginalSequence => _sequence; | ||
|
@@ -1276,7 +1278,7 @@ private bool ConsumePropertyName() | |
return true; | ||
} | ||
|
||
private bool ConsumeString() | ||
private bool ConsumeString(int offset = 0) | ||
{ | ||
Debug.Assert(_buffer.Length >= _consumed + 1); | ||
Debug.Assert(_buffer[_consumed] == JsonConstants.Quote); | ||
|
@@ -1288,7 +1290,7 @@ private bool ConsumeString() | |
// If the first found byte is a quote, we have reached an end of string, and | ||
// can avoid validation. | ||
// Otherwise, in the uncommon case, iterate one character at a time and validate. | ||
int idx = localBuffer.IndexOfQuoteOrAnyControlOrBackSlash(); | ||
int idx = localBuffer.Slice(offset).IndexOfQuoteOrAnyControlOrBackSlash() + offset; | ||
|
||
if (idx >= 0) | ||
{ | ||
|
@@ -1300,6 +1302,7 @@ private bool ConsumeString() | |
ValueIsEscaped = false; | ||
_tokenType = JsonTokenType.String; | ||
_consumed += idx + 2; | ||
_hasPartialStringValue = false; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This flag is introducing a new state for |
||
return true; | ||
} | ||
else | ||
|
@@ -1314,10 +1317,19 @@ private bool ConsumeString() | |
_bytePositionInLine += localBuffer.Length + 1; // Account for the start quote | ||
ThrowHelper.ThrowJsonReaderException(ref this, ExceptionResource.EndOfStringNotFound); | ||
} | ||
|
||
ValueSpan = localBuffer; | ||
ValueIsEscaped = false; | ||
_hasPartialStringValue = true; | ||
return false; | ||
} | ||
} | ||
|
||
// Resumes ConsumeString for a string value that was previously left partial.
// The number of bytes already exposed through ValueSpan is passed as the offset
// so the scan continues after them; the result of ConsumeString is returned as-is.
internal bool ContinueConsumeString() => ConsumeString(ValueSpan.Length);
|
||
// Found a backslash or control characters which are considered invalid within a string. | ||
// Search through the rest of the string one byte at a time. | ||
// https://tools.ietf.org/html/rfc8259#section-7 | ||
|
@@ -1367,7 +1379,8 @@ private bool ConsumeStringAndValidate(ReadOnlySpan<byte> data, int idx) | |
else | ||
{ | ||
// We found less than 4 hex digits. Check if there is more data to follow, otherwise throw. | ||
idx = data.Length; | ||
idx += 5; | ||
Debug.Assert(idx > data.Length); | ||
break; | ||
} | ||
|
||
|
@@ -1390,6 +1403,9 @@ private bool ConsumeStringAndValidate(ReadOnlySpan<byte> data, int idx) | |
} | ||
_lineNumber = prevLineNumber; | ||
_bytePositionInLine = prevLineBytePosition; | ||
ValueSpan = idx > data.Length ? data.Slice(0, idx - 6) : data; | ||
ValueIsEscaped = true; | ||
_hasPartialStringValue = true; | ||
return false; | ||
} | ||
|
||
|
@@ -1399,6 +1415,7 @@ private bool ConsumeStringAndValidate(ReadOnlySpan<byte> data, int idx) | |
ValueIsEscaped = true; | ||
_tokenType = JsonTokenType.String; | ||
_consumed += idx + 2; | ||
_hasPartialStringValue = false; | ||
return true; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,23 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Buffers; | ||
using System.Buffers.Text; | ||
using System.Collections.Generic; | ||
using System.Diagnostics; | ||
using System.Runtime.CompilerServices; | ||
using System.Text.Json.Schema; | ||
|
||
namespace System.Text.Json.Serialization.Converters | ||
{ | ||
internal sealed class ByteArrayConverter : JsonConverter<byte[]?> | ||
internal sealed class ByteArrayConverter : JsonHybridResumableConverter<byte[]?> | ||
{ | ||
/// <summary>
/// Reads a byte array from the reader's current token.
/// A JSON null token yields <see langword="null"/>; any other token is handed to
/// <see cref="Utf8JsonReader.GetBytesFromBase64"/>.
/// </summary>
public override byte[]? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
{
    bool isJsonNull = reader.TokenType == JsonTokenType.Null;
    return isJsonNull ? null : reader.GetBytesFromBase64();
}
|
||
|
@@ -29,6 +33,176 @@ public override void Write(Utf8JsonWriter writer, byte[]? value, JsonSerializerO | |
} | ||
} | ||
|
||
internal override bool OnTryRead(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options, scoped ref ReadStack state, out byte[]? value) | ||
{ | ||
if (state.Current.ObjectState < StackFrameObjectState.CreatedObject) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Like with existing resumable converters, there should be a fast path that runs when There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is handled in There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
{ | ||
// This is the first segment so it can't be the only/last segment since we are on the slow read path. | ||
Debug.Assert(reader._hasPartialStringValue); | ||
|
||
state.Current.ObjectState = StackFrameObjectState.ReadElements; | ||
|
||
ReadSegment(ref reader, ref state); | ||
|
||
value = null; | ||
return false; | ||
} | ||
|
||
Debug.Assert(state.Current.ObjectState == StackFrameObjectState.ReadElements); | ||
|
||
bool consumedEntireString = reader.ContinueConsumeString(); | ||
if (!consumedEntireString && reader.IsFinalBlock) | ||
{ | ||
// TODO | ||
throw new Exception(); | ||
} | ||
|
||
ReadSegment(ref reader, ref state); | ||
|
||
if (consumedEntireString) | ||
{ | ||
value = GetStringFromChunks(reader, state); | ||
return true; | ||
} | ||
|
||
value = null; | ||
return false; | ||
|
||
// Builds the final byte[] once the whole string token has been consumed.
//
// The JSON string holds Base64 text, so the collected UTF-8 bytes must be decoded
// before they are returned (the non-resumable Read path does this through
// reader.GetBytesFromBase64()). Two cases:
//   * no chunk list was created (no escape sequence was seen): decode reader.ValueSpan;
//   * a chunk list exists: concatenate the chunks in order and decode the result.
// Every pooled chunk buffer is returned to the pool, even when decoding fails.
static byte[] GetStringFromChunks(Utf8JsonReader reader, ReadStack state)
{
    Debug.Assert(reader.TokenType == JsonTokenType.String);

    List<ArraySegment<byte>>? chunks = (List<ArraySegment<byte>>?)state.Current.ReturnValue;
    if (chunks is null)
    {
        // Nothing escaped, so the raw token bytes are the Base64 text.
        return DecodeBase64(reader.ValueSpan);
    }

    int totalSize = 0;
    foreach (ArraySegment<byte> c in chunks)
    {
        totalSize += c.Count;
    }

    // Scratch buffer for the concatenated Base64 text; only the decoded bytes are returned.
    byte[] joined = ArrayPool<byte>.Shared.Rent(totalSize);
    try
    {
        int idx = 0;
        foreach (ArraySegment<byte> c in chunks)
        {
            c.AsSpan().CopyTo(joined.AsSpan(idx));
            idx += c.Count;
        }

        return DecodeBase64(joined.AsSpan(0, totalSize));
    }
    finally
    {
        ArrayPool<byte>.Shared.Return(joined);

        foreach (ArraySegment<byte> c in chunks)
        {
            ArrayPool<byte>.Shared.Return(c.Array!);
        }

        // The buffers are back in the pool; drop the segments so they cannot be returned twice.
        chunks.Clear();
    }

    // Decodes UTF-8 Base64 text into a right-sized array.
    static byte[] DecodeBase64(ReadOnlySpan<byte> utf8Base64)
    {
        byte[] decoded = new byte[Base64.GetMaxDecodedFromUtf8Length(utf8Base64.Length)];
        OperationStatus status = Base64.DecodeFromUtf8(utf8Base64, decoded, out int consumed, out int written);

        if (status != OperationStatus.Done || consumed != utf8Base64.Length)
        {
            // NOTE(review): GetBytesFromBase64 is documented to throw FormatException for
            // invalid Base64; confirm whether this should go through the library's
            // throw helper / localized resources instead.
            throw new FormatException();
        }

        if (written != decoded.Length)
        {
            Array.Resize(ref decoded, written);
        }

        return decoded;
    }
}
} | ||
|
||
/// <summary>
/// Processes the part of <c>reader.ValueSpan</c> that has not been seen yet, i.e. everything
/// from <c>state.Current.PropertyIndex</c> onwards, and then advances
/// <c>state.Current.PropertyIndex</c> to the end of the span.
/// </summary>
/// <remarks>
/// If the new segment contains a backslash it is unescaped by <see cref="ReadSegmentEscaped"/>.
/// Otherwise, when a chunk list already exists in <c>state.Current.ReturnValue</c>, the segment is
/// copied into that list as-is: the final value is assembled from the chunks alone, so skipping
/// an unescaped segment that follows an escaped one would drop its bytes from the result.
/// While no chunk list exists nothing is copied, because the value can still be taken
/// directly from <c>reader.ValueSpan</c>.
/// </remarks>
private static void ReadSegment(ref Utf8JsonReader reader, scoped ref ReadStack state)
{
    ReadOnlySpan<byte> newSegment = reader.ValueSpan.Slice(state.Current.PropertyIndex);

    if (reader.ValueIsEscaped)
    {
        int idx = newSegment.IndexOf(JsonConstants.BackSlash);
        if (idx >= 0)
        {
            ReadSegmentEscaped(ref reader, ref state, newSegment, idx);
            return;
        }
    }

    if (!newSegment.IsEmpty && state.Current.ReturnValue is List<ArraySegment<byte>> chunks)
    {
        // Earlier segments were already moved into chunks, so this one has to follow them.
        byte[] copy = ArrayPool<byte>.Shared.Rent(newSegment.Length);
        newSegment.CopyTo(copy);
        chunks.Add(new ArraySegment<byte>(copy, 0, newSegment.Length));
    }

    state.Current.PropertyIndex = reader.ValueSpan.Length;
}
|
||
/// <summary>
/// Unescapes a segment that contains at least one backslash into a buffer rented from
/// <see cref="ArrayPool{T}.Shared"/> and appends the result to the chunk list kept in
/// <c>state.Current.ReturnValue</c>.
/// </summary>
/// <remarks>
/// On the first escaped segment the chunk list is created and the whole
/// <c>reader.ValueSpan</c> is unescaped, so that the bytes skipped so far become part of
/// the first chunk; the escape index is shifted by <c>state.Current.PropertyIndex</c> to match.
/// </remarks>
private static void ReadSegmentEscaped(ref Utf8JsonReader reader, scoped ref ReadStack state, ReadOnlySpan<byte> newSegment, int indexOfFirstCharToEscape)
{
    ReadOnlySpan<byte> toUnescape = newSegment;
    int firstEscapeIndex = indexOfFirstCharToEscape;

    List<ArraySegment<byte>>? pieces = (List<ArraySegment<byte>>?)state.Current.ReturnValue;
    if (pieces is null)
    {
        // No escape has been seen before: start the list and cover everything read so far.
        pieces = new List<ArraySegment<byte>>();
        state.Current.ReturnValue = pieces;

        firstEscapeIndex += state.Current.PropertyIndex;
        toUnescape = reader.ValueSpan;
    }

    byte[] rented = ArrayPool<byte>.Shared.Rent(toUnescape.Length);
    JsonReaderHelper.Unescape(toUnescape, rented, firstEscapeIndex, out int bytesWritten);
    state.Current.PropertyIndex = reader.ValueSpan.Length;

    pieces.Add(new ArraySegment<byte>(rented, 0, bytesWritten));
}
|
||
/// <summary>
/// Writes <paramref name="value"/> without pushing a stack frame when it can be written in one go:
/// a null value is written as JSON null, and a value shorter than <c>state.FlushThreshold</c>
/// (or any value when the threshold is 0) is written as a single Base64 string.
/// Everything else is delegated to <c>WriteWithStackFrame</c>.
/// </summary>
internal override bool WriteWithoutStackFrame(Utf8JsonWriter writer, byte[]? value, JsonSerializerOptions options, ref WriteStack state)
{
    if (value is null)
    {
        writer.WriteNullValue();
        return true;
    }

    bool needsSegmentedWrite = state.FlushThreshold != 0 && value.Length >= state.FlushThreshold;
    if (needsSegmentedWrite)
    {
        return WriteWithStackFrame(writer, value, options, ref state);
    }

    // Fast write for small values. Previously written, still unflushed data may remain in the
    // writer; flushing it is left to the enclosing container.
    writer.WriteBase64StringValue(value);
    return true;
}
|
||
/// <summary>
/// Writes the next Base64 segment of <paramref name="value"/>, starting at
/// <c>state.Current.EnumeratorIndex</c>, and advances that index by the number of input bytes written.
/// </summary>
/// <returns>
/// <see langword="true"/> when the segment just written was the final one; <see langword="false"/>
/// when more segments remain, or when a flush is requested before anything is written on the first call.
/// </returns>
internal override bool OnTryWrite(Utf8JsonWriter writer, byte[]? value, JsonSerializerOptions options, ref WriteStack state)
{
    bool firstVisit = !state.Current.ProcessedStartToken;
    if (firstVisit)
    {
        state.Current.ProcessedStartToken = true;

        // Give pending output a chance to be flushed before the first segment is written.
        if (ShouldFlush(ref state, writer))
        {
            return false;
        }
    }

    Debug.Assert(value != null);

    bool wroteLastSegment = GetNextWriteSegment(value, ref state, out int offset, out int count);
    ReadOnlySpan<byte> segment = value.AsSpan(offset, count);
    writer.WriteBase64StringSegment(segment, wroteLastSegment);
    state.Current.EnumeratorIndex += count;

    // Either the flush threshold was hit or the whole input has been written.
    Debug.Assert(ShouldFlush(ref state, writer) || wroteLastSegment);

    return wroteLastSegment;
}
|
||
/// <summary>
/// Computes the next slice of <paramref name="value"/> to write, beginning at
/// <c>state.Current.EnumeratorIndex</c>.
/// </summary>
/// <param name="value">The full input being written.</param>
/// <param name="state">Supplies the current index and the flush threshold.</param>
/// <param name="start">Receives the index of the first byte of the slice.</param>
/// <param name="length">Receives the number of bytes in the slice.</param>
/// <returns><see langword="true"/> when the slice reaches the end of <paramref name="value"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool GetNextWriteSegment(byte[] value, ref WriteStack state, out int start, out int length)
{
    Debug.Assert(state.Current.EnumeratorIndex >= 0);
    Debug.Assert(state.Current.EnumeratorIndex < value.Length);

    start = state.Current.EnumeratorIndex;
    int remaining = value.Length - start;

    // One byte more than the threshold is enough to guarantee a flush. Base64 expands the data
    // by 4/3, so fewer bytes would do, but the threshold is conservative and exactness is not needed.
    // A threshold of 0 puts no cap on the slice.
    int cap = state.FlushThreshold == 0 ? int.MaxValue : state.FlushThreshold + 1;

    // When what is left fits under the cap, it is written whole as the final segment.
    bool fitsInOneWrite = remaining <= cap;
    length = fitsInOneWrite ? remaining : cap;

    return fitsInOneWrite;
}
|
||
// Returns a schema whose Type is JsonSchemaType.String; the number-handling argument is ignored.
internal override JsonSchema? GetSchema(JsonNumberHandling _)
{
    return new JsonSchema { Type = JsonSchemaType.String };
}
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like with Utf8JsonWriter, couldn't this just use the same special `JsonTokenType` value to reflect segmented reads?