-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NRBF] Don't use Unsafe.As when decoding DateTime(s) #105749
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -3,14 +3,18 @@ | |||||||
|
||||||||
using System.Globalization; | ||||||||
using System.IO; | ||||||||
using System.Reflection; | ||||||||
using System.Reflection.Metadata; | ||||||||
using System.Runtime.CompilerServices; | ||||||||
using System.Runtime.Serialization; | ||||||||
using System.Threading; | ||||||||
|
||||||||
namespace System.Formats.Nrbf.Utils; | ||||||||
|
||||||||
internal static class BinaryReaderExtensions | ||||||||
{ | ||||||||
private static object? s_baseAmbiguousDstDateTime; | ||||||||
|
||||||||
internal static BinaryArrayType ReadArrayType(this BinaryReader reader) | ||||||||
{ | ||||||||
byte arrayType = reader.ReadByte(); | ||||||||
|
@@ -70,36 +74,70 @@ internal static object ReadPrimitiveValue(this BinaryReader reader, PrimitiveTyp | |||||||
PrimitiveType.Single => reader.ReadSingle(), | ||||||||
PrimitiveType.Double => reader.ReadDouble(), | ||||||||
PrimitiveType.Decimal => decimal.Parse(reader.ReadString(), CultureInfo.InvariantCulture), | ||||||||
PrimitiveType.DateTime => CreateDateTimeFromData(reader.ReadInt64()), | ||||||||
PrimitiveType.DateTime => CreateDateTimeFromData(reader.ReadUInt64()), | ||||||||
_ => new TimeSpan(reader.ReadInt64()), | ||||||||
}; | ||||||||
|
||||||||
// TODO: fix https://github.com/dotnet/runtime/issues/102826 | ||||||||
/// <summary> | ||||||||
/// Creates a <see cref="DateTime"/> object from raw data with validation. | ||||||||
/// </summary> | ||||||||
/// <exception cref="SerializationException"><paramref name="data"/> was invalid.</exception> | ||||||||
internal static DateTime CreateDateTimeFromData(long data) | ||||||||
/// <exception cref="SerializationException"><paramref name="dateData"/> was invalid.</exception> | ||||||||
internal static DateTime CreateDateTimeFromData(ulong dateData) | ||||||||
{ | ||||||||
// Copied from System.Runtime.Serialization.Formatters.Binary.BinaryParser | ||||||||
|
||||||||
// Use DateTime's public constructor to validate the input, but we | ||||||||
// can't return that result as it strips off the kind. To address | ||||||||
// that, store the value directly into a DateTime via an unsafe cast. | ||||||||
// See BinaryFormatterWriter.WriteDateTime for details. | ||||||||
ulong ticks = dateData & 0x3FFFFFFF_FFFFFFFFUL; | ||||||||
DateTimeKind kind = (DateTimeKind)(dateData >> 62); | ||||||||
|
||||||||
try | ||||||||
{ | ||||||||
const long TicksMask = 0x3FFFFFFFFFFFFFFF; | ||||||||
_ = new DateTime(data & TicksMask); | ||||||||
return ((uint)kind <= (uint)DateTimeKind.Local) ? new DateTime((long)ticks, kind) : CreateFromAmbiguousDst(ticks); | ||||||||
} | ||||||||
catch (ArgumentException ex) | ||||||||
{ | ||||||||
// Bad data | ||||||||
throw new SerializationException(ex.Message, ex); | ||||||||
} | ||||||||
|
||||||||
return Unsafe.As<long, DateTime>(ref data); | ||||||||
[MethodImpl(MethodImplOptions.NoInlining)] | ||||||||
static DateTime CreateFromAmbiguousDst(ulong ticks) | ||||||||
{ | ||||||||
// There's no public API to create a DateTime from an ambiguous DST, and we | ||||||||
// can't use private reflection to access undocumented .NET Framework APIs. | ||||||||
// However, the ISerializable pattern *is* a documented protocol, so we can | ||||||||
// use DateTime's serialization ctor to create a zero-tick "ambiguous" instance, | ||||||||
// then keep reusing it as the base to which we can add our tick offsets. | ||||||||
|
||||||||
if (s_baseAmbiguousDstDateTime is not DateTime baseDateTime) | ||||||||
{ | ||||||||
#pragma warning disable SYSLIB0050 // Type or member is obsolete | ||||||||
SerializationInfo si = new(typeof(DateTime), new FormatterConverter()); | ||||||||
// We don't know the value of "ticks", so we don't specify it. | ||||||||
// If the code somehow runs on a very old runtime that does not know the concept of "dateData" | ||||||||
// (it should not be possible as the library targets .NET Standard 2.0) | ||||||||
// the ctor is going to throw rather than silently return an invalid value. | ||||||||
si.AddValue("dateData", 0xC0000000_00000000UL); // new value (serialized as ulong) | ||||||||
|
||||||||
#if NET | ||||||||
baseDateTime = CallPrivateSerializationConstructor(si, new StreamingContext(StreamingContextStates.All)); | ||||||||
#else | ||||||||
ConstructorInfo ci = typeof(DateTime).GetConstructor( | ||||||||
BindingFlags.Instance | BindingFlags.NonPublic, | ||||||||
binder: null, | ||||||||
new Type[] { typeof(SerializationInfo), typeof(StreamingContext) }, | ||||||||
modifiers: null); | ||||||||
|
||||||||
baseDateTime = (DateTime)ci.Invoke(new object[] { si, new StreamingContext(StreamingContextStates.All) }); | ||||||||
#endif | ||||||||
|
||||||||
#pragma warning restore SYSLIB0050 // Type or member is obsolete | ||||||||
Volatile.Write(ref s_baseAmbiguousDstDateTime, baseDateTime); // it's ok if two threads race here | ||||||||
} | ||||||||
|
||||||||
return baseDateTime.AddTicks((long)ticks); | ||||||||
} | ||||||||
|
||||||||
#if NET | ||||||||
[UnsafeAccessor(UnsafeAccessorKind.Constructor)] | ||||||||
extern static DateTime CallPrivateSerializationConstructor(SerializationInfo si, StreamingContext ct); | ||||||||
#endif | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using a private constructor like this from a separate package is considered safe / supported? We have a bunch of types now that implement ISerializable but that either throw from their deserialization ctor or don't have one at all... DateTime will never be on the same plan? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This ctor is part of the tagging @GrabYourPitchforks who suggested this solution in #102826 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We've already made some of them throw PlatformNotSupportedException, e.g. runtime/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs Lines 175 to 176 in 0f05719
and entirely removed them from others, e.g.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we have a public API that allows us to create the specific data time value? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, but it would not solve the problem as this package needs to support older monikers, including There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think both Unsafe.As and reflection are ok for existing targets. The existing targets are set in stone and we can make assumptions about them. Unsafe.As or reflection are less than ideal for future. They limit the changes we can do in future. |
||||||||
} | ||||||||
|
||||||||
internal static bool? IsDataAvailable(this BinaryReader reader, long requiredBytes) | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This assumes that the BF format is set is stone and that new key/value pairs won't be added in future. Is it safe assumption to make?
If somebody constructs malicious payload with extra TimeSpan, DateTime or Guid fields or with fields of unexpected type, this pattern match won't kick in, there won't be any exception thrown and we return the raw data. Is it the desired behavior for Nrbf reader? (As far as I can tell, the reader tends to throw on anything unexpected or invalid instead of accepting it silently.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are great questions.
BinaryFormatter
can represent same primitive value using different record types based on the context.In this case, when
DateTime
is the root object it is expressed asSystemClassWithMembersAndTypesRecord
which is just a type name + key/value dictionary. In other cases, it can be represented asMemberPrimitiveTypedRecord<T>
(or a raw 8 bytes).My goal was to hide this from the end users and always map it to
PrimitiveTypeRecord<T>
so users don't need to become experts in this area.If we ever extend the binary representation of given types, we may need to handle the versioning here.
It's allowed to create a type that is called
System.DateTime
and has different layout, in such cases we are going to return aClassRecord
and the users will need to handle it.@jkotas this is just the way I see it, please let me know if something is not clear or some other changes are needed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are not always mapping it to
PrimitiveTypeRecord<T>
.You are only mapping it to
PrimitiveTypeRecord<T>
if the input has specific shape. You are not mapping it for all possible valid input shapes. For example, if the payload was produced by .NET Framework 1.x (I am sure there are a bunch of such payloads still alive in the wild), it will be missingdateData
field and it is not going to be mapped. However, the classic BF deserializer is going to handle it just fine. If somebody runs into this case, they will have to do double the work: They will need to handle both mapped and the non-mapped cases.In general, I would expect the behavior to be either: