Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.
/ corefx Public archive

Commit

Permalink
Use Encoding.Preamble in StreamReader/Writer (#23321)
Browse files: browse the repository at this point in the history
  • Loading branch information
justinvp authored and stephentoub committed Aug 17, 2017
1 parent 171820d commit e0c1f20
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 35 deletions.
46 changes: 13 additions & 33 deletions src/System.Runtime.Extensions/src/System/IO/StreamReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@ public class StreamReader : TextReader
// StreamReader.Null is threadsafe.
public new static readonly StreamReader Null = new NullStreamReader();

// Encoding.GetPreamble() always allocates and returns a new byte[] array for
// encodings that have a preamble.
// We can avoid repeated allocations for the default and commonly used Encoding.UTF8
// encoding by using our own private cached instance of the UTF8 preamble.
// This is lazily allocated the first time it is used.
private static byte[] s_utf8Preamble;

// Using a 1K byte buffer and a 4K FileStream buffer works out pretty well
// perf-wise. On even a 40 MB text file, any perf loss by using a 4K
// buffer is negated by the win of allocating a smaller byte[], which
Expand All @@ -39,7 +32,6 @@ public class StreamReader : TextReader
private Decoder _decoder;
private byte[] _byteBuffer;
private char[] _charBuffer;
private byte[] _preamble; // Encoding's preamble, which identifies this encoding.
private int _charPos;
private int _charLen;
// Record the number of valid bytes in the byteBuffer, for a few checks.
Expand Down Expand Up @@ -206,21 +198,7 @@ private void Init(Stream stream, Encoding encoding, bool detectEncodingFromByteO
_byteLen = 0;
_bytePos = 0;
_detectEncoding = detectEncodingFromByteOrderMarks;

// Encoding.GetPreamble() always allocates and returns a new byte[] array for
// encodings that have a preamble.
// We can avoid repeated allocations for the default and commonly used Encoding.UTF8
// encoding by using our own private cached instance of the UTF8 preamble.
// We specifically look for Encoding.UTF8 because we know it has a preamble,
// whereas other instances of UTF8Encoding may not have a preamble enabled, and
// there's no public way to tell if the preamble is enabled for an instance other
// than calling GetPreamble(), which we're trying to avoid.
// This means that other instances of UTF8Encoding are excluded from this optimization.
_preamble = object.ReferenceEquals(encoding, Encoding.UTF8) ?
(s_utf8Preamble ?? (s_utf8Preamble = encoding.GetPreamble())) :
encoding.GetPreamble();

_checkPreamble = (_preamble.Length > 0);
_checkPreamble = encoding.Preamble.Length > 0;
_isBlocked = false;
_closable = !leaveOpen;
}
Expand Down Expand Up @@ -560,27 +538,29 @@ private bool IsPreamble()
return _checkPreamble;
}

Debug.Assert(_bytePos <= _preamble.Length, "_compressPreamble was called with the current bytePos greater than the preamble buffer length. Are two threads using this StreamReader at the same time?");
int len = (_byteLen >= (_preamble.Length)) ? (_preamble.Length - _bytePos) : (_byteLen - _bytePos);
ReadOnlySpan<byte> preamble = _encoding.Preamble;

Debug.Assert(_bytePos <= preamble.Length, "_compressPreamble was called with the current bytePos greater than the preamble buffer length. Are two threads using this StreamReader at the same time?");
int len = (_byteLen >= (preamble.Length)) ? (preamble.Length - _bytePos) : (_byteLen - _bytePos);

for (int i = 0; i < len; i++, _bytePos++)
{
if (_byteBuffer[_bytePos] != _preamble[_bytePos])
if (_byteBuffer[_bytePos] != preamble[_bytePos])
{
_bytePos = 0;
_checkPreamble = false;
break;
}
}

Debug.Assert(_bytePos <= _preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
Debug.Assert(_bytePos <= preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");

if (_checkPreamble)
{
if (_bytePos == _preamble.Length)
if (_bytePos == preamble.Length)
{
// We have a match
CompressBuffer(_preamble.Length);
CompressBuffer(preamble.Length);
_bytePos = 0;
_checkPreamble = false;
_detectEncoding = false;
Expand All @@ -604,7 +584,7 @@ internal virtual int ReadBuffer()
{
if (_checkPreamble)
{
Debug.Assert(_bytePos <= _preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
int len = _stream.Read(_byteBuffer, _bytePos, _byteBuffer.Length - _bytePos);
Debug.Assert(len >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

Expand Down Expand Up @@ -701,7 +681,7 @@ private int ReadBuffer(char[] userBuffer, int userOffset, int desiredChars, out

if (_checkPreamble)
{
Debug.Assert(_bytePos <= _preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
int len = _stream.Read(_byteBuffer, _bytePos, _byteBuffer.Length - _bytePos);
Debug.Assert(len >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

Expand Down Expand Up @@ -1058,7 +1038,7 @@ internal override async Task<int> ReadAsyncInternal(char[] buffer, int index, in

if (_checkPreamble)
{
Debug.Assert(_bytePos <= _preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
int tmpBytePos = _bytePos;
int len = await tmpStream.ReadAsync(tmpByteBuffer, tmpBytePos, tmpByteBuffer.Length - tmpBytePos).ConfigureAwait(false);
Debug.Assert(len >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");
Expand Down Expand Up @@ -1238,7 +1218,7 @@ private async Task<int> ReadBufferAsync()
{
if (_checkPreamble)
{
Debug.Assert(_bytePos <= _preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
int tmpBytePos = _bytePos;
int len = await tmpStream.ReadAsync(tmpByteBuffer, tmpBytePos, tmpByteBuffer.Length - tmpBytePos).ConfigureAwait(false);
Debug.Assert(len >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");
Expand Down
4 changes: 2 additions & 2 deletions src/System.Runtime.Extensions/src/System/IO/StreamWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,10 @@ private void Flush(bool flushStream, bool flushEncoder)
if (!_haveWrittenPreamble)
{
_haveWrittenPreamble = true;
byte[] preamble = _encoding.GetPreamble();
ReadOnlySpan<byte> preamble = _encoding.Preamble;
if (preamble.Length > 0)
{
_stream.Write(preamble, 0, preamble.Length);
_stream.Write(preamble);
}
}

Expand Down

0 comments on commit e0c1f20

Please sign in to comment.