From 6c0ec8df5cbad271173024464d34d38d0a587920 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Mon, 29 Jan 2024 13:57:22 -0500
Subject: [PATCH 1/9] feat(NODE-5861): optimize parsing basic latin strings

---
 src/utils/node_byte_utils.ts | 27 +++++++++++++++++++++++++++
 src/utils/web_byte_utils.ts  | 27 +++++++++++++++++++++++++++
 test/node/byte_utils.test.ts | 27 +++++++++++++++++++++++++--
 3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/src/utils/node_byte_utils.ts b/src/utils/node_byte_utils.ts
index 214b1e39..1621811b 100644
--- a/src/utils/node_byte_utils.ts
+++ b/src/utils/node_byte_utils.ts
@@ -126,6 +126,33 @@ export const nodeJsByteUtils = {
   },
 
   toUTF8(buffer: Uint8Array, start: number, end: number): string {
+    if (buffer.length === 0) {
+      return '';
+    }
+
+    const stringByteLength = end - start;
+    if (stringByteLength === 0) {
+      return '';
+    }
+
+    if (stringByteLength < 200) {
+      let basicLatin = true;
+      const latinBytes = [];
+      for (let i = start; i < end; i++) {
+        const byte = buffer[i];
+        if (byte > 127) {
+          basicLatin = false;
+          break;
+        }
+        latinBytes.push(byte);
+      }
+
+      if (basicLatin) {
+        // eslint-disable-next-line prefer-spread
+        return String.fromCharCode.apply(String, latinBytes);
+      }
+    }
+
     return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
   },
 
diff --git a/src/utils/web_byte_utils.ts b/src/utils/web_byte_utils.ts
index cf93e43a..28dda05e 100644
--- a/src/utils/web_byte_utils.ts
+++ b/src/utils/web_byte_utils.ts
@@ -173,6 +173,33 @@ export const webByteUtils = {
   },
 
   toUTF8(uint8array: Uint8Array, start: number, end: number): string {
+    if (uint8array.length === 0) {
+      return '';
+    }
+
+    const stringByteLength = end - start;
+    if (stringByteLength === 0) {
+      return '';
+    }
+
+    if (stringByteLength < 200) {
+      let basicLatin = true;
+      const latinBytes = [];
+      for (let i = start; i < end; i++) {
+        const byte = uint8array[i];
+        if (byte > 127) {
+          basicLatin = false;
+          break;
+        }
+        latinBytes.push(byte);
+      }
+
+      if (basicLatin) {
+        // eslint-disable-next-line prefer-spread
+        return String.fromCharCode.apply(String, latinBytes);
+      }
+    }
+
     return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end));
   },
 
diff --git a/test/node/byte_utils.test.ts b/test/node/byte_utils.test.ts
index 81fba0c4..f2cac490 100644
--- a/test/node/byte_utils.test.ts
+++ b/test/node/byte_utils.test.ts
@@ -400,7 +400,7 @@ const fromUTF8Tests: ByteUtilTest<'fromUTF8'>[] = [
 const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
   {
     name: 'should create utf8 string from buffer input',
-    inputs: [Buffer.from('abc\u{1f913}', 'utf8')],
+    inputs: [Buffer.from('abc\u{1f913}', 'utf8'), 0, 7],
     expectation({ output, error }) {
       expect(error).to.be.null;
       expect(output).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8').toString('utf8'));
@@ -408,7 +408,7 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
   },
   {
     name: 'should return empty string for empty buffer input',
-    inputs: [Buffer.alloc(0)],
+    inputs: [Buffer.alloc(0), 0, 1],
     expectation({ output, error }) {
       expect(error).to.be.null;
       expect(output).to.be.a('string').with.lengthOf(0);
@@ -596,6 +596,29 @@ describe('ByteUtils', () => {
     });
   });
 
+  describe('toUTF8 basic latin optimization', () => {
+    afterEach(() => {
+      sinon.restore();
+    });
+
+    context('Given a basic latin string', () => {
+      it('should not invoke Buffer.toString', () => {
+        const buffer = Buffer.from('abcdef', 'utf8');
+        const spy = sinon.spy(buffer, 'toString');
+        nodeJsByteUtils.toUTF8(buffer, 0, 6);
+        expect(spy).to.not.have.been.called;
+      });
+
+      it('should not invoke TextDecoder.decode', () => {
+        const utf8Bytes = Buffer.from('abcdef', 'utf8');
+        const buffer = new Uint8Array(utf8Bytes.buffer, utf8Bytes.byteOffset, utf8Bytes.byteLength);
+        const spy = sinon.spy(TextDecoder.prototype, 'decode');
+        webByteUtils.toUTF8(buffer, 0, 6);
+        expect(spy).to.not.have.been.called;
+      });
+    });
+  });
+
   describe('randomBytes fallback case when crypto is not present', () => {
     describe('web', function () {
       let bsonWithNoCryptoCtx;

From e353a30c2e5b5459c74b5e56b1c15b1519c143d1 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Mon, 29 Jan 2024 14:49:45 -0500
Subject: [PATCH 2/9] feat(NODE-5861): move UTF8 validation below basic latin
 parsing

---
 rollup.config.mjs            |  2 +-
 src/binary.ts                |  4 ++--
 src/error.ts                 |  6 +++---
 src/parser/deserializer.ts   | 41 +++++++++---------------------------
 src/utils/byte_utils.ts      |  4 ++--
 src/utils/node_byte_utils.ts | 17 +++++++++++++--
 src/utils/web_byte_utils.ts  | 11 ++++++++--
 test/node/byte_utils.test.ts | 24 +++++++++++++++++----
 tsconfig.json                |  1 +
 9 files changed, 63 insertions(+), 47 deletions(-)

diff --git a/rollup.config.mjs b/rollup.config.mjs
index b44e4cd7..17c402d8 100644
--- a/rollup.config.mjs
+++ b/rollup.config.mjs
@@ -13,7 +13,7 @@ const tsConfig = {
   module: 'esnext',
   moduleResolution: 'node',
   removeComments: true,
-  lib: ['es2021'],
+  lib: ['es2021', 'ES2022.Error'],
   importHelpers: false,
   noEmitHelpers: false,
   noEmitOnError: true,
diff --git a/src/binary.ts b/src/binary.ts
index b08067af..6c4d54fc 100644
--- a/src/binary.ts
+++ b/src/binary.ts
@@ -191,8 +191,8 @@ export class Binary extends BSONValue {
     if (encoding === 'hex') return ByteUtils.toHex(this.buffer);
     if (encoding === 'base64') return ByteUtils.toBase64(this.buffer);
     if (encoding === 'utf8' || encoding === 'utf-8')
-      return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength);
-    return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength);
+      return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength, false);
+    return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength, false);
   }
 
   /** @internal */
diff --git a/src/error.ts b/src/error.ts
index 63e63517..5ae2495c 100644
--- a/src/error.ts
+++ b/src/error.ts
@@ -4,7 +4,7 @@ import { BSON_MAJOR_VERSION } from './constants';
  * @public
  * @category Error
  *
- * `BSONError` objects are thrown when BSON ecounters an error.
+ * `BSONError` objects are thrown when BSON encounters an error.
  *
  * This is the parent class for all the other errors thrown by this library.
  */
@@ -23,8 +23,8 @@ export class BSONError extends Error {
     return 'BSONError';
   }
 
-  constructor(message: string) {
-    super(message);
+  constructor(message: string, options?: ErrorOptions) {
+    super(message, options);
   }
 
   /**
diff --git a/src/parser/deserializer.ts b/src/parser/deserializer.ts
index abb61046..28183387 100644
--- a/src/parser/deserializer.ts
+++ b/src/parser/deserializer.ts
@@ -236,7 +236,7 @@ function deserializeObject(
     if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');
 
     // Represents the key
-    const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i);
+    const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i, false);
 
     // shouldValidateKey is true if the key should be validated, false otherwise
     let shouldValidateKey = true;
@@ -266,7 +266,7 @@ function deserializeObject(
       ) {
         throw new BSONError('bad string length in bson');
       }
-      value = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
+      value = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
       index = index + stringSize;
     } else if (elementType === constants.BSON_DATA_OID) {
       const oid = ByteUtils.allocate(12);
@@ -476,7 +476,7 @@ function deserializeObject(
       // If are at the end of the buffer there is a problem with the document
       if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
       // Return the C string
-      const source = ByteUtils.toUTF8(buffer, index, i);
+      const source = ByteUtils.toUTF8(buffer, index, i, false);
       // Create the regexp
       index = i + 1;
 
@@ -489,7 +489,7 @@ function deserializeObject(
       // If are at the end of the buffer there is a problem with the document
       if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
       // Return the C string
-      const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
+      const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
       index = i + 1;
 
       // For each option add the corresponding one for javascript
@@ -521,7 +521,7 @@ function deserializeObject(
       // If are at the end of the buffer there is a problem with the document
       if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
       // Return the C string
-      const source = ByteUtils.toUTF8(buffer, index, i);
+      const source = ByteUtils.toUTF8(buffer, index, i, false);
       index = i + 1;
 
       // Get the start search index
@@ -533,7 +533,7 @@ function deserializeObject(
       // If are at the end of the buffer there is a problem with the document
       if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
       // Return the C string
-      const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
+      const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
       index = i + 1;
 
       // Set the object
@@ -551,7 +551,7 @@ function deserializeObject(
       ) {
         throw new BSONError('bad string length in bson');
       }
-      const symbol = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
+      const symbol = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
       value = promoteValues ? symbol : new BSONSymbol(symbol);
       index = index + stringSize;
     } else if (elementType === constants.BSON_DATA_TIMESTAMP) {
@@ -587,7 +587,7 @@ function deserializeObject(
       ) {
         throw new BSONError('bad string length in bson');
       }
-      const functionString = getValidatedString(
+      const functionString = ByteUtils.toUTF8(
         buffer,
         index,
         index + stringSize - 1,
@@ -626,7 +626,7 @@ function deserializeObject(
       }
 
       // Javascript function
-      const functionString = getValidatedString(
+      const functionString = ByteUtils.toUTF8(
         buffer,
         index,
         index + stringSize - 1,
@@ -678,7 +678,7 @@ function deserializeObject(
           throw new BSONError('Invalid UTF-8 string in BSON document');
         }
       }
-      const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1);
+      const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, false);
       // Update parse index position
       index = index + stringSize;
 
@@ -728,24 +728,3 @@ function deserializeObject(
 
   return object;
 }
-
-function getValidatedString(
-  buffer: Uint8Array,
-  start: number,
-  end: number,
-  shouldValidateUtf8: boolean
-) {
-  const value = ByteUtils.toUTF8(buffer, start, end);
-  // if utf8 validation is on, do the check
-  if (shouldValidateUtf8) {
-    for (let i = 0; i < value.length; i++) {
-      if (value.charCodeAt(i) === 0xfffd) {
-        if (!validateUtf8(buffer, start, end)) {
-          throw new BSONError('Invalid UTF-8 string in BSON document');
-        }
-        break;
-      }
-    }
-  }
-  return value;
-}
diff --git a/src/utils/byte_utils.ts b/src/utils/byte_utils.ts
index 5f8d05ef..d611c906 100644
--- a/src/utils/byte_utils.ts
+++ b/src/utils/byte_utils.ts
@@ -25,8 +25,8 @@ export type ByteUtils = {
   toHex: (buffer: Uint8Array) => string;
   /** Create a Uint8Array containing utf8 code units from a string */
   fromUTF8: (text: string) => Uint8Array;
-  /** Create a string from utf8 code units */
-  toUTF8: (buffer: Uint8Array, start: number, end: number) => string;
+  /** Create a string from utf8 code units, fatal=true will throw an error if UTF-8 bytes are invalid, fatal=false will insert replacement characters */
+  toUTF8: (buffer: Uint8Array, start: number, end: number, fatal: boolean) => string;
   /** Get the utf8 code unit count from a string if it were to be transformed to utf8 */
   utf8ByteLength: (input: string) => number;
   /** Encode UTF8 bytes generated from `source` string into `destination` at byteOffset. Returns the number of bytes encoded. */
diff --git a/src/utils/node_byte_utils.ts b/src/utils/node_byte_utils.ts
index 1621811b..961be476 100644
--- a/src/utils/node_byte_utils.ts
+++ b/src/utils/node_byte_utils.ts
@@ -1,4 +1,5 @@
 import { BSONError } from '../error';
+import { validateUtf8 } from '../validate_utf8';
 
 type NodeJsEncoding = 'base64' | 'hex' | 'utf8' | 'binary';
 type NodeJsBuffer = ArrayBufferView &
@@ -125,7 +126,7 @@ export const nodeJsByteUtils = {
     return Buffer.from(text, 'utf8');
   },
 
-  toUTF8(buffer: Uint8Array, start: number, end: number): string {
+  toUTF8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
     if (buffer.length === 0) {
       return '';
     }
@@ -153,7 +154,19 @@ export const nodeJsByteUtils = {
       }
     }
 
-    return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
+    const string = nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
+    if (fatal) {
+      // TODO(NODE-4930): Insufficiently strict BSON UTF8 validation
+      for (let i = 0; i < string.length; i++) {
+        if (string.charCodeAt(i) === 0xfffd) {
+          if (!validateUtf8(buffer, start, end)) {
+            throw new BSONError('Invalid UTF-8 string in BSON document');
+          }
+          break;
+        }
+      }
+    }
+    return string;
   },
 
   utf8ByteLength(input: string): number {
diff --git a/src/utils/web_byte_utils.ts b/src/utils/web_byte_utils.ts
index 28dda05e..5dee3732 100644
--- a/src/utils/web_byte_utils.ts
+++ b/src/utils/web_byte_utils.ts
@@ -172,7 +172,7 @@ export const webByteUtils = {
     return new TextEncoder().encode(text);
   },
 
-  toUTF8(uint8array: Uint8Array, start: number, end: number): string {
+  toUTF8(uint8array: Uint8Array, start: number, end: number, fatal: boolean): string {
     if (uint8array.length === 0) {
       return '';
     }
@@ -200,7 +200,14 @@ export const webByteUtils = {
       }
     }
 
-    return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end));
+    if (fatal) {
+      try {
+        return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
+      } catch (cause) {
+        throw new BSONError('Invalid UTF-8 string in BSON document', { cause });
+      }
+    }
+    return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
   },
 
   utf8ByteLength(input: string): number {
diff --git a/test/node/byte_utils.test.ts b/test/node/byte_utils.test.ts
index f2cac490..49156739 100644
--- a/test/node/byte_utils.test.ts
+++ b/test/node/byte_utils.test.ts
@@ -8,6 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
 import * as sinon from 'sinon';
 import { loadCJSModuleBSON, loadReactNativeCJSModuleBSON, loadESModuleBSON } from '../load_bson';
 import * as crypto from 'node:crypto';
+import { BSONError } from '../register-bson';
 
 type ByteUtilTest<K extends keyof ByteUtils> = {
   name: string;
@@ -400,7 +401,7 @@ const fromUTF8Tests: ByteUtilTest<'fromUTF8'>[] = [
 const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
   {
     name: 'should create utf8 string from buffer input',
-    inputs: [Buffer.from('abc\u{1f913}', 'utf8'), 0, 7],
+    inputs: [Buffer.from('abc\u{1f913}', 'utf8'), 0, 7, false],
     expectation({ output, error }) {
       expect(error).to.be.null;
       expect(output).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8').toString('utf8'));
@@ -408,11 +409,26 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
   },
   {
     name: 'should return empty string for empty buffer input',
-    inputs: [Buffer.alloc(0), 0, 1],
+    inputs: [Buffer.alloc(0), 0, 1, false],
     expectation({ output, error }) {
       expect(error).to.be.null;
       expect(output).to.be.a('string').with.lengthOf(0);
     }
+  },
+  {
+    name: 'should throw an error if fatal is set and string is invalid',
+    inputs: [Buffer.from('616263f09fa4', 'hex'), 0, 7, true],
+    expectation({ error }) {
+      expect(error).to.be.instanceOf(BSONError);
+    }
+  },
+  {
+    name: 'should insert replacement character fatal is false and string is invalid',
+    inputs: [Buffer.from('616263f09fa4', 'hex'), 0, 7, false],
+    expectation({ error, output }) {
+      expect(error).to.not.exist;
+      expect(output).to.equal('abc\uFFFD');
+    }
   }
 ];
 const utf8ByteLengthTests: ByteUtilTest<'utf8ByteLength'>[] = [
@@ -605,7 +621,7 @@ describe('ByteUtils', () => {
       it('should not invoke Buffer.toString', () => {
         const buffer = Buffer.from('abcdef', 'utf8');
         const spy = sinon.spy(buffer, 'toString');
-        nodeJsByteUtils.toUTF8(buffer, 0, 6);
+        nodeJsByteUtils.toUTF8(buffer, 0, 6, false);
         expect(spy).to.not.have.been.called;
       });
 
@@ -613,7 +629,7 @@ describe('ByteUtils', () => {
         const utf8Bytes = Buffer.from('abcdef', 'utf8');
         const buffer = new Uint8Array(utf8Bytes.buffer, utf8Bytes.byteOffset, utf8Bytes.byteLength);
         const spy = sinon.spy(TextDecoder.prototype, 'decode');
-        webByteUtils.toUTF8(buffer, 0, 6);
+        webByteUtils.toUTF8(buffer, 0, 6, false);
         expect(spy).to.not.have.been.called;
       });
     });
diff --git a/tsconfig.json b/tsconfig.json
index b439bf5b..d11664fe 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -10,6 +10,7 @@
     "skipLibCheck": true,
     "lib": [
       "es2021",
+      "ES2022.Error"
     ],
     "outDir": "lib",
     "importHelpers": false,

From 7e3bcd0c02c902b4fc66233b785519ecd3390a18 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Mon, 29 Jan 2024 14:59:38 -0500
Subject: [PATCH 3/9] fix: error ctor and assertion for error

---
 src/error.ts                 | 2 +-
 test/node/byte_utils.test.ts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/error.ts b/src/error.ts
index 5ae2495c..3f2711e9 100644
--- a/src/error.ts
+++ b/src/error.ts
@@ -23,7 +23,7 @@ export class BSONError extends Error {
     return 'BSONError';
   }
 
-  constructor(message: string, options?: ErrorOptions) {
+  constructor(message: string, options?: { cause?: unknown }) {
     super(message, options);
   }
 
diff --git a/test/node/byte_utils.test.ts b/test/node/byte_utils.test.ts
index 49156739..31ba4242 100644
--- a/test/node/byte_utils.test.ts
+++ b/test/node/byte_utils.test.ts
@@ -419,7 +419,7 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
     name: 'should throw an error if fatal is set and string is invalid',
     inputs: [Buffer.from('616263f09fa4', 'hex'), 0, 7, true],
     expectation({ error }) {
-      expect(error).to.be.instanceOf(BSONError);
+      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
     }
   },
   {

From d956aeaafbbaca224a012b9151b28e037d145843 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Mon, 29 Jan 2024 15:01:27 -0500
Subject: [PATCH 4/9] fix: lint

---
 test/node/byte_utils.test.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/node/byte_utils.test.ts b/test/node/byte_utils.test.ts
index 31ba4242..9526329a 100644
--- a/test/node/byte_utils.test.ts
+++ b/test/node/byte_utils.test.ts
@@ -8,7 +8,6 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
 import * as sinon from 'sinon';
 import { loadCJSModuleBSON, loadReactNativeCJSModuleBSON, loadESModuleBSON } from '../load_bson';
 import * as crypto from 'node:crypto';
-import { BSONError } from '../register-bson';
 
 type ByteUtilTest<K extends keyof ByteUtils> = {
   name: string;

From 9ef267c1ba1ba619c1d6bff710d8134d1dcffd86 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Tue, 30 Jan 2024 16:18:16 -0500
Subject: [PATCH 5/9] feat: lower stringByteLength cap to 20

---
 src/utils/latin.ts           | 66 ++++++++++++++++++++++++++++++++++++
 src/utils/node_byte_utils.ts | 29 +++-------------
 src/utils/web_byte_utils.ts  | 29 +++-------------
 3 files changed, 74 insertions(+), 50 deletions(-)
 create mode 100644 src/utils/latin.ts

diff --git a/src/utils/latin.ts b/src/utils/latin.ts
new file mode 100644
index 00000000..3dcf8045
--- /dev/null
+++ b/src/utils/latin.ts
@@ -0,0 +1,66 @@
+/**
+ * This function is an optimization for small basic latin strings.
+ *
+ * @remarks
+ * ### Important characteristics:
+ * - If the uint8array or distance between start and end is 0 this function returns an empty string
+ * - If the byteLength of the string is 1, 2, or 3 we invoke String.fromCharCode and manually offset into the buffer
+ * - If the byteLength of the string is less than or equal to 20 an array of bytes is built and `String.fromCharCode.apply` is called with the result
+ * - If any byte exceeds 128 this function returns null
+ *
+ * @param uint8array - A sequence of bytes that may contain basic latin characters
+ * @param start - The start index from which to search the uint8array
+ * @param end - The index to stop searching the uint8array
+ * @returns string if all bytes are within the basic latin range, otherwise null
+ */
+export function tryLatin(uint8array: Uint8Array, start: number, end: number): string | null {
+  if (uint8array.length === 0) {
+    return '';
+  }
+
+  const stringByteLength = end - start;
+  if (stringByteLength === 0) {
+    return '';
+  }
+
+  if (stringByteLength === 1 && uint8array[start] < 128) {
+    return String.fromCharCode(uint8array[start]);
+  }
+
+  if (stringByteLength === 2 && uint8array[start] < 128 && uint8array[start + 1] < 128) {
+    return String.fromCharCode(uint8array[start]) + String.fromCharCode(uint8array[start + 1]);
+  }
+
+  if (
+    stringByteLength === 3 &&
+    uint8array[start] < 128 &&
+    uint8array[start + 1] < 128 &&
+    uint8array[start + 2] < 128
+  ) {
+    return (
+      String.fromCharCode(uint8array[start]) +
+      String.fromCharCode(uint8array[start + 1]) +
+      String.fromCharCode(uint8array[start + 2])
+    );
+  }
+
+  if (stringByteLength <= 20) {
+    let basicLatin = true;
+    const latinBytes = [];
+    for (let i = start; i < end; i++) {
+      const byte = uint8array[i];
+      if (byte > 127) {
+        basicLatin = false;
+        break;
+      }
+      latinBytes.push(byte);
+    }
+
+    if (basicLatin) {
+      // eslint-disable-next-line prefer-spread
+      return String.fromCharCode.apply(String, latinBytes);
+    }
+  }
+
+  return null;
+}
diff --git a/src/utils/node_byte_utils.ts b/src/utils/node_byte_utils.ts
index 961be476..85811e14 100644
--- a/src/utils/node_byte_utils.ts
+++ b/src/utils/node_byte_utils.ts
@@ -1,5 +1,6 @@
 import { BSONError } from '../error';
 import { validateUtf8 } from '../validate_utf8';
+import { tryLatin } from './latin';
 
 type NodeJsEncoding = 'base64' | 'hex' | 'utf8' | 'binary';
 type NodeJsBuffer = ArrayBufferView &
@@ -127,31 +128,9 @@ export const nodeJsByteUtils = {
   },
 
   toUTF8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
-    if (buffer.length === 0) {
-      return '';
-    }
-
-    const stringByteLength = end - start;
-    if (stringByteLength === 0) {
-      return '';
-    }
-
-    if (stringByteLength < 200) {
-      let basicLatin = true;
-      const latinBytes = [];
-      for (let i = start; i < end; i++) {
-        const byte = buffer[i];
-        if (byte > 127) {
-          basicLatin = false;
-          break;
-        }
-        latinBytes.push(byte);
-      }
-
-      if (basicLatin) {
-        // eslint-disable-next-line prefer-spread
-        return String.fromCharCode.apply(String, latinBytes);
-      }
+    const basicLatin = end - start <= 20 ? tryLatin(buffer, start, end) : null;
+    if (basicLatin != null) {
+      return basicLatin;
     }
 
     const string = nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
diff --git a/src/utils/web_byte_utils.ts b/src/utils/web_byte_utils.ts
index 5dee3732..9e38efd4 100644
--- a/src/utils/web_byte_utils.ts
+++ b/src/utils/web_byte_utils.ts
@@ -1,4 +1,5 @@
 import { BSONError } from '../error';
+import { tryLatin } from './latin';
 
 type TextDecoder = {
   readonly encoding: string;
@@ -173,31 +174,9 @@ export const webByteUtils = {
   },
 
   toUTF8(uint8array: Uint8Array, start: number, end: number, fatal: boolean): string {
-    if (uint8array.length === 0) {
-      return '';
-    }
-
-    const stringByteLength = end - start;
-    if (stringByteLength === 0) {
-      return '';
-    }
-
-    if (stringByteLength < 200) {
-      let basicLatin = true;
-      const latinBytes = [];
-      for (let i = start; i < end; i++) {
-        const byte = uint8array[i];
-        if (byte > 127) {
-          basicLatin = false;
-          break;
-        }
-        latinBytes.push(byte);
-      }
-
-      if (basicLatin) {
-        // eslint-disable-next-line prefer-spread
-        return String.fromCharCode.apply(String, latinBytes);
-      }
+    const basicLatin = end - start <= 20 ? tryLatin(uint8array, start, end) : null;
+    if (basicLatin != null) {
+      return basicLatin;
     }
 
     if (fatal) {

From 5837166077770450240f435976de270ded89d690 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Tue, 30 Jan 2024 16:21:55 -0500
Subject: [PATCH 6/9] test: release new file

---
 src/utils/latin.ts        | 2 +-
 test/node/release.test.ts | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/utils/latin.ts b/src/utils/latin.ts
index 3dcf8045..016eb4d8 100644
--- a/src/utils/latin.ts
+++ b/src/utils/latin.ts
@@ -1,6 +1,6 @@
 /**
  * This function is an optimization for small basic latin strings.
- *
+ * @internal
  * @remarks
  * ### Important characteristics:
  * - If the uint8array or distance between start and end is 0 this function returns an empty string
diff --git a/test/node/release.test.ts b/test/node/release.test.ts
index b34f0517..c2a20cf4 100644
--- a/test/node/release.test.ts
+++ b/test/node/release.test.ts
@@ -46,6 +46,7 @@ const REQUIRED_FILES = [
   'src/utils/byte_utils.ts',
   'src/utils/node_byte_utils.ts',
   'src/utils/web_byte_utils.ts',
+  'src/utils/latin.ts',
   'src/validate_utf8.ts',
   'vendor/base64/base64.js',
   'vendor/base64/package.json',

From 49d96aff9b0ba8ec293739997c1ee594b6b617d1 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Wed, 31 Jan 2024 09:19:41 -0500
Subject: [PATCH 7/9] test: add unit tests

---
 src/utils/latin.ts            |   3 +-
 test/node/utils/latin.test.ts | 118 ++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+), 1 deletion(-)
 create mode 100644 test/node/utils/latin.test.ts

diff --git a/src/utils/latin.ts b/src/utils/latin.ts
index 016eb4d8..76f9ad26 100644
--- a/src/utils/latin.ts
+++ b/src/utils/latin.ts
@@ -1,3 +1,5 @@
+/* eslint-disable prefer-spread */
+
 /**
  * This function is an optimization for small basic latin strings.
  * @internal
@@ -57,7 +59,6 @@ export function tryLatin(uint8array: Uint8Array, start: number, end: number): st
     }
 
     if (basicLatin) {
-      // eslint-disable-next-line prefer-spread
       return String.fromCharCode.apply(String, latinBytes);
     }
   }
diff --git a/test/node/utils/latin.test.ts b/test/node/utils/latin.test.ts
new file mode 100644
index 00000000..96e0e6fa
--- /dev/null
+++ b/test/node/utils/latin.test.ts
@@ -0,0 +1,118 @@
+import { expect } from 'chai';
+import { tryLatin } from '../../../src/utils/latin';
+import * as sinon from 'sinon';
+
+describe('tryLatin()', () => {
+  context('when given a buffer of length 0', () => {
+    it('returns an empty string', () => {
+      expect(tryLatin(new Uint8Array(), 0, 10)).to.equal('');
+    });
+  });
+
+  context('when the distance between end and start is 0', () => {
+    it('returns an empty string', () => {
+      expect(tryLatin(new Uint8Array([1, 2, 3]), 0, 0)).to.equal('');
+    });
+  });
+
+  let pushSpy;
+  let fromCharCodeSpy;
+
+  beforeEach(() => {
+    pushSpy = sinon.spy(Array.prototype, 'push');
+    fromCharCodeSpy = sinon.spy(String, 'fromCharCode');
+  });
+
+  afterEach(() => {
+    sinon.restore();
+  });
+
+  context('when there is 1 byte', () => {
+    context('that exceed 127', () => {
+      it('returns null', () => {
+        expect(tryLatin(new Uint8Array([128]), 0, 1)).be.null;
+      });
+    });
+
+    it('calls fromCharCode once', () => {
+      tryLatin(new Uint8Array([95]), 0, 1);
+      expect(fromCharCodeSpy).to.have.been.calledOnce;
+    });
+
+    it('never calls array.push', () => {
+      tryLatin(new Uint8Array([95]), 0, 1);
+      expect(pushSpy).to.have.not.been.called;
+    });
+  });
+
+  context('when there is 2 bytes', () => {
+    context('that exceed 127', () => {
+      it('returns null', () => {
+        expect(tryLatin(new Uint8Array([0, 128]), 0, 2)).be.null;
+        expect(tryLatin(new Uint8Array([128, 0]), 0, 2)).be.null;
+        expect(tryLatin(new Uint8Array([128, 128]), 0, 2)).be.null;
+      });
+    });
+
+    it('calls fromCharCode twice', () => {
+      tryLatin(new Uint8Array([95, 105]), 0, 2);
+      expect(fromCharCodeSpy).to.have.been.calledTwice;
+    });
+
+    it('never calls array.push', () => {
+      tryLatin(new Uint8Array([95, 105]), 0, 2);
+      expect(pushSpy).to.have.not.been.called;
+    });
+  });
+
+  context('when there is 3 bytes', () => {
+    context('that exceed 127', () => {
+      it('returns null', () => {
+        expect(tryLatin(new Uint8Array([0, 0, 128]), 0, 3)).be.null;
+        expect(tryLatin(new Uint8Array([0, 128, 0]), 0, 3)).be.null;
+        expect(tryLatin(new Uint8Array([128, 0, 0]), 0, 3)).be.null;
+        expect(tryLatin(new Uint8Array([128, 128, 128]), 0, 3)).be.null;
+        expect(tryLatin(new Uint8Array([128, 128, 0]), 0, 3)).be.null;
+        expect(tryLatin(new Uint8Array([128, 0, 128]), 0, 3)).be.null;
+        expect(tryLatin(new Uint8Array([0, 128, 128]), 0, 3)).be.null;
+      });
+    });
+
+    it('calls fromCharCode thrice', () => {
+      tryLatin(new Uint8Array([95, 105, 100]), 0, 3);
+      expect(fromCharCodeSpy).to.have.been.calledThrice;
+    });
+
+    it('never calls array.push', () => {
+      tryLatin(new Uint8Array([95, 105, 100]), 0, 3);
+      expect(pushSpy).to.have.not.been.called;
+    });
+  });
+
+  for (let stringLength = 4; stringLength <= 20; stringLength++) {
+    context(`when there is ${stringLength} bytes`, () => {
+      context('that exceed 127', () => {
+        it('returns null', () => {
+          expect(tryLatin(new Uint8Array(stringLength).fill(128), 0, stringLength)).be.null;
+        });
+      });
+
+      it('calls fromCharCode once', () => {
+        tryLatin(new Uint8Array(stringLength).fill(95), 0, stringLength);
+        expect(fromCharCodeSpy).to.have.been.calledOnce;
+      });
+
+      it(`calls array.push ${stringLength}`, () => {
+        tryLatin(new Uint8Array(stringLength).fill(95), 0, stringLength);
+        expect(pushSpy).to.have.callCount(stringLength);
+      });
+    });
+  }
+
+  context('when there is >21 bytes', () => {
+    it('returns null', () => {
+      expect(tryLatin(new Uint8Array(21).fill(95), 0, 21)).be.null;
+      expect(tryLatin(new Uint8Array(201).fill(95), 0, 201)).be.null;
+    });
+  });
+});

From 3e609bf2cfbe67adf0ee5f38f7c6018809e495e8 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Wed, 31 Jan 2024 14:22:27 -0500
Subject: [PATCH 8/9] fix: remove apply

---
 src/utils/latin.ts | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/utils/latin.ts b/src/utils/latin.ts
index 76f9ad26..f50805dd 100644
--- a/src/utils/latin.ts
+++ b/src/utils/latin.ts
@@ -1,5 +1,3 @@
-/* eslint-disable prefer-spread */
-
 /**
  * This function is an optimization for small basic latin strings.
  * @internal
@@ -59,7 +57,7 @@ export function tryLatin(uint8array: Uint8Array, start: number, end: number): st
     }
 
     if (basicLatin) {
-      return String.fromCharCode.apply(String, latinBytes);
+      return String.fromCharCode(...latinBytes);
     }
   }
 

From 267cc51ef0a984b4e7ea1f3514894f87a1c836a0 Mon Sep 17 00:00:00 2001
From: Neal Beeken <neal.beeken@mongodb.com>
Date: Wed, 31 Jan 2024 14:54:14 -0500
Subject: [PATCH 9/9] fix: move len 20 check up and remove basicLatin boolean

---
 src/utils/latin.ts | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/src/utils/latin.ts b/src/utils/latin.ts
index f50805dd..860497db 100644
--- a/src/utils/latin.ts
+++ b/src/utils/latin.ts
@@ -23,6 +23,10 @@ export function tryLatin(uint8array: Uint8Array, start: number, end: number): st
     return '';
   }
 
+  if (stringByteLength > 20) {
+    return null;
+  }
+
   if (stringByteLength === 1 && uint8array[start] < 128) {
     return String.fromCharCode(uint8array[start]);
   }
@@ -44,22 +48,14 @@ export function tryLatin(uint8array: Uint8Array, start: number, end: number): st
     );
   }
 
-  if (stringByteLength <= 20) {
-    let basicLatin = true;
-    const latinBytes = [];
-    for (let i = start; i < end; i++) {
-      const byte = uint8array[i];
-      if (byte > 127) {
-        basicLatin = false;
-        break;
-      }
-      latinBytes.push(byte);
-    }
-
-    if (basicLatin) {
-      return String.fromCharCode(...latinBytes);
+  const latinBytes = [];
+  for (let i = start; i < end; i++) {
+    const byte = uint8array[i];
+    if (byte > 127) {
+      return null;
     }
+    latinBytes.push(byte);
   }
 
-  return null;
+  return String.fromCharCode(...latinBytes);
 }