diff --git a/src/test/unit_tests/jest_tests/test_chunked_content_decoder.test.js b/src/test/unit_tests/jest_tests/test_chunked_content_decoder.test.js
new file mode 100644
index 0000000000..54b078dbac
--- /dev/null
+++ b/src/test/unit_tests/jest_tests/test_chunked_content_decoder.test.js
@@ -0,0 +1,259 @@
+/* Copyright (C) 2025 NooBaa */
+'use strict';
+
+const stream = require('stream');
+const assert = require('assert');
+const ChunkedContentDecoder = require('../../../util/chunked_content_decoder');
+const buffer_utils = require('../../../util/buffer_utils');
+
+describe('ChunkedContentDecoder', function() {
+
+    // Reminder about chunk structure:
+    // <chunk size in hex>\r\n
+    // <chunk data>\r\n
+    //....
+    // the end of the chunked content (last chunk has size 0):
+    // 0\r\n
+    // \r\n
+    //
+    // The following example was copied from:
+    // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding
+    // 7\r\n
+    // Mozilla\r\n
+    // 11\r\n
+    // Developer Network\r\n
+    // 0\r\n
+    // \r\n
+
+    // for easier debugging you can set the number of iterations here:
+    const NUMBER_OF_ITERATIONS_IMPORTANT_CASE = 100;
+    const NUMBER_OF_ITERATIONS_DEFAULT = 2;
+
+    describe('expected to parse the input', function() {
+        test_parse_output({
+            name: 'one_chunk',
+            input:
+                '3\r\n' +
+                'foo\r\n' +
+                '0\r\n' +
+                '\r\n',
+            output: 'foo',
+            iterations: NUMBER_OF_ITERATIONS_DEFAULT,
+        });
+
+        test_parse_output({
+            name: 'two_chunks',
+            input:
+                '3\r\n' +
+                'foo\r\n' +
+                '3\r\n' +
+                'bar\r\n' +
+                '0\r\n' +
+                '\r\n',
+            output: 'foobar',
+            iterations: NUMBER_OF_ITERATIONS_IMPORTANT_CASE,
+        });
+
+        test_parse_output({
+            name: 'three_chunks_with_trailers',
+            input:
+                '3\r\n' +
+                'foo\r\n' +
+                '6\r\n' +
+                'barbaz\r\n' +
+                'ff\r\n' +
+                'f'.repeat(255) + '\r\n' +
+                '0\r\n' +
+                'x-trailer-1:value\r\n' +
+                'x-trailer-2:value\r\n' +
+                '\r\n',
+            output: 'foobarbaz' + 'f'.repeat(255),
+            iterations: NUMBER_OF_ITERATIONS_IMPORTANT_CASE,
+            check: decoder => {
+                assert.deepStrictEqual(decoder.trailers, [
+                    'x-trailer-1:value',
+                    'x-trailer-2:value',
+                ]);
+            },
+        });
+
+        test_parse_output({
+            name: 'no_chunk_with_trailers',
+            input:
+                '0\r\n' +
+                'movie:trailer\r\n' +
+                'semi:trailer\r\n' +
+                '\r\n',
+            output: '',
+            iterations: NUMBER_OF_ITERATIONS_IMPORTANT_CASE,
+            check: decoder => {
+                assert.deepStrictEqual(decoder.trailers, [
+                    'movie:trailer',
+                    'semi:trailer',
+                ]);
+            },
+        });
+
+        test_parse_output({
+            name: 'one_chunk_with_extension',
+            input:
+                '3;crc=1a2b3c4d\r\n' +
+                'EXT\r\n' +
+                '0\r\n' +
+                '\r\n',
+            output: 'EXT',
+            iterations: NUMBER_OF_ITERATIONS_IMPORTANT_CASE,
+        });
+
+        test_parse_output({
+            name: 'one_chunk_with_extension_and_trailer',
+            input:
+                '3;crc=1a2b3c4d\r\n' +
+                'EXT\r\n' +
+                '0\r\n' +
+                create_trailers(1) +
+                '\r\n',
+            output: 'EXT',
+            iterations: NUMBER_OF_ITERATIONS_IMPORTANT_CASE,
+        });
+
+        test_parse_output({
+            name: 'one_chunk_with_trailers', // 19 trailers, below the MAX_TRAILERS limit
+            input:
+                '3\r\n' +
+                'foo\r\n' +
+                '0\r\n' +
+                create_trailers(19) +
+                '\r\n',
+            output: 'foo',
+            iterations: NUMBER_OF_ITERATIONS_DEFAULT,
+        });
+
+    });
+
+    describe('expected to have an error on parse', function() {
+
+        test_parse_error({
+            name: 'chunk_size_not_hex',
+            input: 'invalid\r\n\r\n',
+            error_pos: 7, // end of header
+            iterations: NUMBER_OF_ITERATIONS_IMPORTANT_CASE,
+        });
+
+        test_parse_error({
+            name: 'chunk_size_too_big', // according to MAX_CHUNK_SIZE
+            input: '10000000001\r\n\r\n',
+            error_pos: 11, // end of header
+            iterations: NUMBER_OF_ITERATIONS_IMPORTANT_CASE,
+        });
+
+        test_parse_error({
+            name: 'header_too_long', // according to MAX_CHUNK_HEADER_SIZE
+            input: '0' + ';'.repeat(1024) + '\r\n\r\n',
+            error_pos: 1025, // end of header
+            iterations: NUMBER_OF_ITERATIONS_IMPORTANT_CASE,
+        });
+
+        test_parse_error({
+            name: 'too_many_trailers', // according to MAX_TRAILERS
+            input:
+                '3\r\n' +
+                'foo\r\n' +
+                '0\r\n' +
+                create_trailers(21) +
+                '\r\n',
+            error_pos: 420, // position of the CR ending the 21st trailer (first one over the limit)
+            iterations: NUMBER_OF_ITERATIONS_DEFAULT,
+        });
+
+    });
+
+    /**
+     * @param {{
+     *   name: string,
+     *   input: string,
+     *   output: string,
+     *   iterations?: number,
+     *   check?: (decoder: ChunkedContentDecoder) => void,
+     * }} params
+     */
+    function test_parse_output({ name, input, output, check, iterations = NUMBER_OF_ITERATIONS_DEFAULT}) {
+        it(name, async function() {
+            for (let i = 0; i < iterations; ++i) {
+                const decoder = new ChunkedContentDecoder();
+                console.log(`test_parse_output(${name}): decoder input`, input, decoder.get_debug_info());
+                const readable = new stream.Readable({
+                    read() {
+                        // split at random position
+                        const sp = Math.floor(input.length * Math.random());
+                        this.push(input.slice(0, sp));
+                        this.push(input.slice(sp));
+                        this.push(null);
+                    }
+                });
+                const writable = buffer_utils.write_stream();
+                await stream.promises.pipeline(readable, decoder, writable);
+                const decoded = buffer_utils.join(writable.buffers, writable.total_length);
+                console.log(`test_parse_output(${name}): decoder returned`, decoded, decoder.get_debug_info());
+                assert.deepStrictEqual(decoded, Buffer.from(output));
+                if (check) check(decoder);
+            }
+        });
+    }
+
+    /**
+     * @param {{
+     *   name: string,
+     *   input: string,
+     *   error_pos?: number,
+     *   iterations?: number
+     * }} params
+     */
+    function test_parse_error({ name, input, error_pos, iterations = NUMBER_OF_ITERATIONS_DEFAULT }) {
+        it(name, async function() {
+            for (let i = 0; i < iterations; ++i) {
+                const decoder = new ChunkedContentDecoder();
+                console.log(`test_parse_error(${name}): decoder input`, input, decoder.get_debug_info());
+                console.log(name, 'decode', decoder);
+                try {
+                    const readable = new stream.Readable({
+                        read() {
+                            // split at random position
+                            const sp = Math.floor(input.length * Math.random());
+                            this.push(input.slice(0, sp));
+                            this.push(input.slice(sp));
+                            this.push(null);
+                        }
+                    });
+                    const writable = buffer_utils.write_stream();
+                    await stream.promises.pipeline(readable, decoder, writable);
+                    const decoded = buffer_utils.join(writable.buffers, writable.total_length);
+                    console.log(`test_parse_error(${name}): decoder returned`, decoded, decoder.get_debug_info());
+                    assert.fail('Should have failed');
+                } catch (err) {
+                    if (err.message === 'Should have failed') throw err;
+                    console.log(`test_parse_error(${name}): decoder caught`, err, decoder.get_debug_info());
+                    if (error_pos !== undefined) {
+                        assert.strictEqual(decoder.stream_pos, error_pos);
+                    }
+                }
+            }
+        });
+    }
+
+
+    /**
+     * create_trailers returns one string made of `number_of_trailers` lines `x-trailer-<index>:value\r\n`
+     * @param {number} number_of_trailers
+     * @returns {string}
+     */
+    function create_trailers(number_of_trailers) {
+        const trailers = [];
+        for (let index = 1; index <= number_of_trailers; ++index) {
+            const trailer = `x-trailer-${index}:value\r\n`;
+            trailers.push(trailer);
+        }
+        return trailers.join('');
+    }
+
+});
diff --git a/src/util/chunked_content_decoder.js b/src/util/chunked_content_decoder.js
index 605c033b8a..322ffeb844 100644
--- a/src/util/chunked_content_decoder.js
+++ b/src/util/chunked_content_decoder.js
@@ -8,27 +8,52 @@ const STATE_WAIT_NL_HEADER = 'STATE_WAIT_NL_HEADER';
 const STATE_SEND_DATA = 'STATE_SEND_DATA';
 const STATE_WAIT_CR_DATA = 'STATE_WAIT_CR_DATA';
 const STATE_WAIT_NL_DATA = 'STATE_WAIT_NL_DATA';
+const STATE_READ_TRAILER = 'STATE_READ_TRAILER';
+const STATE_WAIT_NL_TRAILER = 'STATE_WAIT_NL_TRAILER';
+const STATE_WAIT_NL_END = 'STATE_WAIT_NL_END';
 const STATE_CONTENT_END = 'STATE_CONTENT_END';
 const STATE_ERROR = 'STATE_ERROR';
 
 const CR_CODE = '\r'.charCodeAt(0);
 const NL_CODE = '\n'.charCodeAt(0);
 
+// lenient limits to avoid abuse
+const MAX_CHUNK_SIZE = 1024 * 1024 * 1024 * 1024;
+const MAX_CHUNK_HEADER_SIZE = 1024;
+const MAX_TRAILER_SIZE = 1024;
+const MAX_TRAILERS = 20;
+
 /**
  *
  * ChunkedContentDecoder
  *
  * Take a data stream and removes chunking signatures from it
  *
+ * Basic encoding structure: (combined with example)
+ * More info about the structure can be found in:
+ * https://en.wikipedia.org/wiki/Chunked_transfer_encoding
+ * ---------------------------------------------------
+ * 1fff;chunk-signature=1a2b\r\n - chunk header (optional extension) <- 1fff is the size in hex
+ * <1fff bytes of data>\r\n      - chunk data
+ * 2fff;chunk-signature=1a2b\r\n - chunk header (optional extension) <- 2fff is the size in hex
+ * <2fff bytes of data>\r\n      - chunk data
+ * 0\r\n                         - last chunk
+ * <trailer>\r\n                 - optional trailer <- example of trailer (key:value): x-amz-checksum-crc32:uOMGCw==\r\n
+ * <trailer>\r\n                 - optional trailer
+ * \r\n                          - end of content
+ * ---------------------------------------------------
  */
 class ChunkedContentDecoder extends stream.Transform {
 
     constructor(params) {
         super(params);
         this.state = STATE_READ_CHUNK_HEADER;
-        this.chunk_header_str = '';
+        this.chunk_header = '';
         this.chunk_size = 0;
-        this.chunk_signature = '';
+        this.last_chunk = false;
+        this.trailer = '';
+        this.trailers = [];
+        this.stream_pos = 0;
     }
 
     _transform(buf, encoding, callback) {
@@ -41,57 +66,212 @@ class ChunkedContentDecoder extends stream.Transform {
     }
 
     _flush(callback) {
-        if (this.state !== STATE_CONTENT_END) return this.error_state();
+        if (this.state !== STATE_CONTENT_END) return this.error_state(undefined, 0, '');
         return callback();
     }
 
+    /**
+     * Parse the buffer and update the state machine.
+     * The buffer is parsed in a loop to handle multiple chunks in the same buffer,
+     * and to handle the case where the buffer ends in the middle of a chunk.
+     * The state machine is updated according to the current state and the buffer content.
+     * The state machine is updated by the following rules:
+     *   1. STATE_READ_CHUNK_HEADER - read the chunk header until CR and parse it.
+     *   2. STATE_WAIT_NL_HEADER - wait for NL after the chunk header.
+     *   3. STATE_SEND_DATA - send chunk data to the stream until chunk size bytes sent.
+     *   4. STATE_WAIT_CR_DATA - wait for CR after the chunk data.
+     *   5. STATE_WAIT_NL_DATA - wait for NL after the chunk data.
+     *   6. STATE_READ_TRAILER - read optional trailer until CR and save it.
+     *   7. STATE_WAIT_NL_TRAILER - wait for NL after non empty trailer.
+     *   8. STATE_WAIT_NL_END - wait for NL after the last empty trailer.
+     *   9. STATE_CONTENT_END - the stream is done.
+     *  10. STATE_ERROR - an error occurred.
+     * @param {Buffer} buf
+     * @returns {boolean} false on error state
+     */
     parse(buf) {
         for (let index = 0; index < buf.length; ++index) {
+
+            //---------------//
+            // header states //
+            //---------------//
+
             if (this.state === STATE_READ_CHUNK_HEADER) {
-                for (; index < buf.length; ++index) {
-                    if (buf[index] === CR_CODE) {
-                        const header_items = this.chunk_header_str.split(';');
-                        this.chunk_size = parseInt(header_items[0], 16);
-                        if (!(this.chunk_size >= 0)) return this.error_state();
-                        this.last_chunk = this.chunk_size === 0;
-                        const header1 = header_items[1].split('=');
-                        this.chunk_signature = header1[0] === 'chunk-signature' ? header1[1] : '';
-                        this.chunk_header_str = '';
-                        this.state = STATE_WAIT_NL_HEADER;
-                        break;
-                    } else {
-                        this.chunk_header_str += String.fromCharCode(buf[index]);
-                    }
+                const { str, next, finished } = this.read_string_until_cr(buf, index);
+                index = next;
+                this.chunk_header += str;
+                if (this.chunk_header.length > MAX_CHUNK_HEADER_SIZE) {
+                    return this.error_state(buf, index,
+                        `chunk_header exceeded MAX_CHUNK_HEADER_SIZE ${MAX_CHUNK_HEADER_SIZE}`);
                 }
+                if (finished) {
+                    if (!this.parse_chunk_header(buf, index)) return false;
+                    this.state = STATE_WAIT_NL_HEADER;
+                }
+
             } else if (this.state === STATE_WAIT_NL_HEADER) {
-                if (buf[index] !== NL_CODE) return this.error_state();
-                this.state = STATE_SEND_DATA;
+                if (buf[index] !== NL_CODE) return this.error_state(buf, index, `expect NL`);
+                if (this.last_chunk) {
+                    this.state = STATE_READ_TRAILER;
+                } else {
+                    this.state = STATE_SEND_DATA;
+                }
+
+            //-------------//
+            // data states //
+            //-------------//
+
             } else if (this.state === STATE_SEND_DATA) {
-                const content = (index === 0 && buf.length <= this.chunk_size) ? buf : buf.slice(index, index + this.chunk_size);
-                this.chunk_size -= content.length;
-                index += content.length - 1;
-                if (content.length) this.push(content);
+                index = this.send_data(buf, index);
                 if (!this.chunk_size) this.state = STATE_WAIT_CR_DATA;
+
             } else if (this.state === STATE_WAIT_CR_DATA) {
-                if (buf[index] !== CR_CODE) return this.error_state();
+                if (buf[index] !== CR_CODE) return this.error_state(buf, index, `expect CR`);
                 this.state = STATE_WAIT_NL_DATA;
+
             } else if (this.state === STATE_WAIT_NL_DATA) {
-                if (buf[index] !== NL_CODE) return this.error_state();
-                if (this.last_chunk) {
-                    this.state = STATE_CONTENT_END;
-                } else {
-                    this.state = STATE_READ_CHUNK_HEADER;
+                if (buf[index] !== NL_CODE) return this.error_state(buf, index, `expect NL`);
+                this.state = STATE_READ_CHUNK_HEADER;
+
+            //----------------//
+            // trailer states //
+            //----------------//
+
+            } else if (this.state === STATE_READ_TRAILER) {
+                const { str, next, finished } = this.read_string_until_cr(buf, index);
+                index = next;
+                this.trailer += str;
+                if (this.trailer.length > MAX_TRAILER_SIZE) {
+                    return this.error_state(buf, index, `trailer exceeded MAX_TRAILER_SIZE ${MAX_TRAILER_SIZE}`);
                 }
+                if (finished) {
+                    if (this.trailer) {
+                        if (this.trailers.length >= MAX_TRAILERS) {
+                            return this.error_state(buf, index, `number of trailers exceeded the MAX_TRAILERS ${MAX_TRAILERS}`);
+                        }
+                        this.trailers.push(this.trailer);
+                        this.trailer = '';
+                        this.state = STATE_WAIT_NL_TRAILER; // next trailer
+                    } else {
+                        this.state = STATE_WAIT_NL_END; // got last empty trailer
+                    }
+                }
+
+            } else if (this.state === STATE_WAIT_NL_TRAILER) {
+                if (buf[index] !== NL_CODE) return this.error_state(buf, index, `expect NL`);
+                this.state = STATE_READ_TRAILER;
+
+            //------------//
+            // end states //
+            //------------//
+
+            } else if (this.state === STATE_WAIT_NL_END) {
+                if (buf[index] !== NL_CODE) return this.error_state(buf, index, `expect NL`);
+                this.state = STATE_CONTENT_END;
+
             } else {
-                return this.error_state();
+                return this.error_state(buf, index, `State machine in an invalid state`);
             }
         }
+
+        this.stream_pos += buf.length;
+        return true;
+    }
+
+    /**
+     * Find the next CR in this buffer (if any) and extract the string from the current index up to it.
+     * Returns { str, next, finished }: next is the CR index (buf.length if no CR), finished is true when a CR was found.
+     * @param {Buffer} buf
+     * @param {number} index
+     */
+    read_string_until_cr(buf, index) {
+        const start = index;
+        while (index < buf.length && buf[index] !== CR_CODE) index += 1;
+        const str = buf.toString('utf8', start, index);
+        return { str, next: index, finished: index < buf.length };
     }
 
-    error_state() {
+    /**
+     * Parse the chunk size and extensions from `chunk_header`.
+     * Will set error state if the chunk size is not a valid integer >= 0.
+     * The buf and index are used for better debugging info.
+     * Chunk header starts with a hex size and then optional extensions separated by ';'
+     *
+     * Example: 0 - last chunk
+     * Example: f00 - chunk length f00 = 3840 bytes
+     * Example: 1ff;chunk-signature=1a2b3c4d
+     * Example: 1000;a=1;b=2;c=3
+     *
+     * @param {Buffer} buf
+     * @param {number} index
+     * @returns {boolean} false on error state
+     */
+    parse_chunk_header(buf, index) {
+        const [chunk_size_hex, extension] = this.chunk_header.split(';', 2);
+        const chunk_size = parseInt(chunk_size_hex, 16);
+        if (isNaN(chunk_size) || chunk_size < 0 || chunk_size > MAX_CHUNK_SIZE) {
+            return this.error_state(buf, index, `chunk_size has invalid value ${chunk_size}`);
+        }
+        if (extension) {
+            // TODO check for chunk-signature
+            // const [key, value] = extension.split('=', 2);
+            // const chunk_signature = key === 'chunk-signature' ? value : undefined;
+        }
+        this.chunk_size = chunk_size;
+        this.last_chunk = chunk_size === 0;
+        this.chunk_header = '';
+        return true;
+    }
+
+    /**
+     * Send the chunk data to the stream.
+     * @param {Buffer} buf
+     * @param {number} index
+     * @returns {number} next index
+     */
+    send_data(buf, index) {
+        const content = (index === 0 && buf.length <= this.chunk_size) ?
+            buf : buf.subarray(index, index + this.chunk_size);
+        this.chunk_size -= content.length;
+        if (content.length) this.push(content);
+        return index + content.length - 1; // -1 because top loop increments
+    }
+
+    /**
+     * Set the state to error and emit stream error.
+     * The buf and index are used for better debugging info.
+     * @param {Buffer|undefined} buf
+     * @param {number} index
+     * @param {string} [reason]
+     * @returns {boolean} false, for easy return by caller on error
+     */
+    error_state(buf, index, reason = '') {
+        // add index to stream_pos to get the exact position in the stream
+        index ||= 0;
+        this.stream_pos += index;
+        const reason_statement = reason ? `due to ${reason} . ` : '';
+
+        const message = `Failed parsing aws-chunked data ` + reason_statement + this.get_debug_info() +
+            // the state machine advances byte by byte, so attach a hex view of the next 10 bytes of the buffer
+            (buf ? ` buf[index..10]=[${buf.toString('hex', index, index + 10)}]` : '');
+
         this.state = STATE_ERROR;
-        this.emit('error', new Error('problem in parsing aws-chunked data'));
+        this.emit('error', new Error(message));
+        return false;
+    }
+
+    get_debug_info() {
+        const debug_info = `ChunkedContentDecoder:` +
+            ` pos=${this.stream_pos}` +
+            ` state=${this.state}` +
+            ` chunk_header=${this.chunk_header}` +
+            ` chunk_size=${this.chunk_size}` +
+            ` last_chunk=${this.last_chunk}` +
+            ` trailer=${this.trailer}` +
+            ` trailers=${this.trailers}`;
+        return debug_info;
     }
+
 }
 
 module.exports = ChunkedContentDecoder;
diff --git a/src/util/http_utils.js b/src/util/http_utils.js
index 7c04f1407b..390b8975dd 100644
--- a/src/util/http_utils.js
+++ b/src/util/http_utils.js
@@ -25,6 +25,8 @@ const ssl_utils = require('../util/ssl_utils');
 
 const UNSIGNED_PAYLOAD = 'UNSIGNED-PAYLOAD';
 const STREAMING_PAYLOAD = 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD';
+const STREAMING_UNSIGNED_PAYLOAD_TRAILER = 'STREAMING-UNSIGNED-PAYLOAD-TRAILER';
+const STREAMING_AWS4_HMAC_SHA256_PAYLOAD_TRAILER = 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER';
 
 const CONTENT_TYPE_TEXT_PLAIN = 'text/plain';
 const CONTENT_TYPE_APP_OCTET_STREAM = 'application/octet-stream';
@@ -577,7 +579,9 @@ function check_headers(req, options) {
         content_sha256_hdr;
     if (typeof content_sha256_hdr === 'string' &&
         content_sha256_hdr !== UNSIGNED_PAYLOAD &&
-        content_sha256_hdr !== STREAMING_PAYLOAD) {
+        content_sha256_hdr !== STREAMING_PAYLOAD &&
+        content_sha256_hdr !== STREAMING_UNSIGNED_PAYLOAD_TRAILER &&
+        content_sha256_hdr !== STREAMING_AWS4_HMAC_SHA256_PAYLOAD_TRAILER) {
         req.content_sha256_buf = Buffer.from(content_sha256_hdr, 'hex');
         if (req.content_sha256_buf.length !== 32) {
             throw new options.ErrorClass(options.error_invalid_digest);