Skip to content

Commit

Permalink
Add support for bundled strings extension, #52
Browse files Browse the repository at this point in the history
  • Loading branch information
kriszyp committed Dec 20, 2021
1 parent e36d305 commit 1ab9938
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 3 deletions.
24 changes: 24 additions & 0 deletions pack.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ let target
let targetView
let position = 0
let safeEnd
let bundledStrings = null
const hasNonLatin = /[\u0080-\uFFFF]/
const RECORD_SYMBOL = Symbol('record-id')
export class Packr extends Unpackr {
constructor(options) {
Expand Down Expand Up @@ -75,6 +77,14 @@ export class Packr extends Unpackr {
position = (position + 7) & 0x7ffffff8 // Word align to make any future copying of this buffer faster
start = position
referenceMap = packr.structuredClone ? new Map() : null
if (packr.bundleStrings) {
bundledStrings = ['', '']
target[position++] = 0xd6
target[position++] = 0x62 // 'b'
bundledStrings.position = position - start
position += 4
} else
bundledStrings = null
sharedStructures = packr.structures
if (sharedStructures) {
if (sharedStructures.uninitialized)
Expand Down Expand Up @@ -113,6 +123,13 @@ export class Packr extends Unpackr {
structures = sharedStructures || []
try {
pack(value)
if (bundledStrings) {
targetView.setUint32(bundledStrings.position + start, position - bundledStrings.position - start)
let writeStrings = bundledStrings
bundledStrings = null
pack(writeStrings[0])
pack(writeStrings[1])
}
packr.offset = position // update the offset so next serialization doesn't write over our buffer, but can continue writing to same buffer sequentially
if (referenceMap && referenceMap.idsToInsert) {
position += referenceMap.idsToInsert.length * 6
Expand Down Expand Up @@ -174,6 +191,13 @@ export class Packr extends Unpackr {
var length
if (type === 'string') {
let strLength = value.length
if (bundledStrings && strLength >= 8 && strLength < 0x1000) {
let twoByte = hasNonLatin.test(value)
bundledStrings[twoByte ? 0 : 1] += value
target[position++] = 0xc1
pack(twoByte ? -strLength : strLength);
return
}
let headerSize
// first we estimate the header size, so we can write to the correct location
if (strLength < 0x20) {
Expand Down
7 changes: 7 additions & 0 deletions tests/benchmark.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ if (msgpackr) {
obj = bench('require("msgpackr").unpack(buf);', msgpackr.unpack, buf);
test(obj);

packr = new msgpackr.Packr({ bundleStrings: true, structures: [] })
buf = bench('bundled strings packr.pack(obj);', packr.pack.bind(packr), data);
//buf = bench('require("msgpackr").pack(obj);', data => {let result = packr.pack(data); packr.resetMemory(); return result;}, data);

obj = bench('bundled strings packr.unpack(buf);', packr.unpack.bind(packr), buf);
test(obj);

packr = new msgpackr.Packr({ structures: [] })
buf = bench('msgpackr w/ shared structures: packr.pack(obj);', packr.pack.bind(packr), data);
//buf = bench('msgpackr w/ shared structures: packr.pack(obj);', data => {let result = packr.pack(data); packr.resetMemory(); return result;}, data);
Expand Down
12 changes: 10 additions & 2 deletions tests/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,14 @@ suite('msgpackr basic tests', function(){
var deserialized = packr.unpack(serialized)
assert.deepEqual(deserialized, data)
})
// Round-trip the shared sample data through a Packr that has the
// bundled-strings extension enabled, and verify the decoded value is
// deeply equal to the input.
test('pack/unpack sample data with bundled strings', function(){
	const packr = new Packr({ structures: [], bundleStrings: true })
	const encoded = packr.pack(sampleData)
	const decoded = packr.unpack(encoded)
	assert.deepEqual(decoded, sampleData)
})
if (typeof Buffer != 'undefined')
test('replace data', function(){
var data1 = {
Expand Down Expand Up @@ -630,7 +638,7 @@ suite('msgpackr performance tests', function(){
let structures = []
var serialized = pack(data)
console.log('MessagePack size', serialized.length)
let packr = new Packr({ structures })
let packr = new Packr({ structures, bundleStrings: false })
var serialized = packr.pack(data)
console.log('msgpackr w/ record ext size', serialized.length)
for (var i = 0; i < ITERATIONS; i++) {
Expand All @@ -641,7 +649,7 @@ suite('msgpackr performance tests', function(){
var data = sampleData
this.timeout(10000)
let structures = []
let packr = new Packr({ structures })
let packr = new Packr({ structures, bundleStrings: false })
let buffer = typeof Buffer != 'undefined' ? Buffer.alloc(0x10000) : new Uint8Array(0x10000)

for (var i = 0; i < ITERATIONS; i++) {
Expand Down
30 changes: 29 additions & 1 deletion unpack.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ var currentStructures
var srcString
var srcStringStart = 0
var srcStringEnd = 0
var bundledStrings
var referenceMap
var currentExtensions = []
var dataView
Expand Down Expand Up @@ -55,6 +56,7 @@ export class Unpackr {
srcStringEnd = 0
srcString = null
strings = EMPTY_ARRAY
bundledStrings = null
src = source
// this provides cached access to the data view for a buffer if it is getting reused, which is a recommend
// technique for getting data from a database where it can be copied into an existing buffer instead of creating
Expand Down Expand Up @@ -248,7 +250,15 @@ export function read() {
let value
switch (token) {
case 0xc0: return null
case 0xc1: return C1; // "never-used", return special object to denote that
case 0xc1:
if (bundledStrings) {
value = read() // followed by the length of the string in characters (not bytes!)
if (value > 0)
return bundledStrings[1].slice(bundledStrings.position1, bundledStrings.position1 += value)
else
return bundledStrings[0].slice(bundledStrings.position0, bundledStrings.position0 -= value)
}
return C1; // "never-used", return special object to denote that
case 0xc2: return false
case 0xc3: return true
case 0xc4:
Expand Down Expand Up @@ -898,6 +908,22 @@ currentExtensions[0x78] = () => {
return new RegExp(data[0], data[1])
}

// Extension 0x62 ('b'): bundled strings, written by pack.js when
// `bundleStrings` is enabled. `data` is the 4-byte fixext payload: a
// big-endian uint32 giving the byte distance from the size field to the
// point where the two concatenated bundle strings were appended (pack.js
// writes it with targetView.setUint32 after packing the main value).
currentExtensions[0x62] = (data) => {
	// decode the big-endian 32-bit size field
	let dataSize = (data[0] << 24) + (data[1] << 16) + (data[2] << 8) + data[3]
	// remember where the main (non-bundle) data starts so we can come back
	let dataPosition = position
	// jump forward to the bundled strings; the stored size was measured from
	// the 4-byte size field, which `position` has presumably already passed
	// at this point — hence the -4 adjustment (TODO confirm against the
	// fixext dispatch in read())
	position += dataSize - 4
	// [0] = two-byte (non-latin) bundle, [1] = latin bundle, matching the
	// bucket order used on the pack side
	bundledStrings = [read(), read()]
	// per-bundle read cursors, advanced by the 0xc1 token handler in read()
	bundledStrings.position0 = 0
	bundledStrings.position1 = 0
	// end of the whole record, so sequential reads can continue after us
	let postBundlePosition = position
	// rewind and re-read the main data; 0xc1 tokens inside it now resolve
	// their characters out of bundledStrings
	position = dataPosition
	try {
		return read()
	} finally {
		// always restore the post-bundle position, even if read() throws
		position = postBundlePosition
	}
}

currentExtensions[0xff] = (data) => {
// 32-bit date extension
if (data.length == 4)
Expand Down Expand Up @@ -925,6 +951,7 @@ function saveState(callback) {
let savedSrcString = srcString
let savedStrings = strings
let savedReferenceMap = referenceMap
let savedBundledStrings = bundledStrings

// TODO: We may need to revisit this if we do more external calls to user code (since it could be slow)
let savedSrc = new Uint8Array(src.slice(0, srcEnd)) // we copy the data in case it changes while external data is processed
Expand All @@ -941,6 +968,7 @@ function saveState(callback) {
srcString = savedSrcString
strings = savedStrings
referenceMap = savedReferenceMap
bundledStrings = savedBundledStrings
src = savedSrc
sequentialMode = savedSequentialMode
currentStructures = savedStructures
Expand Down

0 comments on commit 1ab9938

Please sign in to comment.