TextDecoder support for pthreads builds

Summary of the patch below:
* emcc.py: drop the `settings.TEXTDECODER = 0` override that sat next to the
  pthreads link settings.
* src/runtime_strings.js: when USE_PTHREADS && TEXTDECODER, define
  TextDecoderWrapper, whose decode() copies a SharedArrayBuffer-backed
  Uint8Array into a new Uint8Array before handing it to TextDecoder (see
  https://github.com/whatwg/encoding/issues/172). UTF8Decoder and UTF16Decoder
  are constructed through the wrapper when USE_PTHREADS is set.
* src/library_strings.js: under MINIMAL_RUNTIME, alias TextDecoderWrapper to
  TextDecoder before including runtime_strings_extra.js.
* tests: add the also_with_threads decorator and apply it to the three
  TextDecoder browser tests; benchmark_utf8.cpp now uses string lengths 1-32.

NOTE(review): this copy was rebuilt from a whitespace-collapsed patch. Line
breaks were re-derived from the hunk line counts; leading indentation assumes
two-space style and should be confirmed against the tree (or apply with
`git apply --ignore-whitespace`). The test_browser.py hunks differ from the
collapsed copy only in also_with_threads (added comment, argument forwarding),
so that file's post-image index hash is stale.

diff --git a/emcc.py b/emcc.py
index 24ed2ff0f74c8..2653a23f273b6 100755
--- a/emcc.py
+++ b/emcc.py
@@ -1840,8 +1840,6 @@ def default_setting(name, new_default):
       exit_with_error('USE_PTHREADS=2 is no longer supported')
     if settings.ALLOW_MEMORY_GROWTH:
       diagnostics.warning('pthreads-mem-growth', 'USE_PTHREADS + ALLOW_MEMORY_GROWTH may run non-wasm code slowly, see https://github.com/WebAssembly/design/issues/1271')
-    # UTF8Decoder.decode may not work with a view of a SharedArrayBuffer, see https://github.com/whatwg/encoding/issues/172
-    settings.TEXTDECODER = 0
     settings.SYSTEM_JS_LIBRARIES.append((0, shared.path_from_root('src', 'library_pthread.js')))
     settings.EXPORTED_FUNCTIONS += [
       '___emscripten_pthread_data_constructor',
diff --git a/src/library_strings.js b/src/library_strings.js
index 4631191ede86c..bfb8e31977439 100644
--- a/src/library_strings.js
+++ b/src/library_strings.js
@@ -6,6 +6,14 @@
 
 #if MINIMAL_RUNTIME
 
+// runtime_strings_extra.js defines a wrapper around TextDecoder, which is added
+// in the generated code. The minimal runtime logic here actually runs the
+// library code at compile time (as a way to create a library*.js file around
+// non-library JS), and so we must define it here as well.
+var TextDecoderWrapper = TextDecoder;
+
+// TODO: stop including this in such a manner, and instead make it a normal
+// library file in all modes.
 #include "runtime_strings_extra.js"
 #include "arrayUtils.js"
 
diff --git a/src/runtime_strings.js b/src/runtime_strings.js
index 8f111c3793fe7..a37f184f561c4 100644
--- a/src/runtime_strings.js
+++ b/src/runtime_strings.js
@@ -9,11 +9,34 @@
 // Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the given array that contains uint8 values, returns
 // a copy of that string as a Javascript String object.
 
+#if USE_PTHREADS && TEXTDECODER
+// UTF8Decoder.decode may not work with a view of a SharedArrayBuffer, see
+// https://github.com/whatwg/encoding/issues/172
+// To avoid that, we wrap around it and add a copy into a normal ArrayBuffer,
+// which can still be much faster than creating a string character by
+// character.
+function TextDecoderWrapper(encoding) {
+  var textDecoder = new TextDecoder(encoding);
+  this.decode = function(data) {
+#if ASSERTIONS
+    assert(data instanceof Uint8Array);
+#endif
+    // While we compile with pthreads, this method can be called on side buffers
+    // as well, such as the stdout buffer in the filesystem code. Only copy when
+    // we have to.
+    if (data.buffer instanceof SharedArrayBuffer) {
+      data = new Uint8Array(data);
+    }
+    return textDecoder.decode.call(textDecoder, data);
+  };
+}
+#endif
+
 #if TEXTDECODER == 2
-var UTF8Decoder = new TextDecoder('utf8');
+var UTF8Decoder = new TextDecoder{{{ USE_PTHREADS ? 'Wrapper' : ''}}}('utf8');
 #else // TEXTDECODER == 2
 #if TEXTDECODER
-var UTF8Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
+var UTF8Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder{{{ USE_PTHREADS ? 'Wrapper' : ''}}}('utf8') : undefined;
 #endif // TEXTDECODER
 #endif // TEXTDECODER == 2
 
diff --git a/src/runtime_strings_extra.js b/src/runtime_strings_extra.js
index 185a02f21796a..67f570d87212f 100644
--- a/src/runtime_strings_extra.js
+++ b/src/runtime_strings_extra.js
@@ -32,10 +32,10 @@ function stringToAscii(str, outPtr) {
 // a copy of that string as a Javascript String object.
 
 #if TEXTDECODER == 2
-var UTF16Decoder = new TextDecoder('utf-16le');
+var UTF16Decoder = new TextDecoder{{{ USE_PTHREADS ? 'Wrapper' : ''}}}('utf-16le');
 #else // TEXTDECODER == 2
 #if TEXTDECODER
-var UTF16Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-16le') : undefined;
+var UTF16Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder{{{ USE_PTHREADS ? 'Wrapper' : ''}}}('utf-16le') : undefined;
 #endif // TEXTDECODER
 #endif // TEXTDECODER == 2
 
diff --git a/tests/benchmark_utf8.cpp b/tests/benchmark_utf8.cpp
index 6b541fabbe1b0..8dd3d5d8627d8 100644
--- a/tests/benchmark_utf8.cpp
+++ b/tests/benchmark_utf8.cpp
@@ -57,8 +57,10 @@ int main() {
   double t = 0;
   double t2 = emscripten_get_now();
   for(int i = 0; i < 100000; ++i) {
-    // FF Nightly: Already on small strings of 64 bytes in length, TextDecoder trumps in performance.
-    char *str = randomString(8);
+    // Create strings of lengths 1-32, because the internals of text decoding
+    // have a cutoff of 16 for when to use TextDecoder, and we wish to test both
+    // (see UTF8ArrayToString).
+    char *str = randomString((i % 32) + 1);
     t += test(str);
     delete [] str;
   }
diff --git a/tests/test_browser.py b/tests/test_browser.py
index 999553a79c2c5..6140953822686 100644
--- a/tests/test_browser.py
+++ b/tests/test_browser.py
@@ -131,6 +131,18 @@ def decorated(self, *args, **kwargs):
   return decorated
 
 
+# Runs the test as-is, then (unless EMTEST_LACKS_THREAD_SUPPORT is set) runs it
+# a second time with -pthread appended to emcc_args.
+def also_with_threads(f):
+  def decorated(self, *args, **kwargs):
+    f(self, *args, **kwargs)
+    if not os.environ.get('EMTEST_LACKS_THREAD_SUPPORT'):
+      print('(threads)')
+      self.emcc_args += ['-pthread']
+      f(self, *args, **kwargs)
+  return decorated
+
+
 # Today we only support the wasm backend so any tests that is disabled under the llvm
 # backend is always disabled.
 # TODO(sbc): Investigate all tests with this decorator and either fix of remove the test.
@@ -4231,12 +4243,15 @@ def test_wasm_locate_file(self):
     shutil.move('test.wasm', Path('cdn/test.wasm'))
     self.run_browser('test.html', '', '/report_result?0')
 
+  @also_with_threads
   def test_utf8_textdecoder(self):
     self.btest_exit('benchmark_utf8.cpp', 0, args=['--embed-file', test_file('utf8_corpus.txt') + '@/utf8_corpus.txt', '-s', 'EXPORTED_RUNTIME_METHODS=[UTF8ToString]'])
 
+  @also_with_threads
   def test_utf16_textdecoder(self):
     self.btest_exit('benchmark_utf16.cpp', 0, args=['--embed-file', test_file('utf16_corpus.txt') + '@/utf16_corpus.txt', '-s', 'EXPORTED_RUNTIME_METHODS=[UTF16ToString,stringToUTF16,lengthBytesUTF16]'])
 
+  @also_with_threads
   def test_TextDecoder(self):
     self.btest('browser_test_hello_world.c', '0', args=['-s', 'TEXTDECODER=0'])
     just_fallback = os.path.getsize('test.js')
@@ -4244,7 +4259,13 @@ def test_TextDecoder(self):
     td_with_fallback = os.path.getsize('test.js')
     self.btest('browser_test_hello_world.c', '0', args=['-s', 'TEXTDECODER=2'])
     td_without_fallback = os.path.getsize('test.js')
-    self.assertLess(td_without_fallback, just_fallback)
+    # pthread TextDecoder support is more complex due to
+    # https://github.com/whatwg/encoding/issues/172
+    # and therefore the expected code size win there is actually a loss
+    if '-pthread' not in self.emcc_args:
+      self.assertLess(td_without_fallback, just_fallback)
+    else:
+      self.assertGreater(td_without_fallback, just_fallback)
     self.assertLess(just_fallback, td_with_fallback)
 
   def test_small_js_flags(self):