From 625691784f43db75a66e5eb713513a8d62819e9b Mon Sep 17 00:00:00 2001 From: lindstro Date: Wed, 4 Aug 2021 10:35:05 -0700 Subject: [PATCH] Bug fix: overflow due to subnormals (resolves #119) --- CMakeLists.txt | 6 ++++++ Config | 10 ++++++++++ docs/source/installation.rst | 22 +++++++++++++++++++++- src/cuda_zfp/encode.cuh | 14 ++++++++++---- src/cuda_zfp/type_info.cuh | 14 +++++++++++--- src/template/encodef.c | 16 ++++++++++++---- src/traitsd.h | 1 + src/traitsf.h | 1 + tests/testzfp.cpp | 16 ++++++++++++++++ 9 files changed, 88 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dd18c1bb8..4b3cb30b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,6 +122,8 @@ set(ZFP_ROUNDING_MODE ZFP_ROUND_NEVER CACHE STRING "Rounding mode for reducing bias") set_property(CACHE ZFP_ROUNDING_MODE PROPERTY STRINGS "ZFP_ROUND_NEVER;ZFP_ROUND_FIRST;ZFP_ROUND_LAST") +option(ZFP_WITH_DAZ "Treat subnormals as zero to avoid overflow" OFF) + option(ZFP_WITH_CUDA "Enable CUDA parallel compression" OFF) option(ZFP_WITH_BIT_STREAM_STRIDED "Enable strided access for progressive zfp streams" OFF) @@ -212,6 +214,10 @@ if(ZFP_WITH_TIGHT_ERROR) list(APPEND zfp_private_defs ZFP_WITH_TIGHT_ERROR) endif() +if(ZFP_WITH_DAZ) + list(APPEND zfp_private_defs ZFP_WITH_DAZ) +endif() + if(ZFP_WITH_ALIGNED_ALLOC) list(APPEND zfp_compressed_array_defs ZFP_WITH_ALIGNED_ALLOC) endif() diff --git a/Config b/Config index c3af21469..a2d7aba5f 100644 --- a/Config +++ b/Config @@ -58,6 +58,9 @@ OMPFLAGS = -fopenmp # DEFS += -DZFP_ROUNDING_MODE=ZFP_ROUND_LAST # DEFS += -DZFP_WITH_TIGHT_ERROR +# treat subnormals as zero to avoid overflow +# DEFS += -DZFP_WITH_DAZ + # build targets --------------------------------------------------------------- # default targets @@ -124,6 +127,13 @@ ifdef ZFP_ROUNDING_MODE endif endif +# treat subnormals as zero to avoid overflow +ifdef ZFP_WITH_DAZ + ifneq ($(ZFP_WITH_DAZ),0) + FLAGS += -DZFP_WITH_DAZ + endif +endif + # chroma mode for ppm example ifdef PPM_CHROMA PPM_FLAGS += -DPPM_CHROMA=$(PPM_CHROMA) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index a1d6b19cc..e7ae4c2d4 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -268,7 +268,11 @@ in the same manner that :ref:`build targets ` are specified, e.g., effective at reducing bias, but is invoked only with :ref:`fixed-precision ` and :ref:`fixed-accuracy ` compression modes. - Either of these rounding modes break the regression tests. + Both of these rounding modes break the regression tests since they alter + the compressed or decompressed representation, but they may be used with + libraries built with the default rounding mode, :code:`ZFP_ROUND_NEVER`, + and versions of |zfp| that do not support a rounding mode with no adverse + effects. Default: :code:`ZFP_ROUND_NEVER`. .. c:macro:: ZFP_WITH_TIGHT_ERROR @@ -283,6 +287,22 @@ in the same manner that :ref:`build targets ` are specified, e.g., to be :code:`ZFP_ROUND_FIRST` or :code:`ZFP_ROUND_LAST`. Default: undefined/off. +.. c:macro:: ZFP_WITH_DAZ + + When enabled, blocks consisting solely of subnormal floating-point numbers + (tiny numbers close to zero) are treated as blocks of all zeros + (DAZ = denormals-are-zero). The main purpose of this option is to avoid the + potential for floating-point overflow in the |zfp| implementation that may + occur in step 2 of the + :ref:`lossy compression algorithm ` when converting to + |zfp|'s block-floating-point representation (see + `Issue #119 `__). + Such overflow tends to be benign but loses all precision and usually + results in "random" subnormals upon decompression. When enabled, compressed + streams may differ slightly but are decompressed correctly by libraries + built without this option. This option may break some regression tests. + Default: undefined/off. + .. c:macro:: ZFP_WITH_ALIGNED_ALLOC Use aligned memory allocation in an attempt to align compressed blocks diff --git a/src/cuda_zfp/encode.cuh b/src/cuda_zfp/encode.cuh index 8c4ad3e69..995c9c321 100644 --- a/src/cuda_zfp/encode.cuh +++ b/src/cuda_zfp/encode.cuh @@ -42,13 +42,19 @@ __device__ static int exponent(Scalar x) { + int e = -get_ebias(); +#ifdef ZFP_WITH_DAZ + // treat subnormals as zero; resolves issue #119 by avoiding overflow + if (x >= get_scalar_min()) + frexp(x, &e); +#else if (x > 0) { - int e; frexp(x, &e); - // clamp exponent in case x is denormalized - return max(e, 1 - get_ebias()); + // clamp exponent in case x is subnormal; may still result in overflow + e = max(e, 1 - get_ebias()); } - return -get_ebias(); +#endif + return e; } template diff --git a/src/cuda_zfp/type_info.cuh b/src/cuda_zfp/type_info.cuh index 1253701ad..25d76922c 100644 --- a/src/cuda_zfp/type_info.cuh +++ b/src/cuda_zfp/type_info.cuh @@ -1,6 +1,8 @@ #ifndef cuZFP_TYPE_INFO #define cuZFP_TYPE_INFO +#include + namespace cuZFP { template inline __host__ __device__ int get_ebias(); @@ -27,15 +29,19 @@ template<> inline __host__ __device__ int get_min_exp() { return -1074; } template<> inline __host__ __device__ int get_min_exp() { return 0; } template<> inline __host__ __device__ int get_min_exp() { return 0; } -template inline __host__ __device__ int scalar_sizeof(); +template inline __host__ __device__ T get_scalar_min(); +template<> inline __host__ __device__ float get_scalar_min() { return FLT_MIN; } +template<> inline __host__ __device__ double get_scalar_min() { return DBL_MIN; } +template<> inline __host__ __device__ long long int get_scalar_min() { return 0; } +template<> inline __host__ __device__ int get_scalar_min() { return 0; } +template inline __host__ __device__ int scalar_sizeof(); template<> inline __host__ __device__ int scalar_sizeof() { return 8; } template<> inline __host__ __device__ int scalar_sizeof() { return 8; } template<> inline __host__ __device__ int scalar_sizeof() { return 4; } template<> inline __host__ __device__ int scalar_sizeof() { return 4; } template inline __host__ __device__ T get_nbmask(); - template<> inline __host__ __device__ unsigned int get_nbmask() { return 0xaaaaaaaau; } template<> inline __host__ __device__ unsigned long long int get_nbmask() { return 0xaaaaaaaaaaaaaaaaull; } @@ -80,6 +86,7 @@ template<> inline __host__ __device__ bool is_int() return true; } +#if 0 template struct block_traits; template<> struct block_traits<1> @@ -91,7 +98,8 @@ template<> struct block_traits<2> { typedef unsigned short PlaneType; }; - +#endif } // namespace cuZFP + #endif diff --git a/src/template/encodef.c b/src/template/encodef.c index 9bf558e74..86da10c75 100644 --- a/src/template/encodef.c +++ b/src/template/encodef.c @@ -1,3 +1,4 @@ +#include #include #include @@ -9,13 +10,20 @@ static uint _t2(rev_encode_block, Scalar, DIMS)(zfp_stream* zfp, const Scalar* f static int _t1(exponent, Scalar)(Scalar x) { + /* use e = -EBIAS when x = 0 */ + int e = -EBIAS; +#ifdef ZFP_WITH_DAZ + /* treat subnormals as zero; resolves issue #119 by avoiding overflow */ + if (x >= SCALAR_MIN) + FREXP(x, &e); +#else if (x > 0) { - int e; FREXP(x, &e); - /* clamp exponent in case x is denormal */ - return MAX(e, 1 - EBIAS); + /* clamp exponent in case x is subnormal; may still result in overflow */ + e = MAX(e, 1 - EBIAS); } - return -EBIAS; +#endif + return e; } /* compute maximum floating-point exponent in block of n values */ diff --git a/src/traitsd.h b/src/traitsd.h index 4dfb271b8..05110d556 100644 --- a/src/traitsd.h +++ b/src/traitsd.h @@ -7,6 +7,7 @@ #define PBITS 6 /* number of bits needed to encode precision */ #define NBMASK UINT64C(0xaaaaaaaaaaaaaaaa) /* negabinary mask */ #define TCMASK UINT64C(0x7fffffffffffffff) /* two's complement mask */ +#define SCALAR_MIN DBL_MIN /* smallest positive normal number */ #define FABS(x) fabs(x) #define FREXP(x, e) frexp(x, e) diff --git a/src/traitsf.h b/src/traitsf.h index 408337e1e..7e85299d0 100644 --- a/src/traitsf.h +++ b/src/traitsf.h @@ -7,6 +7,7 @@ #define PBITS 5 /* number of bits needed to encode precision */ #define NBMASK 0xaaaaaaaau /* negabinary mask */ #define TCMASK 0x7fffffffu /* two's complement mask */ +#define SCALAR_MIN FLT_MIN /* smallest positive normal number */ #if __STDC_VERSION__ >= 199901L #define FABS(x) fabsf(x) diff --git a/tests/testzfp.cpp b/tests/testzfp.cpp index b9fd565c8..9c0bdb7f4 100644 --- a/tests/testzfp.cpp +++ b/tests/testzfp.cpp @@ -1023,6 +1023,7 @@ inline uint common_tests() { uint failures = 0; + uint warnings = 0; // test library version if (zfp_codec_version != ZFP_CODEC || zfp_library_version != ZFP_VERSION) { std::cout << "library header and binary version mismatch" << std::endl; @@ -1065,6 +1066,21 @@ common_tests() std::cout << "regression testing requires BIT_STREAM_WORD_TYPE=uint64" << std::endl; failures++; } + // warn if non-default compiler options are used +#if ZFP_ROUNDING_MODE != 0 + std::cout << "warning: selected ZFP_ROUNDING_MODE may break tests" << std::endl; + warnings++; +#ifdef ZFP_WITH_TIGHT_ERROR + std::cout << "warning: ZFP_WITH_TIGHT_ERROR option may break tests" << std::endl; + warnings++; +#endif +#endif +#ifdef ZFP_WITH_DAZ + std::cout << "warning: ZFP_WITH_DAZ option may break tests" << std::endl; + warnings++; +#endif + if (failures || warnings) + std::cout << std::endl; return failures; }