diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 9c039096d3..e375f0eba6 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -160,3 +160,4 @@ their individual contributions.
 * `Tariq Khokhar `_ (`tariq@khokhar.net `_)
 * `Will Hall `_ (`wrsh07@gmail.com `_)
 * `Will Thompson `_ (`will@willthompson.co.uk `_)
+* `Zac Hatfield-Dodds `_ (`zac.hatfield.dodds@gmail.com `_)
diff --git a/docs/conf.py b/docs/conf.py
index 84ce410168..c7990bea2f 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -64,6 +64,8 @@
     'python': ('http://docs.python.org/', None),
 }
 
+autodoc_mock_imports = ['numpy']
+
 
 # -- Options for HTML output ----------------------------------------------
diff --git a/docs/extras.rst b/docs/extras.rst
index 62778a27bb..3cd18a0c8d 100644
--- a/docs/extras.rst
+++ b/docs/extras.rst
@@ -188,56 +188,9 @@ It's large enough that it is :doc:`documented elsewhere `.
 hypothesis[numpy]
 -----------------
-hypothesis.extra.numpy adds support for testing
-`NumPy `_-based implementations with Hypothesis by
-providing an ``arrays`` function.
+hypothesis.extra.numpy adds support for testing your NumPy code with Hypothesis.
-It lives in the ``hypothesis.extra.numpy`` package.
+This includes generating arrays, array shapes, and both scalar and compound dtypes.
-.. method:: arrays(dtype, shape, elements=None)
+Like the Django extra, :doc:`NumPy has its own page `.
-
-   Arrays of specified `dtype` and `shape` are generated for example
-   like this:
-
-   .. code-block:: pycon
-
-      >>> import numpy as np
-      >>> arrays(np.int8, (2, 3)).example()
-      array([[-8, 6, 3],
-             [-6, 4, 6]], dtype=int8)
-
-
-   However, to obtain more fine grained control over the elements, use
-   the `elements` keyword (see also :doc:`What you can generate and how `):
-
-   .. code-block:: pycon
-
-      >>> import numpy as np
-      >>> from hypothesis.strategies import floats
-      >>> arrays(np.float, 3, elements=floats(min_value=0, max_value=1)).example()
-      array([ 0.88974794, 0.77387938, 0.1977879 ])
-
-   By combining different strategies, the shape of the array can be modified as well:
-
-   .. code-block:: pycon
-
-      >>> import numpy as np
-      >>> from hypothesis.strategies import integers, floats
-      >>>
-      >>> def rnd_len_arrays(dtype, min_len=0, max_len=3, elements=None):
-      ...     lengths = integers(min_value=min_len, max_value=max_len)
-      ...     return lengths.flatmap(lambda n: arrays(dtype, n, elements=elements))
-      ...
-      >>>
-      >>> rla = rnd_len_arrays(np.int8)
-      >>> rla.example()
-      array([], dtype=int8)
-      >>> rla.example()
-      array([-2], dtype=int8)
-      >>> rla.example()
-      array([ 7, -6, -2], dtype=int8)
-
-**Note**: To generate large arrays/matrices, or even medium-sized matrices that
-have 3 or more dimensions, it's necessary to increase the ``buffer_size``
-setting to be greater than its default of ``8192`` (see the
-:doc:`Settings documentation ` for details on how to do this).
diff --git a/docs/index.rst b/docs/index.rst
index aee3dd57fa..58c5c30002 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -8,7 +8,7 @@
 finding edge cases in your code you wouldn't have thought to look for. It is
 stable, powerful and easy to add to any existing test suite.
 
 It works by letting you write tests that assert that something should be true
-for every case, not just the ones you happen to think of.
+for every case, not just the ones you happen to think of. 
Think of a normal unit test as being something like the following: @@ -57,11 +57,12 @@ check out some of the :hidden: quickstart - django details settings data extras + django + numpy healthchecks database stateful diff --git a/docs/numpy.rst b/docs/numpy.rst new file mode 100644 index 0000000000..23c1a3ad82 --- /dev/null +++ b/docs/numpy.rst @@ -0,0 +1,18 @@ +.. _hypothesis-numpy: + +================================= +Scientific Hypothesis (for NumPy) +================================= + +Hypothesis offers a number of strategies for `NumPy `_ testing, +available in the :mod:`hypothesis[numpy]` :doc:`extra `. +It lives in the ``hypothesis.extra.numpy`` package. + +The centerpiece is the ``arrays`` strategy, which generates arrays with +any dtype, shape, and contents you can specify or give a strategy for. +To make this as useful as possible, strategies are provided to generate array +shapes and generate all kinds of fixed-size or compound dtypes. + + +.. automodule:: hypothesis.extra.numpy + :members: arrays, array_shapes, scalar_dtypes, boolean_dtypes, unsigned_integer_dtypes, integer_dtypes, floating_dtypes, complex_number_dtypes, datetime64_dtypes, timedelta64_dtypes, byte_string_dtypes, unicode_string_dtypes, array_dtypes, nested_dtypes diff --git a/src/hypothesis/errors.py b/src/hypothesis/errors.py index 0a403b152a..7c2952eacf 100644 --- a/src/hypothesis/errors.py +++ b/src/hypothesis/errors.py @@ -115,7 +115,7 @@ class Flaky(HypothesisException): depends sensitively on where it's been called from. 3. The function is timing sensitive and can fail or pass depending on how long it takes. Try breaking it up into smaller functions which - dont' do that and testing those instead. + don't do that and testing those instead. """ diff --git a/src/hypothesis/extra/numpy.py b/src/hypothesis/extra/numpy.py index 97cb158772..44868d78ba 100644 --- a/src/hypothesis/extra/numpy.py +++ b/src/hypothesis/extra/numpy.py @@ -17,24 +17,39 @@ from __future__ import division, print_function, absolute_import -import operator +import functools import numpy as np import hypothesis.strategies as st +from hypothesis import settings +from hypothesis.errors import InvalidArgument from hypothesis.searchstrategy import SearchStrategy -from hypothesis.internal.compat import hrange, reduce, text_type, \ - binary_type +from hypothesis.internal.compat import hrange, text_type, binary_type + +TIME_RESOLUTIONS = tuple('Y M D h m s ms us ns ps fs as'.split()) def from_dtype(dtype): + # Compound datatypes, eg 'f4,f4,f4' + if dtype.names is not None: + # mapping np.void.type over a strategy is nonsense, so return now. 
+        return st.tuples(
+            *[from_dtype(dtype.fields[name][0]) for name in dtype.names])
+
+    # Subarray datatypes, eg '(2, 3)i4'
+    if dtype.subdtype is not None:
+        subtype, shape = dtype.subdtype
+        return arrays(subtype, shape)
+
+    # Scalar datatypes
     if dtype.kind == u'b':
         result = st.booleans()
     elif dtype.kind == u'f':
         result = st.floats()
     elif dtype.kind == u'c':
         result = st.complex_numbers()
-    elif dtype.kind in (u'S', u'a', u'V'):
+    elif dtype.kind in (u'S', u'a'):
         result = st.binary()
     elif dtype.kind == u'u':
         result = st.integers(
@@ -44,24 +59,54 @@ def from_dtype(dtype):
         result = st.integers(min_value=min_integer, max_value=-min_integer - 1)
     elif dtype.kind == u'U':
         result = st.text()
+    elif dtype.kind in (u'm', u'M'):
+        res = st.just(dtype.str.split('[')[-1][:-1]) if '[' in dtype.str \
+            else st.sampled_from(TIME_RESOLUTIONS)
+        result = st.builds(dtype.type, st.integers(1 - 2**63, 2**63 - 1), res)
     else:
-        raise NotImplementedError(
-            u'No strategy implementation for %r' % (dtype,)
-        )
+        raise InvalidArgument(u'No strategy inference for {}'.format(dtype))
     return result.map(dtype.type)
 
 
+def check_argument(condition, fail_message, *f_args, **f_kwargs):
+    if not condition:
+        raise InvalidArgument(fail_message.format(*f_args, **f_kwargs))
+
+
+def order_check(name, floor, small, large):
+    if floor is None:
+        floor = -np.inf
+    check_argument(floor <= small <= large,
+                   u'min_{name} was {}, must be at least {} and not more '
+                   u'than max_{name} (was {})', small, floor, large,
+                   name=name)
+
+
 class ArrayStrategy(SearchStrategy):
 
     def __init__(self, element_strategy, shape, dtype):
         self.shape = tuple(shape)
-        assert shape
-        self.array_size = reduce(operator.mul, shape)
+        check_argument(shape,
+                       u'Array shape must have at least one dimension, '
+                       u'provided shape was {}', shape)
+        check_argument(all(isinstance(s, int) for s in shape),
+                       u'Array shape must be integer in each dimension, '
+                       u'provided shape was {}', shape)
+        self.array_size = np.prod(shape)
+        buff_size = settings.default.buffer_size
+        check_argument(
+            self.array_size * dtype.itemsize <= buff_size,
+            u'Insufficient bytes of entropy to draw requested array. '
+            u'shape={}, dtype={}. Can you reduce the size or dimensions '
+            u'of the array? What about using a smaller dtype? If slow '
+            u'test runs and minimisation are acceptable, you could '
+            u'increase settings().buffer_size from {} to at least {}.',
+            shape, dtype, buff_size, self.array_size * dtype.itemsize)
         self.dtype = dtype
         self.element_strategy = element_strategy
 
     def do_draw(self, data):
-        result = np.zeros(dtype=self.dtype, shape=self.array_size)
+        result = np.empty(dtype=self.dtype, shape=self.array_size)
         for i in hrange(self.array_size):
             result[i] = self.element_strategy.do_draw(data)
         return result.reshape(self.shape)
@@ -73,20 +118,244 @@ def is_scalar(spec):
     )
 
 
-def arrays(dtype, shape, elements=None):
-    if not isinstance(dtype, np.dtype):
-        dtype = np.dtype(dtype)
+@st.composite
+def arrays(draw, dtype, shape, elements=None):
+    """`dtype` may be any valid input to ``np.dtype`` (this includes
+    ``np.dtype`` objects), or a strategy that generates such values. `shape`
+    may be an integer >= 0, a tuple of length >= 0 of such integers, or a
+    strategy that generates such values.
+
+    Arrays of specified `dtype` and `shape` are generated for example
+    like this:
+
+    .. 
code-block:: pycon + + >>> import numpy as np + >>> arrays(np.int8, (2, 3)).example() + array([[-8, 6, 3], + [-6, 4, 6]], dtype=int8) + + If elements is None, Hypothesis infers a strategy based on the dtype, + which may give any legal value (including eg ``NaN`` for floats). If you + have more specific requirements, you can supply your own elements strategy + - see :doc:`What you can generate and how `. + + .. code-block:: pycon + + >>> import numpy as np + >>> from hypothesis.strategies import floats + >>> arrays(np.float, 3, elements=floats(0, 1)).example() + array([ 0.88974794, 0.77387938, 0.1977879 ]) + + .. warning:: + Hypothesis works really well with NumPy, but is designed for small + data. The default entropy is 8192 bytes - it is impossible to draw + an example where ``example_array.nbytes`` is greater than + ``settings.default.buffer_size``. + See the :doc:`settings documentation ` if you need to + increase this value, but be aware that Hypothesis may take much + longer to produce a minimal failure case. + + """ + if isinstance(dtype, SearchStrategy): + dtype = draw(dtype) + dtype = np.dtype(dtype) if elements is None: elements = from_dtype(dtype) + if isinstance(shape, SearchStrategy): + shape = draw(shape) if isinstance(shape, int): shape = (shape,) shape = tuple(shape) if not shape: if dtype.kind != u'O': - return elements - else: - return ArrayStrategy( - shape=shape, - dtype=dtype, - element_strategy=elements - ) + return draw(elements) + return draw(ArrayStrategy(elements, shape, dtype)) + + +@st.defines_strategy +def array_shapes(min_dims=1, max_dims=3, min_side=1, max_side=10): + """Return a strategy for array shapes (tuples of int >= 1).""" + order_check('dims', 1, min_dims, max_dims) + order_check('side', 1, min_side, max_side) + return st.lists(st.integers(min_side, max_side), + min_size=min_dims, max_size=max_dims).map(tuple) + + +@st.defines_strategy +def scalar_dtypes(): + """Return a strategy that can return any non-flexible scalar dtype.""" + return st.one_of(boolean_dtypes(), + integer_dtypes(), unsigned_integer_dtypes(), + floating_dtypes(), complex_number_dtypes(), + datetime64_dtypes(), timedelta64_dtypes()) + + +def defines_dtype_strategy(strat): + @st.defines_strategy + @functools.wraps(strat) + def inner(*args, **kwargs): + return strat(*args, **kwargs).map(np.dtype) + return inner + + +@defines_dtype_strategy +def boolean_dtypes(): + return st.just('?') + + +def dtype_factory(kind, sizes, valid_sizes, endianness): + # Utility function, shared logic for most integer and string types + valid_endian = ('?', '<', '=', '>') + check_argument(endianness in valid_endian, + u'Unknown endianness: was {}, must be in {}', valid_endian) + if valid_sizes is not None: + if isinstance(sizes, int): + sizes = (sizes,) + check_argument(sizes, 'Dtype must have at least one possible size.') + check_argument(all(s in valid_sizes for s in sizes), + u'Invalid sizes: was {} must be an item or sequence ' + u'in {}', sizes, valid_sizes) + if all(isinstance(s, int) for s in sizes): + sizes = sorted(set(s // 8 for s in sizes)) + strat = st.sampled_from(sizes) + if '{}' not in kind: + kind += '{}' + if endianness == '?': + return strat.map(('<' + kind).format) | strat.map(('>' + kind).format) + return strat.map((endianness + kind).format) + + +@defines_dtype_strategy +def unsigned_integer_dtypes(endianness='?', sizes=(8, 16, 32, 64)): + """Return a strategy for unsigned integer dtypes. 
+
+    endianness may be ``<`` for little-endian, ``>`` for big-endian,
+    ``=`` for native byte order, or ``?`` to allow either byte order.
+    This argument only applies to dtypes of more than one byte.
+
+    sizes must be a collection of integer sizes in bits. The default
+    (8, 16, 32, 64) covers the full range of sizes.
+
+    """
+    return dtype_factory('u', sizes, (8, 16, 32, 64), endianness)
+
+
+@defines_dtype_strategy
+def integer_dtypes(endianness='?', sizes=(8, 16, 32, 64)):
+    """Return a strategy for signed integer dtypes.
+
+    endianness and sizes are treated as for `unsigned_integer_dtypes`.
+
+    """
+    return dtype_factory('i', sizes, (8, 16, 32, 64), endianness)
+
+
+@defines_dtype_strategy
+def floating_dtypes(endianness='?', sizes=(16, 32, 64)):
+    """Return a strategy for floating-point dtypes.
+
+    sizes is the size in bits of each floating-point number. Larger floats
+    (96- and 128-bit) are supported on some machines but not on all
+    platforms, so they are not generated by default. To generate these
+    dtypes, include those values in the sizes argument.
+
+    """
+    return dtype_factory('f', sizes, (16, 32, 64, 96, 128), endianness)
+
+
+@defines_dtype_strategy
+def complex_number_dtypes(endianness='?', sizes=(64, 128)):
+    """Return a strategy for complex-number dtypes.
+
+    sizes is the total size in bits of a complex number, which consists
+    of two floats. Complex halves (a 16-bit real part) are not supported
+    by numpy and will not be generated by this strategy.
+
+    """
+    return dtype_factory('c', sizes, (64, 128, 192, 256), endianness)
+
+
+def validate_time_slice(max_period, min_period):
+    check_argument(max_period in TIME_RESOLUTIONS,
+                   u'max_period {} must be a valid resolution in {}',
+                   max_period, TIME_RESOLUTIONS)
+    check_argument(min_period in TIME_RESOLUTIONS,
+                   u'min_period {} must be a valid resolution in {}',
+                   min_period, TIME_RESOLUTIONS)
+    start = TIME_RESOLUTIONS.index(max_period)
+    end = TIME_RESOLUTIONS.index(min_period) + 1
+    check_argument(start < end,
+                   u'max_period {} must be earlier in sequence {} than '
+                   u'min_period {}', max_period, TIME_RESOLUTIONS, min_period)
+    return TIME_RESOLUTIONS[start:end]
+
+
+@defines_dtype_strategy
+def datetime64_dtypes(max_period='Y', min_period='ns', endianness='?'):
+    """Return a strategy for datetime64 dtypes, with various precisions from
+    year to attosecond."""
+    return dtype_factory('datetime64[{}]',
+                         validate_time_slice(max_period, min_period),
+                         TIME_RESOLUTIONS, endianness)
+
+
+@defines_dtype_strategy
+def timedelta64_dtypes(max_period='Y', min_period='ns', endianness='?'):
+    """Return a strategy for timedelta64 dtypes, with various precisions from
+    year to attosecond."""
+    return dtype_factory('timedelta64[{}]',
+                         validate_time_slice(max_period, min_period),
+                         TIME_RESOLUTIONS, endianness)
+
+
+@defines_dtype_strategy
+def byte_string_dtypes(endianness='?', min_len=0, max_len=16):
+    """Return a strategy for generating bytestring dtypes, of various lengths
+    and byteorder."""
+    order_check('len', 0, min_len, max_len)
+    return dtype_factory('S', list(range(min_len, max_len + 1)),
+                         None, endianness)
+
+
+@defines_dtype_strategy
+def unicode_string_dtypes(endianness='?', min_len=0, max_len=16):
+    """Return a strategy for generating unicode string dtypes, of various
+    lengths and byteorder."""
+    order_check('len', 0, min_len, max_len)
+    return dtype_factory('U', list(range(min_len, max_len + 1)),
+                         None, endianness)
+
+
+@defines_dtype_strategy
+def 
array_dtypes(subtype_strategy=scalar_dtypes(), + min_size=1, max_size=5, allow_subarrays=False): + """Return a strategy for generating array (compound) dtypes, with members + drawn from the given subtype strategy.""" + order_check('size', 0, min_size, max_size) + native_strings = st.text if text_type is str else st.binary + elements = st.tuples(native_strings(), subtype_strategy) + if allow_subarrays: + elements |= st.tuples(native_strings(), subtype_strategy, + array_shapes(max_dims=2, max_side=2)) + return st.lists(elements=elements, min_size=min_size, max_size=max_size, + unique_by=lambda d: d[0]) + + +@st.defines_strategy +def nested_dtypes(subtype_strategy=scalar_dtypes(), + max_leaves=10, max_itemsize=None): + """Return the most-general dtype strategy. + + Elements drawn from this strategy may be simple (from the + subtype_strategy), or several such values drawn from `array_dtypes` + with ``allow_subarrays=True``. Subdtypes in an array dtype may be + nested to any depth, subject to the max_leaves argument. + + """ + return st.recursive(subtype_strategy, + lambda x: array_dtypes(x, allow_subarrays=True), + max_leaves).filter( + lambda d: max_itemsize is None or d.itemsize <= max_itemsize) diff --git a/tests/numpy/test_gen_data.py b/tests/numpy/test_gen_data.py index e61af50ab0..ab265c5c0a 100644 --- a/tests/numpy/test_gen_data.py +++ b/tests/numpy/test_gen_data.py @@ -22,9 +22,12 @@ from flaky import flaky import hypothesis.strategies as st -from hypothesis import find, given, settings -from hypothesis.extra.numpy import arrays, from_dtype +from hypothesis import given, settings +from hypothesis.extra.numpy import arrays, from_dtype, array_shapes, \ + nested_dtypes, scalar_dtypes from hypothesis.strategytests import strategy_test_suite +from hypothesis.internal.debug import minimal +from hypothesis.searchstrategy import SearchStrategy from hypothesis.internal.compat import text_type, binary_type TestFloats = strategy_test_suite(arrays(float, ())) @@ -36,6 +39,7 @@ u'int8', u'int32', u'int64', u'float', u'float32', u'float64', complex, + u'datetime64', u'timedelta64', bool, text_type, binary_type ])) @@ -65,20 +69,16 @@ def test_assert_fits_in_machine_size(x): def test_generates_and_minimizes(): - x = find(arrays(float, (2, 2)), lambda t: True) - assert (x == np.zeros(shape=(2, 2), dtype=float)).all() + assert (minimal(arrays(float, (2, 2))) == np.zeros(shape=(2, 2))).all() def test_can_minimize_large_arrays(): - x = find(arrays(u'uint32', 500), lambda t: t.any()) - assert x.sum() == 1 + assert minimal(arrays(u'uint32', 500), np.any, timeout_after=60).sum() == 1 @flaky(max_runs=5, min_passes=1) def test_can_minimize_float_arrays(): - x = find( - arrays(float, 50), lambda t: t.sum() >= 1.0, - settings=settings(database=None)) + x = minimal(arrays(float, 50), lambda t: t.sum() >= 1.0) assert 1.0 <= x.sum() <= 1.1 @@ -90,14 +90,67 @@ class Foo(object): def test_can_create_arrays_of_composite_types(): - arr = find(arrays(object, 100, foos), lambda x: True) + arr = minimal(arrays(object, 100, foos)) for x in arr: assert isinstance(x, Foo) def test_can_create_arrays_of_tuples(): - arr = find( - arrays(object, 10, st.tuples(st.integers(), st.integers())), - lambda x: all(t[0] != t[1] for t in x)) - for a in arr: - assert a in ((1, 0), (0, 1)) + arr = minimal(arrays(object, 10, st.tuples(st.integers(), st.integers())), + lambda x: all(t0 != t1 for t0, t1 in x)) + assert all(a in ((1, 0), (0, 1)) for a in arr) + + +@given(array_shapes()) +def test_can_generate_array_shapes(shape): + assert 
isinstance(shape, tuple) + assert all(isinstance(i, int) for i in shape) + + +@given(st.integers(1, 10), st.integers(0, 9), st.integers(1), st.integers(0)) +def test_minimise_array_shapes(min_dims, dim_range, min_side, side_range): + smallest = minimal(array_shapes(min_dims, min_dims + dim_range, + min_side, min_side + side_range)) + assert len(smallest) == min_dims and all(k == min_side for k in smallest) + + +@given(scalar_dtypes()) +def test_can_generate_scalar_dtypes(dtype): + assert isinstance(dtype, np.dtype) + + +@given(nested_dtypes()) +def test_can_generate_compound_dtypes(dtype): + assert isinstance(dtype, np.dtype) + + +@given(nested_dtypes(max_itemsize=settings.default.buffer_size // 10), + st.data()) +def test_infer_strategy_from_dtype(dtype, data): + # Given a dtype + assert isinstance(dtype, np.dtype) + # We can infer a strategy + strat = from_dtype(dtype) + assert isinstance(strat, SearchStrategy) + # And use it to fill an array of that dtype + data.draw(arrays(dtype, 10, strat)) + + +@given(nested_dtypes()) +def test_np_dtype_is_idempotent(dtype): + assert dtype == np.dtype(dtype) + + +def test_minimise_scalar_dtypes(): + assert minimal(scalar_dtypes()) == np.dtype(u'bool') + + +def test_minimise_nested_types(): + assert minimal(nested_dtypes()) == np.dtype(u'bool') + + +def test_minimise_array_strategy(): + smallest = minimal(arrays( + nested_dtypes(max_itemsize=settings.default.buffer_size // 3**3), + array_shapes(max_dims=3, max_side=3))) + assert smallest.dtype == np.dtype(u'bool') and not smallest.any()
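
Usage sketch (illustrative only, not part of the patch above): because ``arrays`` now accepts strategies for both its ``dtype`` and ``shape`` arguments, it composes directly with ``array_shapes`` and ``scalar_dtypes``. The test name and the property checked below are assumptions chosen purely to demonstrate the new API.

.. code-block:: python

    from hypothesis import given
    from hypothesis.extra.numpy import arrays, array_shapes, scalar_dtypes


    # Draw an array whose dtype comes from scalar_dtypes() and whose shape
    # comes from array_shapes(); elements are inferred via from_dtype().
    @given(arrays(scalar_dtypes(), array_shapes(max_dims=2, max_side=3)))
    def test_copy_preserves_dtype_and_shape(arr):
        copied = arr.copy()
        # Copying an ndarray never changes its dtype or its shape.
        assert copied.dtype == arr.dtype
        assert copied.shape == arr.shape

Keeping the shapes small (at most 2x3 here) stays well under the default ``buffer_size`` entropy limit that ``ArrayStrategy`` enforces.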