-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix the nx_char type for numpy to and . #554
base: master
Are you sure you want to change the base?
Changes from 4 commits
0f46c34
dd8beb4
351f377
00b78aa
3ad40a0
6cf6f6d
539bdea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -591,71 +591,93 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]: | |||||||
np.uint16, | ||||||||
np.uint32, | ||||||||
np.uint64, | ||||||||
np.uint, | ||||||||
np.unsignedinteger, | ||||||||
np.signedinteger, | ||||||||
) | ||||||||
np_float = (np.float16, np.float32, np.float64, np.floating) | ||||||||
np_bytes = (np.bytes_, np.byte, np.ubyte) | ||||||||
np_char = (np.str_, np.char.chararray, *np_bytes) | ||||||||
# Not to be confused with `np.byte` and `np.ubyte`, these store | ||||||||
# and integer of `8bit` and `unsigned 8bit` respectively. | ||||||||
np_bytes = (np.bytes_,) | ||||||||
np_char = (np.str_, np.bytes_) # Only numpy Unicode string and Byte string | ||||||||
np_bool = (np.bool_,) | ||||||||
np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle) | ||||||||
NEXUS_TO_PYTHON_DATA_TYPES = { | ||||||||
"ISO8601": (str,), | ||||||||
"NX_BINARY": ( | ||||||||
bytes, | ||||||||
bytearray, | ||||||||
np.ndarray, | ||||||||
*np_bytes, | ||||||||
), | ||||||||
"NX_BOOLEAN": (bool, np.ndarray, *np_bool), | ||||||||
"NX_CHAR": (str, np.ndarray, *np_char), | ||||||||
"NX_BOOLEAN": (bool, *np_bool), | ||||||||
"NX_CHAR": (str, *np_char), | ||||||||
"NX_DATE_TIME": (str,), | ||||||||
"NX_FLOAT": (float, np.ndarray, *np_float), | ||||||||
"NX_INT": (int, np.ndarray, *np_int), | ||||||||
"NX_UINT": (np.ndarray, np.unsignedinteger), | ||||||||
"NX_FLOAT": (float, *np_float), | ||||||||
"NX_INT": (int, *np_int), | ||||||||
"NX_UINT": ( | ||||||||
np.unsignedinteger, | ||||||||
np.uint, | ||||||||
), | ||||||||
"NX_NUMBER": ( | ||||||||
int, | ||||||||
float, | ||||||||
np.ndarray, | ||||||||
*np_int, | ||||||||
*np_float, | ||||||||
dict, | ||||||||
), | ||||||||
"NX_POSINT": ( | ||||||||
int, | ||||||||
np.ndarray, | ||||||||
np.signedinteger, | ||||||||
), # > 0 is checked in is_valid_data_field() | ||||||||
"NX_COMPLEX": (complex, np.ndarray, *np_complex), | ||||||||
"NXDL_TYPE_UNAVAILABLE": (str,), # Defaults to a string if a type is not provided. | ||||||||
"NX_COMPLEX": (complex, *np_complex), | ||||||||
"NXDL_TYPE_UNAVAILABLE": ( | ||||||||
str, | ||||||||
*np_char, | ||||||||
), # Defaults to a string if a type is not provided. | ||||||||
"NX_CHAR_OR_NUMBER": ( | ||||||||
str, | ||||||||
int, | ||||||||
float, | ||||||||
np.ndarray, | ||||||||
*np_char, | ||||||||
*np_int, | ||||||||
*np_float, | ||||||||
dict, | ||||||||
), | ||||||||
} | ||||||||
|
||||||||
|
||||||||
def check_all_children_for_callable(objects: list, check: Callable, *args) -> bool: | ||||||||
"""Checks whether all objects in list are validated by given callable.""" | ||||||||
for obj in objects: | ||||||||
if not check(obj, *args): | ||||||||
return False | ||||||||
def check_all_children_for_callable( | ||||||||
objects: Union[list, np.ndarray], | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should extend this to all iterable data types (e.g. tuples, ...) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All the datastructure are not officially mentioned in |
||||||||
checker: Optional[Callable] = None, | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest to rename this to |
||||||||
accepted_types: Optional[tuple] = None, | ||||||||
) -> bool: | ||||||||
"""Checks whether all objects in list or numpy array are validated | ||||||||
by given callable and types. | ||||||||
""" | ||||||||
|
||||||||
return True | ||||||||
if checker is not None: | ||||||||
for obj in objects: | ||||||||
args = (obj, accepted_types) if accepted_types is not None else (obj,) | ||||||||
if not checker(*args): | ||||||||
return False | ||||||||
return True | ||||||||
|
||||||||
# default checker | ||||||||
tmp_arr = None | ||||||||
if isinstance(objects, list): | ||||||||
# Handles list and list of list | ||||||||
tmp_arr = np.array(objects) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this work of the dtypes within a list are not the same? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it will work. Because, numpy will check if all the elments have the same size of bit atleast converable to a common datatype, Otherwise it fails. If there is heterogeneous datatype (like string, nemeric) numpy will choose unicode There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh, it will not work in specificatino of |
||||||||
elif isinstance(objects, np.ndarray): | ||||||||
tmp_arr = objects | ||||||||
if tmp_arr is not None: | ||||||||
return any([np.issubdtype(tmp_arr.dtype, type_) for type_ in accepted_types]) | ||||||||
return False | ||||||||
|
||||||||
|
||||||||
def is_valid_data_type(value, accepted_types): | ||||||||
"""Checks whether the given value or its children are of an accepted type.""" | ||||||||
if not isinstance(value, list): | ||||||||
|
||||||||
if not isinstance(value, (list, np.ndarray)): | ||||||||
return isinstance(value, accepted_types) | ||||||||
|
||||||||
return check_all_children_for_callable(value, isinstance, accepted_types) | ||||||||
return check_all_children_for_callable(objects=value, accepted_types=accepted_types) | ||||||||
|
||||||||
|
||||||||
def is_positive_int(value): | ||||||||
|
@@ -665,7 +687,7 @@ def is_greater_than(num): | |||||||
return num.flat[0] > 0 if isinstance(num, np.ndarray) else num > 0 | ||||||||
|
||||||||
if isinstance(value, list): | ||||||||
return check_all_children_for_callable(value, is_greater_than) | ||||||||
return check_all_children_for_callable(objects=value, checker=is_greater_than) | ||||||||
|
||||||||
return value.flat[0] > 0 if isinstance(value, np.ndarray) else value > 0 | ||||||||
|
||||||||
|
@@ -685,28 +707,31 @@ def convert_str_to_bool_safe(value): | |||||||
def is_valid_data_field(value, nxdl_type, path): | ||||||||
# todo: Check this funciton and wtire test for it. It seems the funciton is not | ||||||||
# working as expected. | ||||||||
"""Checks whether a given value is valid according to what is defined in the NXDL. | ||||||||
"""Checks whether a given value is valid according to the type defined in the NXDL. | ||||||||
|
||||||||
This function will also try to convert typical types, for example int to float, | ||||||||
and return the successful conversion. | ||||||||
This function also converts bool value comes in str format. In case, it fails to | ||||||||
convert, it raises an Exception. | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
It doesn't really raise an Exception, but just a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do not see the InvalidDateTime warning anywhere. |
||||||||
|
||||||||
If it fails to convert, it raises an Exception. | ||||||||
|
||||||||
Returns two values: first, boolean (True if the the value corresponds to nxdl_type, | ||||||||
False otherwise) and second, result of attempted conversion or the original value | ||||||||
(if conversion is not needed or impossible) | ||||||||
Returns two values: | ||||||||
boolean (True if the the value corresponds to nxdl_type, False otherwise) | ||||||||
converted_value bool value. | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add typing annotation. The calling function expects a single bool. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The return handling is correct, nevertheless I suggest to add typing everywhere. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doc is updated. You might be talking about the |
||||||||
""" | ||||||||
accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type] | ||||||||
output_value = value | ||||||||
|
||||||||
accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type] | ||||||||
# Do not count the dict as it represents a link value | ||||||||
if not isinstance(value, dict) and not is_valid_data_type(value, accepted_types): | ||||||||
try: | ||||||||
if accepted_types[0] is bool and isinstance(value, str): | ||||||||
value = convert_str_to_bool_safe(value) | ||||||||
if value is None: | ||||||||
raise ValueError | ||||||||
output_value = accepted_types[0](value) | ||||||||
except ValueError: | ||||||||
return True, value | ||||||||
|
||||||||
collector.collect_and_log( | ||||||||
path, ValidationProblem.InvalidType, accepted_types, nxdl_type | ||||||||
) | ||||||||
return False, value | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This return value False here is interpreted as "undocumented field" in the calling function. So we get an additional wrong warning if the dtype does not match. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see that it is just for internal logic (please see line 750-766 in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand what you are trying to say here. My point is that is_documented returns False if the datatype does not match, which I would say is wrong, because it is documented, but just has the wrong data type. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This behavior was btw. introduced in #522, I believe. Maybe @GinzburgLev or @lukaspie can comment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to explain the code a bit, to clarify that in that code, no different type validation errors are collected, as you are assuming |
||||||||
except (ValueError, TypeError): | ||||||||
collector.collect_and_log( | ||||||||
path, ValidationProblem.InvalidType, accepted_types, nxdl_type | ||||||||
) | ||||||||
|
@@ -726,7 +751,7 @@ def is_valid_data_field(value, nxdl_type, path): | |||||||
collector.collect_and_log(path, ValidationProblem.InvalidDatetime, value) | ||||||||
return False, value | ||||||||
|
||||||||
return True, output_value | ||||||||
return True, value | ||||||||
|
||||||||
|
||||||||
@lru_cache(maxsize=None) | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adjust all occurences of is_valid_data_field (e.g. line 552) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @RubelMozumder @sanbrock Returning False in L552 if the data type of a field does not fit effectively makes this field "undocumented". Is this intentional? The produced warnings at least are rather contradictive:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @sanbrock @sherjeelshabih Also, apparently during this second round of checking for undocumented fields, that are not being found before, no checks for enumerations are being done. I don't really understand the logic of this checking completely, but I suggest a checking for enums should happen here as well. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What exactly is not to be confused? Maybe write "np.xxx is not to be confused..."