From 104cb9280778ea632a7a36bc3b7e04c2c58fb08d Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Wed, 26 Feb 2025 22:30:48 -0500 Subject: [PATCH 01/10] Update ground truths to have correctly reshaped masks --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 878a43a..d6b391b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -44,7 +44,7 @@ def _download_ground_truths(): print(f"Downloading ground truths") - url = f"https://zenodo.org/record/13732996/files/ground_truths.zip" + url = f"https://zenodo.org/record/14934373/files/ground_truths.zip" # basically from https://stackoverflow.com/questions/37573483/progress-bar-while-download-file-over-http-with-requests/37573701 response = requests.get(url, stream=True) From 860dd90688d3405c1a8f3fb9761d5914a1474bb6 Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Wed, 26 Feb 2025 23:27:37 -0500 Subject: [PATCH 02/10] Update ground truths for 'ixs' tests --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index d6b391b..5901ac0 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -44,7 +44,7 @@ def _download_ground_truths(): print(f"Downloading ground truths") - url = f"https://zenodo.org/record/14934373/files/ground_truths.zip" + url = f"https://zenodo.org/record/14934525/files/ground_truths.zip" # basically from https://stackoverflow.com/questions/37573483/progress-bar-while-download-file-over-http-with-requests/37573701 response = requests.get(url, stream=True) From 4b5573885cdc13339f5f6af6f013b0acd870bd8d Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Wed, 26 Feb 2025 23:44:54 -0500 Subject: [PATCH 03/10] Fix escaping in regexes to fix some warnings --- mesmerize_core/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mesmerize_core/utils.py b/mesmerize_core/utils.py index 02ca460..5287cdc 100644 --- a/mesmerize_core/utils.py +++ b/mesmerize_core/utils.py @@ -51,7 +51,7 @@ def fn(self, *args, **kwargs): def validate_path(path: Union[str, Path]): - if not regex.match("^[A-Za-z0-9@\/\\\:._-]*$", str(path)): + if not regex.match(r"^[A-Za-z0-9@/\\:._-]*$", str(path)): raise ValueError( "Paths must only contain alphanumeric characters, " "hyphens ( - ), underscores ( _ ) or periods ( . 
)" @@ -140,7 +140,7 @@ def make_runfile( else: with open(sh_file, "w") as f: for k, v in os.environ.items(): # copy the current environment - if regex.match("^.*[\(\)]", str(k)) or regex.match("^.*[\(\)]", str(v)): + if regex.match(r"^.*[()]", str(k)) or regex.match(r"^.*[()]", str(v)): continue with NamedTemporaryFile(suffix=".ps1", delete=False) as tmp: try: # windows powershell is stupid so make sure all the env var names work From 07bbc0755c80bd20b19d32a827159e6eb8bf3154 Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Wed, 26 Feb 2025 23:47:56 -0500 Subject: [PATCH 04/10] Run through black and update gitignore --- .gitignore | 3 + docs/source/conf.py | 45 +- mesmerize_core/algorithms/cnmf.py | 14 +- mesmerize_core/algorithms/cnmfe.py | 8 +- mesmerize_core/algorithms/mcorr.py | 15 +- mesmerize_core/arrays/__init__.py | 2 +- mesmerize_core/arrays/_base.py | 36 +- mesmerize_core/arrays/_cnmf.py | 63 +-- mesmerize_core/arrays/_video.py | 19 +- mesmerize_core/batch_utils.py | 31 +- .../caiman_extensions/_batch_exceptions.py | 9 +- mesmerize_core/caiman_extensions/_utils.py | 19 +- mesmerize_core/caiman_extensions/cache.py | 8 +- mesmerize_core/caiman_extensions/cnmf.py | 78 ++-- mesmerize_core/caiman_extensions/common.py | 169 +++++--- mesmerize_core/caiman_extensions/mcorr.py | 8 +- mesmerize_core/movie_readers.py | 9 +- mesmerize_core/utils.py | 19 +- setup.py | 27 +- tests/params.py | 2 +- tests/test_core.py | 405 ++++++++++-------- 21 files changed, 566 insertions(+), 423 deletions(-) diff --git a/.gitignore b/.gitignore index a75004f..fbe257b 100644 --- a/.gitignore +++ b/.gitignore @@ -131,6 +131,9 @@ dmypy.json # pycharm .idea/ +# vscode +.vscode/ + # test files tests/tmp tests/videos diff --git a/docs/source/conf.py b/docs/source/conf.py index 198783d..6b09588 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,9 +19,9 @@ # -- Project information ----------------------------------------------------- -project = 'mesmerize-core' -copyright = '2023, Kushal Kolar, Caitlin Lewis, Arjun Putcha' -author = 'Kushal Kolar, Caitlin Lewis, Arjun Putcha' +project = "mesmerize-core" +copyright = "2023, Kushal Kolar, Caitlin Lewis, Arjun Putcha" +author = "Kushal Kolar, Caitlin Lewis, Arjun Putcha" # The full version, including alpha/beta/rc tags release = mesmerize_core.__version__ @@ -35,7 +35,7 @@ autodoc_typehints = "description" # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -48,51 +48,56 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'pydata_sphinx_theme' +html_theme = "pydata_sphinx_theme" html_theme_options = {"page_sidebar_items": ["class_page_toc"]} autoclass_content = "both" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = ["_static"] -autodoc_member_order = 'bysource' +autodoc_member_order = "bysource" def _setup_navbar_side_toctree(app: Any): - def add_class_toctree_function(app: Any, pagename: Any, templatename: Any, context: Any, doctree: Any): + def add_class_toctree_function( + app: Any, pagename: Any, templatename: Any, context: Any, doctree: Any + ): def get_class_toc() -> Any: soup = BeautifulSoup(context["body"], "html.parser") - matches = soup.find_all('dl') + matches = soup.find_all("dl") if matches is None or len(matches) == 0: return "" items = [] - deeper_depth = matches[0].find('dt').get('id').count(".") + deeper_depth = matches[0].find("dt").get("id").count(".") for match in matches: - match_dt = match.find('dt') - if match_dt is not None and match_dt.get('id') is not None: - current_title = match_dt.get('id') - current_depth = match_dt.get('id').count(".") + match_dt = match.find("dt") + if match_dt is not None and match_dt.get("id") is not None: + current_title = match_dt.get("id") + current_depth = match_dt.get("id").count(".") current_link = match.find(class_="headerlink") if current_link is not None: if deeper_depth > current_depth: deeper_depth = current_depth if deeper_depth == current_depth: - items.append({ - "title": current_title.split('.')[-1], - "link": current_link["href"], - "attributes_and_methods": [] - }) + items.append( + { + "title": current_title.split(".")[-1], + "link": current_link["href"], + "attributes_and_methods": [], + } + ) if deeper_depth < current_depth: items[-1]["attributes_and_methods"].append( { - "title": current_title.split('.')[-1], + "title": current_title.split(".")[-1], "link": current_link["href"], } ) return items + context["get_class_toc"] = get_class_toc app.connect("html-page-context", add_class_toctree_function) diff --git a/mesmerize_core/algorithms/cnmf.py b/mesmerize_core/algorithms/cnmf.py index ca8f599..8df931a 100644 --- a/mesmerize_core/algorithms/cnmf.py +++ b/mesmerize_core/algorithms/cnmf.py @@ -1,4 +1,5 @@ """Performs CNMF in a separate process""" + import click import caiman as cm from caiman.source_extraction.cnmf import cnmf as cnmf @@ -115,12 +116,14 @@ def run_algo(batch_path, uuid, data_path: str = None): # save paths as relative path strings with forward slashes cnmf_hdf5_path = str(PurePosixPath(output_path.relative_to(output_dir.parent))) - cnmf_memmap_path = str(PurePosixPath(cnmf_memmap_path.relative_to(output_dir.parent))) + cnmf_memmap_path = str( + PurePosixPath(cnmf_memmap_path.relative_to(output_dir.parent)) + ) corr_img_path = str(PurePosixPath(corr_img_path.relative_to(output_dir.parent))) for proj_type in proj_paths.keys(): - d[f"{proj_type}-projection-path"] = str(PurePosixPath(proj_paths[proj_type].relative_to( - output_dir.parent - ))) + d[f"{proj_type}-projection-path"] = str( + PurePosixPath(proj_paths[proj_type].relative_to(output_dir.parent)) + ) d.update( { @@ -136,10 +139,11 @@ def run_algo(batch_path, uuid, data_path: str = None): d = {"success": False, "traceback": traceback.format_exc()} cm.stop_server(dview=dview) - + runtime = round(time.time() - algo_start, 2) df.caiman.update_item_with_results(uuid, d, runtime) + @click.command() @click.option("--batch-path", type=str) @click.option("--uuid", type=str) diff --git a/mesmerize_core/algorithms/cnmfe.py b/mesmerize_core/algorithms/cnmfe.py index e053869..0f0b4ff 100644 --- a/mesmerize_core/algorithms/cnmfe.py +++ b/mesmerize_core/algorithms/cnmfe.py @@ -81,9 +81,7 @@ def run_algo(batch_path, uuid, 
data_path: str = None): params_dict = {**cnmfe_params_dict, **params["main"]} cnmfe_params_dict = CNMFParams(params_dict=params_dict) - cnm = cnmf.CNMF( - n_processes=n_processes, dview=dview, params=cnmfe_params_dict - ) + cnm = cnmf.CNMF(n_processes=n_processes, dview=dview, params=cnmfe_params_dict) print("Performing CNMFE") cnm = cnm.fit(images) print("evaluating components") @@ -106,7 +104,9 @@ def run_algo(batch_path, uuid, data_path: str = None): move_file(fname_new, cnmf_memmap_path) # save path as relative path strings with forward slashes - cnmfe_memmap_path = str(PurePosixPath(cnmf_memmap_path.relative_to(output_dir.parent))) + cnmfe_memmap_path = str( + PurePosixPath(cnmf_memmap_path.relative_to(output_dir.parent)) + ) d.update( { diff --git a/mesmerize_core/algorithms/mcorr.py b/mesmerize_core/algorithms/mcorr.py index 3bac29e..484130d 100644 --- a/mesmerize_core/algorithms/mcorr.py +++ b/mesmerize_core/algorithms/mcorr.py @@ -102,16 +102,15 @@ def run_algo(batch_path, uuid, data_path: str = None): Cn[np.isnan(Cn)] = 0 cn_path = output_dir.joinpath(f"{uuid}_cn.npy") np.save(str(cn_path), Cn, allow_pickle=False) - - print("finished computing correlation image") + print("finished computing correlation image") # Compute shifts if opts.motion["pw_rigid"] == True: x_shifts = mc.x_shifts_els y_shifts = mc.y_shifts_els shifts = [x_shifts, y_shifts] - if hasattr(mc, 'z_shifts_els'): + if hasattr(mc, "z_shifts_els"): shifts.append(mc.z_shifts_els) shift_path = output_dir.joinpath(f"{uuid}_shifts.npy") np.save(str(shift_path), shifts) @@ -125,12 +124,14 @@ def run_algo(batch_path, uuid, data_path: str = None): # save paths as relative path strings with forward slashes cn_path = str(PurePosixPath(cn_path.relative_to(output_dir.parent))) - mcorr_memmap_path = str(PurePosixPath(mcorr_memmap_path.relative_to(output_dir.parent))) + mcorr_memmap_path = str( + PurePosixPath(mcorr_memmap_path.relative_to(output_dir.parent)) + ) shift_path = str(PurePosixPath(shift_path.relative_to(output_dir.parent))) for proj_type in proj_paths.keys(): - d[f"{proj_type}-projection-path"] = str(PurePosixPath(proj_paths[proj_type].relative_to( - output_dir.parent - ))) + d[f"{proj_type}-projection-path"] = str( + PurePosixPath(proj_paths[proj_type].relative_to(output_dir.parent)) + ) d.update( { diff --git a/mesmerize_core/arrays/__init__.py b/mesmerize_core/arrays/__init__.py index 5e3378a..b37c7ec 100644 --- a/mesmerize_core/arrays/__init__.py +++ b/mesmerize_core/arrays/__init__.py @@ -7,5 +7,5 @@ "LazyArrayRCB", "LazyArrayResiduals", "LazyTiff", - "LazyVideo" + "LazyVideo", ] diff --git a/mesmerize_core/arrays/_base.py b/mesmerize_core/arrays/_base.py index 50ce4ac..3d2a053 100644 --- a/mesmerize_core/arrays/_base.py +++ b/mesmerize_core/arrays/_base.py @@ -12,6 +12,7 @@ class LazyArray(ABC): """ Base class for arrays that exhibit lazy computation upon indexing """ + @property @abstractmethod def dtype(self) -> str: @@ -122,10 +123,7 @@ def as_numpy(self): def save_hdf5(self, filename: Union[str, Path]): pass - def __getitem__( - self, - item: Union[int, Tuple[slice_or_int_or_range]] - ): + def __getitem__(self, item: Union[int, Tuple[slice_or_int_or_range]]): if isinstance(item, int): indexer = item @@ -161,21 +159,25 @@ def __getitem__( if start is not None: if start > self.n_frames: - raise IndexError(f"Cannot index beyond `n_frames`.\n" - f"Desired frame start index of <{start}> " - f"lies beyond `n_frames` <{self.n_frames}>") + raise IndexError( + f"Cannot index beyond `n_frames`.\n" + f"Desired frame 
start index of <{start}> " + f"lies beyond `n_frames` <{self.n_frames}>" + ) if stop is not None: if stop > self.n_frames: - raise IndexError(f"Cannot index beyond `n_frames`.\n" - f"Desired frame stop index of <{stop}> " - f"lies beyond `n_frames` <{self.n_frames}>") + raise IndexError( + f"Cannot index beyond `n_frames`.\n" + f"Desired frame stop index of <{stop}> " + f"lies beyond `n_frames` <{self.n_frames}>" + ) if step is None: step = 1 - + # convert indexer to slice if it was a range, allows things like decord.VideoReader slicing indexer = slice(start, stop, step) # in case it was a range object - + # dimension_0 is always time frames = self._compute_at_indices(indexer) @@ -193,7 +195,9 @@ def __getitem__( return self._compute_at_indices(indexer) def __repr__(self): - return f"{self.__class__.__name__} @{hex(id(self))}\n" \ - f"{self.__class__.__doc__}\n" \ - f"Frames are computed only upon indexing\n" \ - f"shape [frames, x, y]: {self.shape}\n" + return ( + f"{self.__class__.__name__} @{hex(id(self))}\n" + f"{self.__class__.__doc__}\n" + f"Frames are computed only upon indexing\n" + f"shape [frames, x, y]: {self.shape}\n" + ) diff --git a/mesmerize_core/arrays/_cnmf.py b/mesmerize_core/arrays/_cnmf.py index 7bb2d2b..2b3d229 100644 --- a/mesmerize_core/arrays/_cnmf.py +++ b/mesmerize_core/arrays/_cnmf.py @@ -11,11 +11,12 @@ class LazyArrayRCM(LazyArray): """LazyArray for reconstructed movie, i.e. A ⊗ C""" + def __init__( - self, - spatial: np.ndarray, - temporal: np.ndarray, - frame_dims: Tuple[int, int], + self, + spatial: np.ndarray, + temporal: np.ndarray, + frame_dims: Tuple[int, int], ): """ Parameters @@ -28,7 +29,7 @@ def __init__( frame_dims: Tuple[int, int] frame dimensions - + """ if spatial.shape[1] != temporal.shape[0]: @@ -60,18 +61,22 @@ def __init__( spatial_min = self.spatial.min(axis=0) prods = list() - for t, s in iter_product([temporal_min, temporal_max], [spatial_min, spatial_max]): + for t, s in iter_product( + [temporal_min, temporal_max], [spatial_min, spatial_max] + ): _p = np.multiply(t, s) prods.append(np.nanmin(_p)) prods.append(np.nanmax(_p)) - + self._max = np.max(prods) self._min = np.min(prods) temporal_mean = np.nanmean(self.temporal, axis=1) temporal_std = np.nanstd(self.temporal, axis=1) - self._mean_image = self.spatial.dot(temporal_mean).reshape(frame_dims, order="F") + self._mean_image = self.spatial.dot(temporal_mean).reshape( + frame_dims, order="F" + ) self._max_image = self.spatial.dot(temporal_max).reshape(frame_dims, order="F") self._min_image = self.spatial.dot(temporal_min).reshape(frame_dims, order="F") self._std_image = self.spatial.dot(temporal_std).reshape(frame_dims, order="F") @@ -127,13 +132,13 @@ def min_image(self) -> np.ndarray: def std_image(self) -> np.ndarray: """standard deviation projection image""" return self._std_image - + def _compute_at_indices(self, indices: Union[int, Tuple[int, int]]) -> np.ndarray: - rcm = self.spatial.dot( - self.temporal[:, indices] - ).reshape( - self.shape[1:] + (-1,), order="F" - ).transpose([2, 0, 1]) + rcm = ( + self.spatial.dot(self.temporal[:, indices]) + .reshape(self.shape[1:] + (-1,), order="F") + .transpose([2, 0, 1]) + ) if rcm.shape[0] == 1: return rcm[0] # 2d single frame @@ -142,12 +147,13 @@ def _compute_at_indices(self, indices: Union[int, Tuple[int, int]]) -> np.ndarra def __repr__(self): r = super().__repr__() - return f"{r}" \ - f"n_components: {self.n_components}" + return f"{r}" f"n_components: {self.n_components}" def __eq__(self, other): if not isinstance(other, 
LazyArrayRCM): - raise TypeError(f"cannot compute equality for against types that are not {self.__class__.__name__}") + raise TypeError( + f"cannot compute equality for against types that are not {self.__class__.__name__}" + ) if (self.spatial == other.spatial) and (self.temporal == other.temporal): return True @@ -163,12 +169,13 @@ class LazyArrayRCB(LazyArrayRCM): class LazyArrayResiduals(LazyArray): """Lazy array for residuals, i.e. Y - (A ⊗ C) - (b ⊗ f)""" + def __init__( - self, - raw_movie: np.ndarray, - rcm: LazyArrayRCM, - rcb: LazyArrayRCB, - timeout: int = 10 + self, + raw_movie: np.ndarray, + rcm: LazyArrayRCM, + rcb: LazyArrayRCB, + timeout: int = 10, ): """ Create a LazyArray of the residuals, ``Y - (A ⊗ C) - (b ⊗ f)`` @@ -247,14 +254,18 @@ def n_frames(self) -> int: # TODO: implement min max for residuals @property def min(self) -> float: - warn("min and max not yet implemented for LazyArrayResiduals. " - "Using first frame of raw movie") + warn( + "min and max not yet implemented for LazyArrayResiduals. " + "Using first frame of raw movie" + ) return float(self._raw_movie[0].min()) @property def max(self) -> float: - warn("min and max not yet implemented for LazyArrayResiduals. " - "Using first frame of raw movie") + warn( + "min and max not yet implemented for LazyArrayResiduals. " + "Using first frame of raw movie" + ) return float(self._raw_movie[0].max()) def _compute_at_indices(self, indices: Union[int, slice]) -> np.ndarray: diff --git a/mesmerize_core/arrays/_video.py b/mesmerize_core/arrays/_video.py index bc0b195..fc3604a 100644 --- a/mesmerize_core/arrays/_video.py +++ b/mesmerize_core/arrays/_video.py @@ -16,11 +16,12 @@ class LazyVideo(LazyArray): def __init__( - self, path: Union[Path, str], - min_max: Tuple[int, int] = None, - as_grayscale: bool = False, - rgb_weights: Tuple[float, float, float] = (0.299, 0.587, 0.114), - **kwargs, + self, + path: Union[Path, str], + min_max: Tuple[int, int] = None, + as_grayscale: bool = False, + rgb_weights: Tuple[float, float, float] = (0.299, 0.587, 0.114), + **kwargs, ): """ LazyVideo reader, basically just a wrapper for ``decord.VideoReader``. 
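The LazyArray classes reformatted above all follow the same contract: ``__getitem__`` normalizes ints, slices, and ranges, then defers to ``_compute_at_indices``, so frames are materialized only on demand. Below is a minimal self-contained sketch of that pattern; the ``ConstantLazyArray`` class and its constant frames are hypothetical and only illustrate the base-class contract, they are not part of this repository.

import numpy as np

class ConstantLazyArray:
    """Hypothetical toy array: every frame is a constant image (illustration only)."""

    def __init__(self, n_frames, frame_dims):
        self.shape = (n_frames, *frame_dims)

    def _compute_at_indices(self, indices):
        # real subclasses read from disk or multiply factor matrices here
        if isinstance(indices, int):
            return np.full(self.shape[1:], indices, dtype=np.float32)
        frame_ixs = range(*indices.indices(self.shape[0]))
        return np.stack(
            [np.full(self.shape[1:], i, dtype=np.float32) for i in frame_ixs]
        )

    def __getitem__(self, item):
        if isinstance(item, range):  # the base class also converts ranges to slices
            item = slice(item.start, item.stop, item.step)
        return self._compute_at_indices(item)

movie = ConstantLazyArray(n_frames=100, frame_dims=(60, 80))
print(movie[10].mean())   # 10.0, single frame built on demand
print(movie[0:5].shape)   # (5, 60, 80)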
@@ -138,9 +139,11 @@ def _compute_at_indices(self, indices: Union[int, slice]) -> np.ndarray: a = self._video_reader[indices].asnumpy() # R + G + B -> grayscale - gray = a[..., 0] * self.rgb_weights[0] +\ - a[..., 1] * self.rgb_weights[1] +\ - a[..., 2] * self.rgb_weights[2] + gray = ( + a[..., 0] * self.rgb_weights[0] + + a[..., 1] * self.rgb_weights[1] + + a[..., 2] * self.rgb_weights[2] + ) return gray diff --git a/mesmerize_core/batch_utils.py b/mesmerize_core/batch_utils.py index 800a288..1b5ffa0 100644 --- a/mesmerize_core/batch_utils.py +++ b/mesmerize_core/batch_utils.py @@ -14,9 +14,24 @@ COMPUTE_BACKEND_SLURM = "slurm" #: SLURM backend COMPUTE_BACKEND_LOCAL = "local" -COMPUTE_BACKENDS = [COMPUTE_BACKEND_SUBPROCESS, COMPUTE_BACKEND_SLURM, COMPUTE_BACKEND_LOCAL] - -DATAFRAME_COLUMNS = ["algo", "item_name", "input_movie_path", "params", "outputs", "added_time", "ran_time", "algo_duration", "comments", "uuid"] +COMPUTE_BACKENDS = [ + COMPUTE_BACKEND_SUBPROCESS, + COMPUTE_BACKEND_SLURM, + COMPUTE_BACKEND_LOCAL, +] + +DATAFRAME_COLUMNS = [ + "algo", + "item_name", + "input_movie_path", + "params", + "outputs", + "added_time", + "ran_time", + "algo_duration", + "comments", + "uuid", +] def set_parent_raw_data_path(path: Union[Path, str]) -> Path: @@ -133,10 +148,10 @@ def split(self, path: Union[str, Path]): ) raise NotADirectoryError( - f"Could not split `path`:\n{path}" - f"\nnot relative to either batch path:\n{self.get_batch_path()}" - f"\nor parent raw data path:\n{get_parent_raw_data_path()}" - ) + f"Could not split `path`:\n{path}" + f"\nnot relative to either batch path:\n{self.get_batch_path()}" + f"\nor parent raw data path:\n{get_parent_raw_data_path()}" + ) @pd.api.extensions.register_dataframe_accessor("paths") @@ -234,7 +249,7 @@ def create_batch(path: Union[str, Path], remove_existing: bool = False) -> pd.Da df = pd.DataFrame(columns=DATAFRAME_COLUMNS) df.to_pickle(path) # save before adding platform-dependent batch path - + df.paths.set_batch_path(path) return df diff --git a/mesmerize_core/caiman_extensions/_batch_exceptions.py b/mesmerize_core/caiman_extensions/_batch_exceptions.py index 208f8d7..2d7c2e8 100644 --- a/mesmerize_core/caiman_extensions/_batch_exceptions.py +++ b/mesmerize_core/caiman_extensions/_batch_exceptions.py @@ -15,7 +15,8 @@ class DependencyError(Exception): class PreventOverwriteError(IndexError): - """ - Error thrown when trying to write to an existing batch file with a potential risk of removing existing rows. - """ - pass \ No newline at end of file + """ + Error thrown when trying to write to an existing batch file with a potential risk of removing existing rows. 
+ """ + + pass diff --git a/mesmerize_core/caiman_extensions/_utils.py b/mesmerize_core/caiman_extensions/_utils.py index f403bdd..f422ac8 100644 --- a/mesmerize_core/caiman_extensions/_utils.py +++ b/mesmerize_core/caiman_extensions/_utils.py @@ -2,8 +2,12 @@ from typing import Union from uuid import UUID -from mesmerize_core.caiman_extensions._batch_exceptions import BatchItemNotRunError, BatchItemUnsuccessfulError, \ - WrongAlgorithmExtensionError, PreventOverwriteError +from mesmerize_core.caiman_extensions._batch_exceptions import ( + BatchItemNotRunError, + BatchItemUnsuccessfulError, + WrongAlgorithmExtensionError, + PreventOverwriteError, +) def validate(algo: str = None): @@ -21,7 +25,9 @@ def wrapper(self, *args, **kwargs): if not self._series["outputs"]["success"]: tb = self._series["outputs"]["traceback"] - raise BatchItemUnsuccessfulError(f"Batch item was unsuccessful, traceback from subprocess:\n{tb}") + raise BatchItemUnsuccessfulError( + f"Batch item was unsuccessful, traceback from subprocess:\n{tb}" + ) return func(self, *args, **kwargs) return wrapper @@ -31,14 +37,18 @@ def wrapper(self, *args, **kwargs): def _verify_and_lock_batch_file(func): """Acquires lock and ensures batch file has the same items as current df before calling wrapped function""" + @wraps(func) def wrapper(instance, *args, **kwargs): with instance._batch_lock: disk_df = instance.reload_from_disk() # check whether all the same UUIDs are present with the same indices if not instance._df["uuid"].equals(disk_df["uuid"]): - raise PreventOverwriteError("Items on disk do not match current DataFrame; reload to synchronize") + raise PreventOverwriteError( + "Items on disk do not match current DataFrame; reload to synchronize" + ) return func(instance, *args, **kwargs) + return wrapper @@ -66,4 +76,5 @@ def _parser(instance, *args, **kwargs): args = (index, *args[1:]) return func(instance, *args, **kwargs) + return _parser diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 82a71ca..62dbd44 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -200,6 +200,7 @@ def invalidate(self, pre: bool = True, post: bool = True): invalidate after the decorated function has been fun """ + def _invalidate(func): @wraps(func) def __invalidate(instance, *args, **kwargs): @@ -207,19 +208,18 @@ def __invalidate(instance, *args, **kwargs): if pre: self.cache.drop( - self.cache.loc[self.cache["uuid"] == u].index, - inplace=True + self.cache.loc[self.cache["uuid"] == u].index, inplace=True ) rval = func(instance, *args, **kwargs) if post: self.cache.drop( - self.cache.loc[self.cache["uuid"] == u].index, - inplace=True + self.cache.loc[self.cache["uuid"] == u].index, inplace=True ) return rval return __invalidate + return _invalidate diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py index e1df330..088d080 100644 --- a/mesmerize_core/caiman_extensions/cnmf.py +++ b/mesmerize_core/caiman_extensions/cnmf.py @@ -24,7 +24,9 @@ def _component_indices_parser(func): @wraps(func) def _parser(instance, *args, **kwargs) -> Any: if "component_indices" in kwargs.keys(): - component_indices: Union[np.ndarray, str, None] = kwargs["component_indices"] + component_indices: Union[np.ndarray, str, None] = kwargs[ + "component_indices" + ] elif len(args) > 0: component_indices = args[0] # always first positional arg in the extensions else: @@ -46,7 +48,9 @@ def _parser(instance, *args, **kwargs) -> 
Any: if isinstance(component_indices, str): accepted = ["all", "good", "bad"] if component_indices not in accepted: - raise ValueError(f"Accepted `str` values for `component_indices` are: {accepted}") + raise ValueError( + f"Accepted `str` values for `component_indices` are: {accepted}" + ) if component_indices == "all": component_indices = np.arange(cnmf_obj.estimates.A.shape[1]) @@ -62,6 +66,7 @@ def _parser(instance, *args, **kwargs) -> Any: args = (component_indices, *args[1:]) return func(instance, *args, **kwargs) + return _parser @@ -76,6 +81,7 @@ def __check(instance, *args, **kwargs): ) return func(instance, *args, **kwargs) + return __check @@ -188,7 +194,10 @@ def get_output(self, return_copy=True) -> CNMF: @_component_indices_parser @cnmf_cache.use_cache def get_masks( - self, component_indices: Union[np.ndarray, str] = None, threshold: float = 0.01, return_copy=True + self, + component_indices: Union[np.ndarray, str] = None, + threshold: float = 0.01, + return_copy=True, ) -> np.ndarray: """ | Get binary masks of the spatial components at the given ``component_indices``. @@ -227,7 +236,7 @@ def get_masks( masks = np.zeros(shape=(dims[0], dims[1], len(component_indices)), dtype=bool) for n, ix in enumerate(component_indices): - s = cnmf_obj.estimates.A[:, ix].toarray().reshape(cnmf_obj.dims, order='F') + s = cnmf_obj.estimates.A[:, ix].toarray().reshape(cnmf_obj.dims, order="F") s[s >= threshold] = 1 s[s < threshold] = 0 @@ -236,9 +245,7 @@ def get_masks( return masks @staticmethod - def _get_spatial_contours( - cnmf_obj: CNMF, component_indices, swap_dim - ): + def _get_spatial_contours(cnmf_obj: CNMF, component_indices, swap_dim): dims = cnmf_obj.dims if dims is None: @@ -261,10 +268,10 @@ def _get_spatial_contours( @_component_indices_parser @cnmf_cache.use_cache def get_contours( - self, - component_indices: Union[np.ndarray, str] = None, - swap_dim: bool = True, - return_copy=True + self, + component_indices: Union[np.ndarray, str] = None, + swap_dim: bool = True, + return_copy=True, ) -> Tuple[List[np.ndarray], List[np.ndarray]]: """ Get the contour and center of mass for each spatial footprint @@ -319,7 +326,7 @@ def get_temporal( component_indices: Union[np.ndarray, str] = None, add_background: bool = False, add_residuals: bool = False, - return_copy=True + return_copy=True, ) -> np.ndarray: """ Get the temporal components for this CNMF item, basically ``CNMF.estimates.C`` @@ -389,10 +396,10 @@ def get_temporal( @_component_indices_parser @cnmf_cache.use_cache def get_rcm( - self, - component_indices: Union[np.ndarray, str] = None, - temporal_components: np.ndarray = None, - return_copy=False + self, + component_indices: Union[np.ndarray, str] = None, + temporal_components: np.ndarray = None, + return_copy=False, ) -> LazyArrayRCM: """ Return the reconstructed movie with no background, i.e. ``A ⊗ C``, as a ``LazyArray``. 
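The reconstruction that ``LazyArrayRCM._compute_at_indices`` performs (shown earlier in this patch) is just a matrix product reshaped to the frame dimensions in Fortran order. A toy numpy sketch with made-up shapes:

import numpy as np

dims = (60, 80)                                 # frame height and width (made up)
n_pixels, n_components, n_frames = dims[0] * dims[1], 5, 2000

A = np.random.rand(n_pixels, n_components)      # spatial footprints, one per column
C = np.random.rand(n_components, n_frames)      # temporal traces, one per row

t = 42                                          # arbitrary frame index
frame = A.dot(C[:, t]).reshape(dims, order="F") # same Fortran ordering as the patch
print(frame.shape)                              # (60, 80)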
@@ -467,8 +474,10 @@ def get_rcm( elif cnmf_obj.dims is not None: dims = cnmf_obj.dims else: - raise AttributeError(f"`dims` not found in the CNMF data, it is usually found in one of the following:\n" - f"`cnmf_obj.estimates.dims` or `cnmf_obj.dims`") + raise AttributeError( + f"`dims` not found in the CNMF data, it is usually found in one of the following:\n" + f"`cnmf_obj.estimates.dims` or `cnmf_obj.dims`" + ) spatial = cnmf_obj.estimates.A[:, component_indices] temporal = temporal_components[component_indices, :] @@ -477,7 +486,9 @@ def get_rcm( @validate("cnmf") @cnmf_cache.use_cache - def get_rcb(self,) -> LazyArrayRCB: + def get_rcb( + self, + ) -> LazyArrayRCB: """ Return the reconstructed background, ``(b ⊗ f)`` @@ -519,8 +530,10 @@ def get_rcb(self,) -> LazyArrayRCB: elif cnmf_obj.dims is not None: dims = cnmf_obj.dims else: - raise AttributeError(f"`dims` not found in the CNMF data, it is usually found in one of the following:\n" - f"`cnmf_obj.estimates.dims` or `cnmf_obj.dims`") + raise AttributeError( + f"`dims` not found in the CNMF data, it is usually found in one of the following:\n" + f"`cnmf_obj.estimates.dims` or `cnmf_obj.dims`" + ) spatial = cnmf_obj.estimates.b temporal = cnmf_obj.estimates.f @@ -576,13 +589,13 @@ def get_residuals(self) -> LazyArrayResiduals: @_check_permissions @cnmf_cache.invalidate() def run_detrend_dfof( - self, - quantileMin: float = 8, - frames_window: int = 500, - flag_auto: bool = True, - use_fast: bool = False, - use_residuals: bool = True, - detrend_only: bool = False + self, + quantileMin: float = 8, + frames_window: int = 500, + flag_auto: bool = True, + use_fast: bool = False, + use_residuals: bool = True, + detrend_only: bool = False, ) -> None: """ | Uses caiman's detrend_df_f. @@ -630,7 +643,7 @@ def run_detrend_dfof( flag_auto=flag_auto, use_fast=use_fast, use_residuals=use_residuals, - detrend_only=detrend_only + detrend_only=detrend_only, ) # remove current hdf5 file @@ -644,9 +657,7 @@ def run_detrend_dfof( @_component_indices_parser @cnmf_cache.use_cache def get_detrend_dfof( - self, - component_indices: Union[np.ndarray, str] = None, - return_copy: bool = True + self, component_indices: Union[np.ndarray, str] = None, return_copy: bool = True ): """ Get the detrended dF/F0 curves after calling ``run_detrend_dfof``. 
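``run_detrend_dfof`` delegates to caiman's ``detrend_df_f``; the sketch below only illustrates the underlying rolling-quantile baseline idea (parameter names mirror ``quantileMin`` and ``frames_window``) and is not caiman's implementation, which additionally accounts for background and residual components:

import numpy as np
from scipy.ndimage import percentile_filter

def dfof_sketch(trace, quantile_min=8, frames_window=500):
    # running baseline F0: a low quantile taken over a sliding window of frames
    f0 = percentile_filter(trace, percentile=quantile_min, size=frames_window)
    return (trace - f0) / f0

trace = np.random.rand(2000) + 10.0  # hypothetical raw trace for one component
print(dfof_sketch(trace).shape)      # (2000,)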
@@ -732,8 +743,7 @@ def run_eval(self, params: dict) -> None: cnmf_obj.params.quality.update(params) cnmf_obj.estimates.filter_components( - imgs=self._series.caiman.get_input_movie(), - params=cnmf_obj.params + imgs=self._series.caiman.get_input_movie(), params=cnmf_obj.params ) cnmf_obj_path = self.get_output_path() diff --git a/mesmerize_core/caiman_extensions/common.py b/mesmerize_core/caiman_extensions/common.py index bd78fb6..1007b76 100644 --- a/mesmerize_core/caiman_extensions/common.py +++ b/mesmerize_core/caiman_extensions/common.py @@ -15,7 +15,12 @@ import pandas as pd from filelock import SoftFileLock, Timeout -from ._batch_exceptions import BatchItemNotRunError, BatchItemUnsuccessfulError, DependencyError, PreventOverwriteError +from ._batch_exceptions import ( + BatchItemNotRunError, + BatchItemUnsuccessfulError, + DependencyError, + PreventOverwriteError, +) from ._utils import validate, _index_parser, _verify_and_lock_batch_file from ..batch_utils import ( COMPUTE_BACKENDS, @@ -45,8 +50,9 @@ class CaimanDataFrameExtensions: def __init__(self, df: pd.DataFrame): self._df = df - self._batch_lock = SoftFileLock(str(df.paths.get_batch_path()) + ".lock", - timeout=30, is_singleton=True) + self._batch_lock = SoftFileLock( + str(df.paths.get_batch_path()) + ".lock", timeout=30, is_singleton=True + ) def uloc(self, u: Union[str, UUID]) -> pd.Series: """ @@ -65,7 +71,13 @@ def uloc(self, u: Union[str, UUID]) -> pd.Series: return df_u.squeeze() @_verify_and_lock_batch_file - def add_item(self, algo: str, item_name: str, input_movie_path: Union[str, pd.Series], params: dict): + def add_item( + self, + algo: str, + item_name: str, + input_movie_path: Union[str, pd.Series], + params: dict, + ): """ Add an item to the DataFrame to organize parameters that can be used to run a CaImAn algorithm @@ -132,7 +144,9 @@ def add_item(self, algo: str, item_name: str, input_movie_path: Union[str, pd.Se @_verify_and_lock_batch_file @_index_parser - def update_item(self, index: Union[int, str, UUID], updates: Union[dict, pd.Series]): + def update_item( + self, index: Union[int, str, UUID], updates: Union[dict, pd.Series] + ): """ Update the item at the given index or UUID with the data in updates and write to disk. 
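A hedged usage sketch of the accessor methods reformatted here, assuming the usual top-level exports (``create_batch``, ``set_parent_raw_data_path``); all file paths, the item parameters, and the comment text are placeholders, not values from this patch:

from mesmerize_core import create_batch, set_parent_raw_data_path

set_parent_raw_data_path("/data/raw")          # hypothetical raw-data root
df = create_batch("/data/batch/batch.pickle")  # hypothetical new batch file

df.caiman.add_item(
    algo="mcorr",
    item_name="my_movie",
    input_movie_path="/data/raw/my_movie.tif",  # hypothetical movie under the raw root
    params={"main": {"pw_rigid": True}},
)

# update by integer index; a str or UUID index is also accepted via @_index_parser,
# and unknown column names raise AttributeError rather than silently adding columns
df.caiman.update_item(0, {"comments": "queued after regex-escaping fix"})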
@@ -148,17 +162,21 @@ def update_item(self, index: Union[int, str, UUID], updates: Union[dict, pd.Seri row = self._df.iloc[index] for key in updates.keys(): if key not in row: - raise AttributeError(f"Cannot update item; received unknown column name '{key}'") + raise AttributeError( + f"Cannot update item; received unknown column name '{key}'" + ) row.update(updates) self._df.iloc[index] = row self.save_to_disk() - def update_item_with_results(self, uuid: Union[str, UUID], results: dict, run_duration: float): + def update_item_with_results( + self, uuid: Union[str, UUID], results: dict, run_duration: float + ): """Helper for algorithms to save their results to disk""" updates = { "outputs": results, "ran_time": datetime.now().isoformat(timespec="seconds", sep="T"), - "algo_duration": str(run_duration) + " sec" + "algo_duration": str(run_duration) + " sec", } try: # reload first because it should be safe since we have a UUID and we want to @@ -235,7 +253,12 @@ def reload_from_disk(self) -> pd.DataFrame: @_verify_and_lock_batch_file @_index_parser - def remove_item(self, index: Union[int, str, UUID], remove_data: bool = True, safe_removal: bool = True): + def remove_item( + self, + index: Union[int, str, UUID], + remove_data: bool = True, + safe_removal: bool = True, + ): """ Remove a batch item from the DataFrame and delete all data associated to that batch item from disk if ``remove_data=True`` @@ -322,7 +345,8 @@ def get_params_diffs(self, algo: str, item_name: str) -> pd.DataFrame: `item_name`. The returned index corresponds to the index of the original DataFrame - """ + """ + def flatten_params(params_dict: dict): """ Produce a flat dict with one entry for each parameter in the passed dict. @@ -337,47 +361,69 @@ def flatten_params(params_dict: dict): else: params[key1] = val1 return params - + sub_df = self._df[self._df["item_name"] == item_name] sub_df = sub_df[sub_df["algo"] == algo] if sub_df.index.size == 0: - raise NameError(f"The given `item_name`: {item_name}, does not exist in the DataFrame") + raise NameError( + f"The given `item_name`: {item_name}, does not exist in the DataFrame" + ) # get flattened parameters for each of the filtered items params_flat = sub_df.params.map(lambda p: flatten_params(p["main"])) # build list of params that differ between different parameter sets - common_params = deepcopy(params_flat.iat[0]) # holds the common value for parameters found in all sets (so far) - varying_params = set() # set of parameter keys that appear in not all sets or with varying values + common_params = deepcopy( + params_flat.iat[0] + ) # holds the common value for parameters found in all sets (so far) + varying_params = ( + set() + ) # set of parameter keys that appear in not all sets or with varying values for this_params in params_flat.iloc[1:]: # first, anything that's not in both this dict and the common set is considered varying common_paramset = set(common_params.keys()) - for not_common_key in common_paramset.symmetric_difference(this_params.keys()): + for not_common_key in common_paramset.symmetric_difference( + this_params.keys() + ): varying_params.add(not_common_key) if not_common_key in common_paramset: del common_params[not_common_key] common_paramset.remove(not_common_key) # second, look at params in the common set and remove any that differ for this set - for key in common_paramset: # iterate over this set rather than dict itself to avoid issues when deleting entries - if not np.array_equal(common_params[key], this_params[key]): # (should also work for 
scalars/arbitrary objects) + for ( + key + ) in ( + common_paramset + ): # iterate over this set rather than dict itself to avoid issues when deleting entries + if not np.array_equal( + common_params[key], this_params[key] + ): # (should also work for scalars/arbitrary objects) varying_params.add(key) del common_params[key] # gives a list where each item is a dict that has the unique params that correspond to a row # the indices of this series correspond to the index of the row in the parent dataframe - diffs = params_flat.map(lambda p: {key: p[key] if key in p else "" for key in varying_params}) + diffs = params_flat.map( + lambda p: { + key: p[key] if key in p else "" for key in varying_params + } + ) # return as a nicely formatted dataframe - diffs_df = pd.DataFrame.from_dict(diffs.tolist(), dtype=object).set_index(diffs.index) + diffs_df = pd.DataFrame.from_dict(diffs.tolist(), dtype=object).set_index( + diffs.index + ) return diffs_df - @warning_experimental("This feature will change in the future and directly return the " - " a DataFrame of children (rows, ie. child batch items row) " - "instead of a list of UUIDs") + @warning_experimental( + "This feature will change in the future and directly return the " + " a DataFrame of children (rows, ie. child batch items row) " + "instead of a list of UUIDs" + ) @_index_parser def get_children(self, index: Union[int, str, UUID]) -> List[UUID]: """ @@ -419,8 +465,10 @@ def get_children(self, index: Union[int, str, UUID]) -> List[UUID]: children.append(r["uuid"]) return children - @warning_experimental("This feature will change in the future and directly return the " - " pandas.Series (row, ie. batch item row) instead of the UUID") + @warning_experimental( + "This feature will change in the future and directly return the " + " pandas.Series (row, ie. 
batch item row) instead of the UUID" + ) @_index_parser def get_parent(self, index: Union[int, str, UUID]) -> Union[UUID, None]: """ @@ -451,7 +499,11 @@ def get_parent(self, index: Union[int, str, UUID]) -> Union[UUID, None]: continue try: _potential_parent = r.mcorr.get_output_path() - except (FileNotFoundError, BatchItemUnsuccessfulError, BatchItemNotRunError): + except ( + FileNotFoundError, + BatchItemUnsuccessfulError, + BatchItemNotRunError, + ): continue # can't be a parent if it was unsuccessful if _potential_parent == input_movie_path: @@ -460,6 +512,7 @@ def get_parent(self, index: Union[int, str, UUID]) -> Union[UUID, None]: class DummyProcess: """Dummy process for local backend""" + def wait(self): pass @@ -475,27 +528,20 @@ def __init__(self, s: pd.Series): self.process: Popen = None def _run_local( - self, - algo: str, - batch_path: Path, - uuid: UUID, - data_path: Union[Path, None], + self, + algo: str, + batch_path: Path, + uuid: UUID, + data_path: Union[Path, None], ): algo_module = getattr(algorithms, algo) algo_module.run_algo( - batch_path=str(batch_path), - uuid=str(uuid), - data_path=str(data_path) + batch_path=str(batch_path), uuid=str(uuid), data_path=str(data_path) ) return DummyProcess() - def _run_subprocess( - self, - runfile_path: str, - wait: bool, - **kwargs - ): + def _run_subprocess(self, runfile_path: str, wait: bool, **kwargs): # Get the dir that contains the input movie parent_path = self._series.paths.resolve(self._series.input_movie_path).parent @@ -510,11 +556,7 @@ def _run_subprocess( return self.process def _run_slurm( - self, - runfile_path: str, - wait: bool, - sbatch_opts: str = '', - **kwargs + self, runfile_path: str, wait: bool, sbatch_opts: str = "", **kwargs ): """ Run on a cluster using SLURM. Configurable options (to pass to run): @@ -528,8 +570,8 @@ def _run_slurm( """ # this needs to match what's in the runfile - if 'MESMERIZE_N_PROCESSES' in os.environ: - n_procs = os.environ['MESMERIZE_N_PROCESSES'] + if "MESMERIZE_N_PROCESSES" in os.environ: + n_procs = os.environ["MESMERIZE_N_PROCESSES"] else: n_procs = psutil.cpu_count() - 1 @@ -537,30 +579,25 @@ def _run_slurm( uuid = str(self._series["uuid"]) output_dir = Path(runfile_path).parent.joinpath(uuid) output_dir.mkdir(parents=True, exist_ok=True) - output_path = output_dir / f'{uuid}.log' + output_path = output_dir / f"{uuid}.log" # --wait means that the lifetme of the created process corresponds to the lifetime of the job submission_opts = [ f'--job-name={self._series["algo"]}-{uuid[:8]}', - '--ntasks=1', - f'--cpus-per-task={n_procs}', - f'--output={output_path}', - '--wait' - ] + shlex.split(sbatch_opts) - - self.process = Popen(['sbatch', *submission_opts, runfile_path]) + "--ntasks=1", + f"--cpus-per-task={n_procs}", + f"--output={output_path}", + "--wait", + ] + shlex.split(sbatch_opts) + + self.process = Popen(["sbatch", *submission_opts, runfile_path]) if wait: self.process.wait() - + return self.process @cnmf_cache.invalidate() - def run( - self, - backend: Optional[str] = None, - wait: bool = True, - **kwargs - ): + def run(self, backend: Optional[str] = None, wait: bool = True, **kwargs): """ Run a CaImAn algorithm in an external process using the chosen backend @@ -649,7 +686,9 @@ def get_input_movie_path(self) -> Path: return self._series.paths.resolve(self._series["input_movie_path"]) - def get_input_movie(self, reader: callable = None, **kwargs) -> Union[np.ndarray, Any]: + def get_input_movie( + self, reader: callable = None, **kwargs + ) -> Union[np.ndarray, Any]: """ 
Get the input movie @@ -669,9 +708,7 @@ def get_input_movie(self, reader: callable = None, **kwargs) -> Union[np.ndarray if reader is not None: if not callable(reader): - raise TypeError( - f"reader must be a callable type, such as a function" - ) + raise TypeError(f"reader must be a callable type, such as a function") return reader(path_str, **kwargs) @@ -703,7 +740,7 @@ def get_pnr_image(self) -> np.ndarray: def get_projection(self, proj_type: str) -> np.ndarray: """ Return the ``max``, ``mean``, or ``std`` (standard deviation) projection - + Parameters ---------- proj_type: str diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py index 0699d98..4635c7c 100644 --- a/mesmerize_core/caiman_extensions/mcorr.py +++ b/mesmerize_core/caiman_extensions/mcorr.py @@ -112,8 +112,12 @@ def get_shifts(self, pw_rigid) -> list[np.ndarray]: shifts = np.load(str(path)) if pw_rigid: - shifts_by_dim = list(shifts) # dims-length list of n_frames x n_patches matrices + shifts_by_dim = list( + shifts + ) # dims-length list of n_frames x n_patches matrices else: - shifts_by_dim = list(shifts.T) # dims-length list of n_frames-length vectors + shifts_by_dim = list( + shifts.T + ) # dims-length list of n_frames-length vectors return shifts_by_dim diff --git a/mesmerize_core/movie_readers.py b/mesmerize_core/movie_readers.py index bde8865..c184682 100644 --- a/mesmerize_core/movie_readers.py +++ b/mesmerize_core/movie_readers.py @@ -8,6 +8,7 @@ try: import pims + HAS_PIMS = True except (ModuleNotFoundError, ImportError): HAS_PIMS = False @@ -27,9 +28,7 @@ def default_reader(path: str, **kwargs): return caiman_memmap_reader(path, **kwargs) else: - raise ValueError( - f"No default movie reader for given file extension: '{ext}'" - ) + raise ValueError(f"No default movie reader for given file extension: '{ext}'") def tiff_memmap_reader(path: str, **kwargs) -> np.memmap: @@ -50,7 +49,5 @@ def caiman_memmap_reader(path: str, **kwargs) -> np.memmap: def pims_reader(path: str, **kwargs): if not HAS_PIMS: - raise ModuleNotFoundError( - "you must install `pims` to use the pims reader" - ) + raise ModuleNotFoundError("you must install `pims` to use the pims reader") return pims.open(path, **kwargs) diff --git a/mesmerize_core/utils.py b/mesmerize_core/utils.py index 5287cdc..9ab1adb 100644 --- a/mesmerize_core/utils.py +++ b/mesmerize_core/utils.py @@ -4,7 +4,6 @@ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 """ - import numpy as np from functools import wraps import os @@ -34,6 +33,7 @@ def warning_experimental(more_info: str = ""): """ decorator to warn the user that the function is experimental """ + def catcher(func): @wraps(func) def fn(self, *args, **kwargs): @@ -43,10 +43,12 @@ def fn(self, *args, **kwargs): f"{func.__qualname__}\n" f"{more_info}\n", FutureWarning, - stacklevel=2 + stacklevel=2, ) return func(self, *args, **kwargs) + return fn + return catcher @@ -125,15 +127,14 @@ def make_runfile( f'export MESMERIZE_N_PROCESSES={os.environ["MESMERIZE_N_PROCESSES"]}\n' ) - f.write( - f"export OPENBLAS_NUM_THREADS=1\n" - f"export MKL_NUM_THREADS=1\n" - ) + f.write(f"export OPENBLAS_NUM_THREADS=1\n" f"export MKL_NUM_THREADS=1\n") if "CONDA_PREFIX" in os.environ.keys(): # add command to run the python script in the conda environment # that was active at the time that this shell script was generated - f.write(f'{os.environ["CONDA_EXE"]} run -p {os.environ["CONDA_PREFIX"]} python {module_path} {args_str}') + f.write( + f'{os.environ["CONDA_EXE"]} run -p 
{os.environ["CONDA_PREFIX"]} python {module_path} {args_str}' + ) else: f.write(f"python {module_path} {args_str}") # call the script to run @@ -150,7 +151,9 @@ def make_runfile( os.unlink(tmp.name) except: continue - f.write(f'$env:{k}="{v}";\n') # write only env vars that powershell likes + f.write( + f'$env:{k}="{v}";\n' + ) # write only env vars that powershell likes f.write(f"{sys.executable} {module_path} {args_str}") st = os.stat(sh_file) diff --git a/setup.py b/setup.py index 7d539d1..35a904d 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ "click", "psutil", "jupyterlab", - "filelock" + "filelock", ] @@ -22,25 +22,24 @@ ver = f.read().split("\n")[0] -classifiers = \ - [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: Apache Software License", - "Operating System :: POSIX :: Linux", - "Operating System :: MacOS :: MacOS X", - "Operating System :: Microsoft :: Windows :: Windows 10", - "Topic :: Scientific/Engineering :: Bio-Informatics", - "Topic :: Scientific/Engineering :: Image Recognition", - "Topic :: Scientific/Engineering :: Information Analysis", - "Intended Audience :: Science/Research" - ] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: POSIX :: Linux", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows :: Windows 10", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Image Recognition", + "Topic :: Scientific/Engineering :: Information Analysis", + "Intended Audience :: Science/Research", +] setup( name="mesmerize-core", description="High level pandas-based API for batch analysis of Calcium Imaging data using CaImAn", long_description=readme, - long_description_content_type='text/markdown', + long_description_content_type="text/markdown", classifiers=classifiers, version=ver, install_requires=install_requires, diff --git a/tests/params.py b/tests/params.py index 0593ac3..242cc1f 100644 --- a/tests/params.py +++ b/tests/params.py @@ -53,5 +53,5 @@ "update_background_components": True, "del_duplicates": True, }, - } + }, } diff --git a/tests/test_core.py b/tests/test_core.py index 5901ac0..499ca1b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -12,7 +12,11 @@ CaimanSeriesExtensions, set_parent_raw_data_path, ) -from mesmerize_core.batch_utils import DATAFRAME_COLUMNS, COMPUTE_BACKEND_SUBPROCESS, get_full_raw_data_path +from mesmerize_core.batch_utils import ( + DATAFRAME_COLUMNS, + COMPUTE_BACKEND_SUBPROCESS, + get_full_raw_data_path, +) from mesmerize_core.utils import IS_WINDOWS from uuid import uuid4 from typing import * @@ -174,34 +178,34 @@ def test_mcorr(): # test that batch path is propagated to pd.Series assert ( - df.attrs["batch_path"] - == df.paths.get_batch_path() - == df.iloc[-1].paths.get_batch_path() - == df.iloc[-1].attrs["batch_path"] + df.attrs["batch_path"] + == df.paths.get_batch_path() + == df.iloc[-1].paths.get_batch_path() + == df.iloc[-1].attrs["batch_path"] ) # test that path resolve works for parent_raw_dir rel_input_movie_path = input_movie_path.relative_to(vid_dir) assert ( - df.paths.resolve(rel_input_movie_path) - == df.iloc[-1].paths.resolve(rel_input_movie_path) - == input_movie_path + df.paths.resolve(rel_input_movie_path) + == df.iloc[-1].paths.resolve(rel_input_movie_path) + == input_movie_path ) # test that path splitting works for parent_raw_dir split = (vid_dir, input_movie_path.relative_to(vid_dir)) assert ( - 
df.paths.split(input_movie_path) - == df.iloc[-1].paths.split(input_movie_path) - == split + df.paths.split(input_movie_path) + == df.iloc[-1].paths.split(input_movie_path) + == split ) # test that the input_movie_path in the DataFrame rows are relative assert Path(df.iloc[-1]["input_movie_path"]) == split[1] assert ( - get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) - == vid_dir.joinpath(f"{algo}.tif") - == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) + == vid_dir.joinpath(f"{algo}.tif") + == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) process = df.iloc[-1].caiman.run() @@ -224,32 +228,32 @@ def test_mcorr(): ) rel_mcorr_memmap_path = mcorr_memmap_path.relative_to(batch_dir) assert ( - df.paths.resolve(rel_mcorr_memmap_path) - == df.iloc[-1].paths.resolve(rel_mcorr_memmap_path) - == mcorr_memmap_path + df.paths.resolve(rel_mcorr_memmap_path) + == df.iloc[-1].paths.resolve(rel_mcorr_memmap_path) + == mcorr_memmap_path ) # test that path splitting works for batch_dir split = (batch_dir, mcorr_memmap_path.relative_to(batch_dir)) assert ( - df.paths.split(mcorr_memmap_path) - == df.iloc[-1].paths.split(mcorr_memmap_path) - == split + df.paths.split(mcorr_memmap_path) + == df.iloc[-1].paths.split(mcorr_memmap_path) + == split ) assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) # test to check mmap output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000.mmap', - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000.mmap', + ) ) # test to check shifts output path @@ -263,46 +267,46 @@ def test_mcorr(): # test to check mean-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' + ) ) # test to check std-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' + ) ) # test to check max-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) - == 
df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' + ) ) # test to check correlation image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') + batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') ) # test to check mcorr get_output_path() assert ( - df.iloc[-1].mcorr.get_output_path() - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000.mmap', - ) - == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + df.iloc[-1].mcorr.get_output_path() + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000.mmap', + ) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) ) # test to check mcorr get_output() @@ -312,15 +316,15 @@ def test_mcorr(): ) numpy.testing.assert_array_equal(mcorr_output, mcorr_output_actual) - # test to check mcorr get_shifts() - mcorr_shifts = df.iloc[-1].mcorr.get_shifts(pw_rigid=test_params[algo]["main"]["pw_rigid"]) + mcorr_shifts = df.iloc[-1].mcorr.get_shifts( + pw_rigid=test_params[algo]["main"]["pw_rigid"] + ) mcorr_shifts_actual = numpy.load( ground_truths_dir.joinpath("mcorr", "mcorr_shifts.npy") ) numpy.testing.assert_array_equal(mcorr_shifts, mcorr_shifts_actual) - # test to check caiman get_input_movie_path() assert df.iloc[-1].caiman.get_input_movie_path() == get_full_raw_data_path( df.iloc[0]["input_movie_path"] @@ -331,7 +335,9 @@ def test_mcorr(): mcorr_corr_img_actual = numpy.load( ground_truths_dir.joinpath("mcorr", "mcorr_correlation_img.npy") ) - numpy.testing.assert_allclose(mcorr_corr_img, mcorr_corr_img_actual, rtol=1e-2, atol=1e-10) + numpy.testing.assert_allclose( + mcorr_corr_img, mcorr_corr_img_actual, rtol=1e-2, atol=1e-10 + ) # test to check caiman get_projection("mean") mcorr_mean = df.iloc[-1].caiman.get_projection("mean") @@ -397,12 +403,12 @@ def test_cnmf(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000.mmap', - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000.mmap', + ) ) algo = "cnmf" @@ -442,59 +448,59 @@ def test_cnmf(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) assert ( - 
batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) ) # test to check mmap output path assert ( - batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}_cnmf-memmap_d1_60_d2_80_d3_1_order_C_frames_2000.mmap', - ) - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}_cnmf-memmap_d1_60_d2_80_d3_1_order_C_frames_2000.mmap', + ) + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) ) # test to check mean-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' + ) ) # test to check std-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' + ) ) # test to check max-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' + ) ) # test to check correlation image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') + batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') ) print("testing cnmf.get_cnmf_memmap()") @@ -517,10 +523,10 @@ def test_cnmf(): # test to check cnmf get_output_path() assert ( - df.iloc[-1].cnmf.get_output_path() - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') + df.iloc[-1].cnmf.get_output_path() + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == 
df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') ) # test to check cnmf get_output() @@ -548,11 +554,11 @@ def test_cnmf(): allow_pickle=True, ) for contour, actual_contour in zip( - cnmf_spatial_contours_contours, cnmf_spatial_contours_contours_actual + cnmf_spatial_contours_contours, cnmf_spatial_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip( - cnmf_spatial_contours_coms, cnmf_spatial_contours_coms_actual + cnmf_spatial_contours_coms, cnmf_spatial_contours_coms_actual ): numpy.testing.assert_allclose(com, actual_com, rtol=1e-2, atol=1e-10) @@ -572,27 +578,41 @@ def test_cnmf(): ) numpy.testing.assert_allclose( cnmf_reconstructed_movie_AouterC.as_numpy(), - cnmf_reconstructed_movie_AouterC_actual, rtol=1e-1, atol=1e-10 + cnmf_reconstructed_movie_AouterC_actual, + rtol=1e-1, + atol=1e-10, ) # test that get_item is working properly for LazyArrays - for i in np.random.randint(10, cnmf_reconstructed_movie_AouterC_actual.shape[0] - 11, size=10): + for i in np.random.randint( + 10, cnmf_reconstructed_movie_AouterC_actual.shape[0] - 11, size=10 + ): numpy.testing.assert_allclose( cnmf_reconstructed_movie_AouterC[i], - cnmf_reconstructed_movie_AouterC_actual[i], rtol=1e-1, atol=1e-10 + cnmf_reconstructed_movie_AouterC_actual[i], + rtol=1e-1, + atol=1e-10, ) - for i in np.random.randint(10, cnmf_reconstructed_movie_AouterC_actual.shape[0] - 11, size=10): + for i in np.random.randint( + 10, cnmf_reconstructed_movie_AouterC_actual.shape[0] - 11, size=10 + ): numpy.testing.assert_allclose( - cnmf_reconstructed_movie_AouterC[i-5:i+5], - cnmf_reconstructed_movie_AouterC_actual[i-5:i+5], rtol=1e-1, atol=1e-10 + cnmf_reconstructed_movie_AouterC[i - 5 : i + 5], + cnmf_reconstructed_movie_AouterC_actual[i - 5 : i + 5], + rtol=1e-1, + atol=1e-10, ) # test to check get_rcb() cnmf_reconstructed_background = df.iloc[-1].cnmf.get_rcb() - cnmf_reconstructed_background_actual = numpy.load(ground_truths_dir.joinpath("cnmf", "reconstructed_background.npy")) + cnmf_reconstructed_background_actual = numpy.load( + ground_truths_dir.joinpath("cnmf", "reconstructed_background.npy") + ) numpy.testing.assert_allclose( cnmf_reconstructed_background.as_numpy(), - cnmf_reconstructed_background_actual, rtol=1e-2, atol=1e-10 + cnmf_reconstructed_background_actual, + rtol=1e-2, + atol=1e-10, ) # test to check get_residuals() @@ -601,16 +621,18 @@ def test_cnmf(): # cnmf_residuals_actual = numpy.load(ground_truths_dir.joinpath("cnmf", "residuals.npy")) numpy.testing.assert_allclose( cnmf_residuals.as_numpy(), - df.iloc[-1].caiman.get_input_movie() - cnmf_reconstructed_movie_AouterC_actual - cnmf_reconstructed_background_actual, + df.iloc[-1].caiman.get_input_movie() + - cnmf_reconstructed_movie_AouterC_actual + - cnmf_reconstructed_background_actual, rtol=1e2, - atol=1e-5 + atol=1e-5, ) # test to check caiman get_input_movie_path(), should be output of previous mcorr assert ( - df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) - == batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) + df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + == batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) ) # test to check caiman get_correlation_img() @@ -618,7 +640,9 @@ def test_cnmf(): cnmf_corr_img_actual = numpy.load( ground_truths_dir.joinpath("cnmf", 
"cnmf_correlation_img.npy") ) - numpy.testing.assert_allclose(cnmf_corr_img, cnmf_corr_img_actual, rtol=1e-5, atol=1e-5) + numpy.testing.assert_allclose( + cnmf_corr_img, cnmf_corr_img_actual, rtol=1e-5, atol=1e-5 + ) # test to check caiman get_projection("mean") cnmf_mean = df.iloc[-1].caiman.get_projection("mean") @@ -660,7 +684,7 @@ def test_cnmf(): allow_pickle=True, ) for contour, actual_contour in zip( - ixs_contours_contours, ixs_contours_contours_actual + ixs_contours_contours, ixs_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip(ixs_contours_coms, ixs_contours_coms_actual): @@ -722,9 +746,9 @@ def test_cnmfe(): pytest.fail("Something wrong with setting UUID for batch items") assert ( - batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) - == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) + == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) process = df.iloc[-1].caiman.run() @@ -743,9 +767,9 @@ def test_cnmfe(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) assert batch_dir.joinpath( @@ -754,39 +778,39 @@ def test_cnmfe(): # test to check mmap output path assert ( - batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}_cnmf-memmap_d1_128_d2_128_d3_1_order_C_frames_1000.mmap', - ) - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}_cnmf-memmap_d1_128_d2_128_d3_1_order_C_frames_1000.mmap', + ) + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) ) # test to check mean-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' + ) ) # test to check std-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' + ) ) # test to check max-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' - ) + 
batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' + ) ) # extension tests - full @@ -810,9 +834,9 @@ def test_cnmfe(): # test to check cnmf get_output_path() assert ( - df.iloc[-1].cnmf.get_output_path() - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == df.iloc[-1].paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + df.iloc[-1].cnmf.get_output_path() + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == df.iloc[-1].paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) ) # test to check cnmf get_output() @@ -840,11 +864,11 @@ def test_cnmfe(): allow_pickle=True, ) for contour, actual_contour in zip( - cnmfe_spatial_contours_contours, cnmfe_spatial_contours_contours_actual + cnmfe_spatial_contours_contours, cnmfe_spatial_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip( - cnmfe_spatial_contours_coms, cnmfe_spatial_contours_coms_actual + cnmfe_spatial_contours_coms, cnmfe_spatial_contours_coms_actual ): numpy.testing.assert_allclose(com, actual_com, rtol=1e-2, atol=1e-10) @@ -867,16 +891,21 @@ def test_cnmfe(): ) numpy.testing.assert_allclose( cnmfe_reconstructed_movie_AouterC.as_numpy(), - cnmfe_reconstructed_movie_AouterC_actual, rtol=1e2, atol=1e-1 + cnmfe_reconstructed_movie_AouterC_actual, + rtol=1e2, + atol=1e-1, ) # test to check get_rcb() cnmfe_reconstructed_background = df.iloc[-1].cnmf.get_rcb() cnmfe_reconstructed_background_actual = numpy.load( - ground_truths_dir.joinpath("cnmfe_full", "cnmfe_reconstructed_background.npy")) + ground_truths_dir.joinpath("cnmfe_full", "cnmfe_reconstructed_background.npy") + ) numpy.testing.assert_allclose( cnmfe_reconstructed_background.as_numpy(), - cnmfe_reconstructed_background_actual, rtol=1e-2, atol=1e-10 + cnmfe_reconstructed_background_actual, + rtol=1e-2, + atol=1e-10, ) # test to check get_residuals() @@ -885,9 +914,11 @@ def test_cnmfe(): # cnmfe_residuals_actual = numpy.load(ground_truths_dir.joinpath("cnmfe_full", "cnmfe_residuals.npy")) numpy.testing.assert_allclose( cnmfe_residuals.as_numpy(), - df.iloc[-1].caiman.get_input_movie() - cnmfe_reconstructed_movie_AouterC_actual - cnmfe_reconstructed_background_actual, + df.iloc[-1].caiman.get_input_movie() + - cnmfe_reconstructed_movie_AouterC_actual + - cnmfe_reconstructed_background_actual, rtol=1e2, - atol=1e-1 + atol=1e-1, ) # test to check passing optional ixs components to various functions @@ -917,7 +948,7 @@ def test_cnmfe(): allow_pickle=True, ) for contour, actual_contour in zip( - ixs_contours_contours, ixs_contours_contours_actual + ixs_contours_contours, ixs_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip(ixs_contours_coms, ixs_contours_coms_actual): @@ -1008,7 +1039,7 @@ def test_remove_item(): # remove index 1 df.caiman.remove_item(index=1, remove_data=True) - assert (path1.exists() == False) + assert path1.exists() == False assert df.isin([f"test1"]).any().any() == False # input movie path should be unaffected assert path1_input.exists() @@ -1027,12 +1058,12 @@ def test_remove_item(): assert df.iloc[2].cnmf.get_output_path().exists() df.iloc[2].cnmf.get_output() # check that the earlier data from index 3, now index 2, is equal - np.testing.assert_array_equal(data3, 
df.iloc[2].cnmf.get_temporal())
     np.testing.assert_raises(
         AssertionError,
         np.testing.assert_array_equal,
         data2,
-        df.iloc[2].cnmf.get_temporal()
+        df.iloc[2].cnmf.get_temporal(),
     )
 
 
@@ -1085,12 +1116,12 @@ def test_cache():
     assert df.iloc[-1]["outputs"]["traceback"] is None
 
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]),
-            f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000.mmap',
-        )
+        batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"])
+        == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"])
+        == batch_dir.joinpath(
+            str(df.iloc[-1]["uuid"]),
+            f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000.mmap',
+        )
     )
 
     algo = "cnmf"
@@ -1154,37 +1185,37 @@ def test_cache():
     df.iloc[-1].cnmf.get_masks(np.arange(3))
     time_stamp2 = cache[cache["function"] == "get_output"]["time_stamp"].item()
     hex2 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item()))
-    assert (cache[cache["function"] == "get_output"].index.size == 1)
+    assert cache[cache["function"] == "get_output"].index.size == 1
     # after adding enough items for cache to exceed max size, cache should remove least recently used items until
     # size is back under max
-    assert (len(cnmf.cnmf_cache.get_cache().index) == 17)
+    assert len(cnmf.cnmf_cache.get_cache().index) == 17
     # the time stamp to get_output the second time should be greater than the original time
     # stamp because the cached item is being returned and therefore will have been accessed more recently
-    assert (time_stamp2 > time_stamp1)
+    assert time_stamp2 > time_stamp1
     # the hex id of the item in the cache when get_output is first called
     # should be the same hex id of the item in the cache when get_output is called again
-    assert (hex1 == hex2)
+    assert hex1 == hex2
 
     # test clear_cache()
     cnmf.cnmf_cache.clear_cache()
-    assert (len(cnmf.cnmf_cache.get_cache().index) == 0)
+    assert len(cnmf.cnmf_cache.get_cache().index) == 0
 
     # checking that cache is cleared, checking speed at which item is returned
     start = time.time()
     df.iloc[-1].cnmf.get_output()
     end = time.time()
-    assert (len(cnmf.cnmf_cache.get_cache().index) == 1)
+    assert len(cnmf.cnmf_cache.get_cache().index) == 1
 
     # second call to item now added to cache, time to return item should be much faster than before because item has
     # now been cached
     start2 = time.time()
     df.iloc[-1].cnmf.get_output()
     end2 = time.time()
-    assert(end2-start2 < end-start)
+    assert end2 - start2 < end - start
 
     # testing clear_cache() again, length of dataframe should be zero
     cnmf.cnmf_cache.clear_cache()
-    assert (len(cnmf.cnmf_cache.get_cache().index) == 0)
+    assert len(cnmf.cnmf_cache.get_cache().index) == 0
 
     # test setting maxsize as 0, should effectively disable the cache...additionally, time to return an item called
     # twice should roughly be the same because item is not being stored in the cache
@@ -1193,13 +1224,13 @@ def test_cache():
     start = time.time()
     df.iloc[-1].cnmf.get_output()
     end = time.time()
-    assert (len(cnmf.cnmf_cache.get_cache().index) == 0)
+    assert len(cnmf.cnmf_cache.get_cache().index) == 0
 
     start2 = time.time()
     df.iloc[-1].cnmf.get_output()
     end2 = time.time()
-    assert (len(cnmf.cnmf_cache.get_cache().index) == 0)
-    assert(abs((end-start)-(end2-start2)) < 0.05)
+    assert len(cnmf.cnmf_cache.get_cache().index) == 0
+    assert abs((end - start) - (end2 -
start2)) < 0.05 # test to check that separate cache items are being returned for different batch items # must add another item to the batch, running cnmfe @@ -1236,39 +1267,43 @@ def test_cache(): cnmf.cnmf_cache.set_maxsize("1M") - df.iloc[1].cnmf.get_output() # cnmf output - df.iloc[-1].cnmf.get_output() # cnmfe output + df.iloc[1].cnmf.get_output() # cnmf output + df.iloc[-1].cnmf.get_output() # cnmfe output cache = cnmf.cnmf_cache.get_cache() # checking that both outputs from different batch items are added to the cache - assert(len(cache.index) == 2) + assert len(cache.index) == 2 # checking that the uuid of each outputs from the different batch items are not the same - assert(cache.iloc[-1]["uuid"] != cache.iloc[-2]["uuid"]) + assert cache.iloc[-1]["uuid"] != cache.iloc[-2]["uuid"] # checking that the uuid of the output in the cache is the correct uuid of the batch item in the df - assert(cache.iloc[-1]["uuid"] == df.iloc[-1]["uuid"]) + assert cache.iloc[-1]["uuid"] == df.iloc[-1]["uuid"] # call get output from cnmf, check that it is the most recent thing called in the cache df.iloc[1].cnmf.get_output() cnmf_uuid = df.iloc[1]["uuid"] most_recently_called = cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1] cache_uuid = most_recently_called["uuid"] - assert(cnmf_uuid == cache_uuid) + assert cnmf_uuid == cache_uuid # check to make sure by certain params that it is cnmf vs cnmfe output = df.iloc[1].cnmf.get_output() - assert(output.params.patch["low_rank_background"] == True) + assert output.params.patch["low_rank_background"] == True output2 = df.iloc[-1].cnmf.get_output() - assert(output2.params.patch["low_rank_background"] == False) - + assert output2.params.patch["low_rank_background"] == False + # test for copy # if return_copy=True, then hex id of calls to the same function should be false output = df.iloc[1].cnmf.get_output() - assert(hex(id(output)) != hex(id(cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1]))) + assert hex(id(output)) != hex( + id(cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1]) + ) # if return_copy=False, then hex id of calls to the same function should be true output = df.iloc[1].cnmf.get_output(return_copy=False) output2 = df.iloc[1].cnmf.get_output(return_copy=False) - assert(hex(id(output)) == hex(id(output2))) - assert(hex(id(cnmf.cnmf_cache.get_cache().iloc[-1]["return_val"])) == hex(id(output))) + assert hex(id(output)) == hex(id(output2)) + assert hex(id(cnmf.cnmf_cache.get_cache().iloc[-1]["return_val"])) == hex( + id(output) + ) From ec6a3445dcedf61c26a5820cb234dcfe40c542f8 Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Thu, 27 Feb 2025 00:23:48 -0500 Subject: [PATCH 05/10] Eliminated shadowing import of 'cnmf' in test_core, which only worked because CNMF is also imported in mesmerize_core.caiman_extensions.cnmf --- tests/test_core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 499ca1b..956c16f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -2,7 +2,7 @@ import numpy as np from caiman.utils.utils import load_dict_from_hdf5 -from caiman.source_extraction.cnmf import cnmf +from caiman.source_extraction.cnmf.cnmf import CNMF import numpy.testing import pandas as pd from mesmerize_core import ( @@ -530,7 +530,7 @@ def test_cnmf(): ) # test to check cnmf get_output() - assert isinstance(df.iloc[-1].cnmf.get_output(), cnmf.CNMF) + assert isinstance(df.iloc[-1].cnmf.get_output(), CNMF) # this doesn't work because some keys in the 
hdf5 file are
     # not always identical, like the path to the mmap file
     # assert sha1(open(df.iloc[1].cnmf.get_output_path(), "rb").read()).hexdigest() == sha1(open(ground_truths_dir.joinpath('cnmf', 'cnmf_output.hdf5'), "rb").read()).hexdigest()
 
@@ -840,7 +840,7 @@ def test_cnmfe():
     )
 
     # test to check cnmf get_output()
-    assert isinstance(df.iloc[-1].cnmf.get_output(), cnmf.CNMF)
+    assert isinstance(df.iloc[-1].cnmf.get_output(), CNMF)
     # this doesn't work because some keys in the hdf5 file are
     # not always identical, like the path to the mmap file
     # assert sha1(open(df.iloc[1].cnmf.get_output_path(), "rb").read()).hexdigest() == sha1(open(ground_truths_dir.joinpath('cnmf', 'cnmf_output.hdf5'), "rb").read()).hexdigest()

From 7f9245a21be52301c0b60cd8d5236de20e68fb42 Mon Sep 17 00:00:00 2001
From: Ethan Blackwood
Date: Thu, 27 Feb 2025 01:38:52 -0500
Subject: [PATCH 06/10] Still getting cryptic macOS platform errors; this may be the wrong approach, so try micromamba instead

---
 .github/workflows/macos-conda.yml | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/macos-conda.yml b/.github/workflows/macos-conda.yml
index ea75b62..bdd7a0b 100644
--- a/.github/workflows/macos-conda.yml
+++ b/.github/workflows/macos-conda.yml
@@ -16,21 +16,16 @@ jobs:
     steps:
     - uses: actions/checkout@v3
 
-    - uses: conda-incubator/setup-miniconda@v3
+    - uses: mamba-org/setup-micromamba@v2
       with:
-        architecture: 'x64'
-        python-version: '3.10'
-        channels: conda-forge,defaults
-        channel-priority: true
-        activate-environment: mescore
+        condarc: "channel_priority: flexible"
+        environment-name: mescore
         environment-file: environment.yml
-        miniforge-version: latest
+        create-args: -c conda-forge -c defaults python=3.10 pytest
 
     - name: Test mesmerize-core with pytest
       shell: bash -el {0}
       run: |
-        conda activate mescore
-        mamba install pytest
         caimanmanager install
         pip install .
         DOWNLOAD_GROUND_TRUTHS=1 pytest -s .

From f2afbd04cfe0a109cfd60c42c33dfab0bcdb85e7 Mon Sep 17 00:00:00 2001
From: Ethan Blackwood
Date: Thu, 27 Feb 2025 02:09:02 -0500
Subject: [PATCH 07/10] Fix CONDA_EXE not defined in micromamba environment

---
 .github/workflows/macos-conda.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/macos-conda.yml b/.github/workflows/macos-conda.yml
index bdd7a0b..d762954 100644
--- a/.github/workflows/macos-conda.yml
+++ b/.github/workflows/macos-conda.yml
@@ -28,5 +28,5 @@ jobs:
       run: |
         caimanmanager install
         pip install .
-        DOWNLOAD_GROUND_TRUTHS=1 pytest -s .
+        CONDA_EXE="$MAMBA_EXE" DOWNLOAD_GROUND_TRUTHS=1 pytest -s .
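A note on the CONDA_EXE fix above: setup-micromamba exports MAMBA_EXE but does not define CONDA_EXE, which the test run apparently expects to point at a conda-compatible executable, so setting CONDA_EXE="$MAMBA_EXE" for the pytest invocation is the minimal workaround. A sketch of the general fallback pattern follows, assuming only the documented conda/micromamba variable names; the helper itself is hypothetical, not mesmerize-core code:

    import os

    def find_conda_compatible_exe() -> str:
        # Hypothetical helper: prefer CONDA_EXE (set by conda activation),
        # then fall back to MAMBA_EXE (set by micromamba); fail loudly if
        # neither is defined rather than continuing with a misconfigured
        # environment.
        for var in ("CONDA_EXE", "MAMBA_EXE"):
            exe = os.environ.get(var)
            if exe:
                return exe
        raise RuntimeError("neither CONDA_EXE nor MAMBA_EXE is set")

The same fallback could live in the workflow itself as export CONDA_EXE="${CONDA_EXE:-$MAMBA_EXE}" before the pytest line.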
From e6513525ffaa545fe75cfe113be6142e970abf69 Mon Sep 17 00:00:00 2001
From: Ethan Blackwood
Date: Thu, 27 Feb 2025 22:33:01 -0500
Subject: [PATCH 08/10] Add print statements to learn more about Windows cache issue

---
 tests/test_core.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 956c16f..690e362 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1284,7 +1284,15 @@ def test_cache():
     # call get output from cnmf, check that it is the most recent thing called in the cache
     df.iloc[1].cnmf.get_output()
     cnmf_uuid = df.iloc[1]["uuid"]
-    most_recently_called = cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1]
+    cache_sorted = cache.sort_values(by=["time_stamp"], ascending=True)
+    print("Cache sorted from oldest to newest call:")
+    print(cache_sorted)
+    print("Call times:")
+    for _, row in cache_sorted.iterrows():
+        print(f"{row['time_stamp']:.0f}", end=", ")
+    print("")
+
+    most_recently_called = cache_sorted.iloc[-1]
     cache_uuid = most_recently_called["uuid"]
     assert cnmf_uuid == cache_uuid

From bc329a17de15a07c41c3e7b68c220a99e9ab7816 Mon Sep 17 00:00:00 2001
From: Ethan Blackwood
Date: Thu, 27 Feb 2025 23:59:16 -0500
Subject: [PATCH 09/10] Fix print statement

---
 tests/test_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 690e362..c38db7c 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1289,7 +1289,7 @@ def test_cache():
     print(cache_sorted)
     print("Call times:")
     for _, row in cache_sorted.iterrows():
-        print(f"{row['time_stamp']:.0f}", end=", ")
+        print(f"{row['time_stamp']}", end=", ")
     print("")
 
     most_recently_called = cache_sorted.iloc[-1]

From 626e6b30c10f0bd36df43a7ee6b76ea9d750c546 Mon Sep 17 00:00:00 2001
From: Ethan Blackwood
Date: Fri, 28 Feb 2025 00:35:22 -0500
Subject: [PATCH 10/10] Increase tolerance for load time comparison and add sleep for 'most_recently_called' test

---
 tests/test_core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index c38db7c..7b7888a 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1230,7 +1230,7 @@ def test_cache():
     df.iloc[-1].cnmf.get_output()
     end2 = time.time()
     assert len(cnmf.cnmf_cache.get_cache().index) == 0
-    assert abs((end - start) - (end2 - start2)) < 0.05
+    assert abs((end - start) - (end2 - start2)) < 0.1
 
     # test to check that separate cache items are being returned for different batch items
     # must add another item to the batch, running cnmfe
@@ -1282,6 +1282,7 @@ def test_cache():
     assert cache.iloc[-1]["uuid"] == df.iloc[-1]["uuid"]
 
     # call get output from cnmf, check that it is the most recent thing called in the cache
+    time.sleep(0.01)  # make absolutely sure the times aren't identical
     df.iloc[1].cnmf.get_output()
     cnmf_uuid = df.iloc[1]["uuid"]
     cache_sorted = cache.sort_values(by=["time_stamp"], ascending=True)
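A note on the timing fixes in PATCH 10: the cache records time.time() timestamps, and that clock can have a coarse tick on some platforms (historically around 15 ms on Windows), so two get_output calls made back to back can land on identical timestamps and make the sort by "time_stamp", and hence most_recently_called, ambiguous. The 10 ms sleep guards against identical timestamps, while the looser 0.1 s tolerance absorbs normal variation in load times. A self-contained way to estimate the effective tick of time.time() on a given CI runner (an illustrative snippet, not part of the test suite):

    import time

    def timer_tick(clock=time.time) -> float:
        # Spin until the reported clock value changes; the difference is
        # the smallest increment this clock can actually resolve.
        t0 = clock()
        t1 = clock()
        while t1 == t0:
            t1 = clock()
        return t1 - t0

    print(f"time.time() tick: {timer_tick() * 1e3:.3f} ms")

If the measured tick turned out to be coarser than 10 ms, the sleep(0.01) would need to grow accordingly.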