From b7bddc5b30e0afe6dc1209339af79f902bf37562 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 15 Mar 2021 22:30:33 -0400 Subject: [PATCH] TST/CI: refactor test suite to use TeenyTweetyNet, fix #330 generating test data: - rename tweetynet configs - add teenytweetynet configs - modify test_data_generate.py script - so it finds all configs - have it make model subdirectories in results/ - rewrite Makefile - differentiate 'all' and 'ci' generated test data - change commands, variables, urls to data refactoring tests: - add `model` parameters to fixtures: - to `specific_config` fixture - to `specific_config_toml` fixture - to `dataframe` fixtures - change `previous_run_path` fixture to `previous_run_path_factory` - use `model` as argument to `_previous_run_path` function returned by factory - add `models` command-line option for pytest + that will parametrize any test that specifies `models` fixture with whatever arguments are passed in at command line - use `model` fixture in tests - add model fixture to unit tests in test_core/ and test_cli/ - use `model` fixture in a test in test_labeled_timebins - use `previous_run_path_factory` with `model` in test_cli/test_learncurve.py - also add `default_model` fixture that is used whenever a model name is needed by other fixtures (e.g. `specific_config`) but the model shouldn't actually matter + idea is this model should work no matter where tests are run, CI v. 
locally + this is a code smell to me -- if tests don't depend on model then why do I need a "dummy model" + but not obvious to me right now how to disentangle them, and I just want to get the damn CI working CI: - have ci.yml download just test data generated for ci - have ci run pytest using command-line option models, and specifying only teenytweetynet for now (default but make it explicit anyway) - fix the `fix_prep_csv_paths` script so that it correctly finds prep csv files with the new test data directory names and structure other refactoring - move src/scripts to tests/scripts - rename test_data to data_for_tests + so it doesn't look like a sub-package of tests to `pytest` --- .github/workflows/ci.yml | 6 +- .gitignore | 4 +- Makefile | 91 ++++-- src/scripts/test_data/test_data_generate.py | 262 --------------- tests/conftest.py | 17 + tests/data_for_tests/configs/configs.json | 116 +++++++ .../configs/invalid_option_config.toml | 0 .../configs/invalid_section_config.toml | 0 .../invalid_train_and_learncurve_config.toml | 8 +- ...weetynet_eval_audio_cbin_annot_notmat.toml | 30 ++ ...et_learncurve_audio_cbin_annot_notmat.toml | 37 +++ ...tynet_predict_audio_cbin_annot_notmat.toml | 29 ++ ...tynet_predict_audio_wav_annot_koumura.toml | 29 ++ ...eetynet_train_audio_cbin_annot_notmat.toml | 35 ++ ...eetynet_train_audio_wav_annot_koumura.toml | 36 +++ ...weetynet_train_spect_mat_annot_yarden.toml | 34 ++ ...eetynet_eval_audio_cbin_annot_notmat.toml} | 8 +- ...t_learncurve_audio_cbin_annot_notmat.toml} | 8 +- ...ynet_predict_audio_cbin_annot_notmat.toml} | 8 +- ...ynet_predict_audio_wav_annot_koumura.toml} | 8 +- ...etynet_train_audio_cbin_annot_notmat.toml} | 8 +- ...etynet_train_audio_wav_annot_koumura.toml} | 10 +- ...eetynet_train_spect_mat_annot_yarden.toml} | 8 +- .../generated/.gitkeep | 0 .../source/.gitkeep | 0 tests/fixtures/__init__.py | 1 + tests/fixtures/config.py | 13 +- tests/fixtures/dataframe.py | 12 +- tests/fixtures/model.py | 12 + 
tests/fixtures/path.py | 18 +- tests/fixtures/test_data.py | 4 +- .../scripts}/fix_prep_csv_paths.py | 6 +- tests/scripts/generate_data_for_tests.py | 301 ++++++++++++++++++ tests/test_cli/test_eval.py | 2 + tests/test_cli/test_learncurve.py | 8 +- tests/test_cli/test_predict.py | 2 + tests/test_cli/test_prep.py | 6 + tests/test_cli/test_train.py | 2 + tests/test_config/test_config.py | 8 +- tests/test_config/test_parse.py | 29 +- tests/test_core/test_eval.py | 2 + tests/test_core/test_learncurve.py | 4 + tests/test_core/test_predict.py | 2 + tests/test_core/test_prep.py | 2 + tests/test_core/test_train.py | 2 + tests/test_data/configs/configs.json | 53 --- tests/test_files/test_files.py | 8 +- tests/test_labeled_timebins.py | 2 + 48 files changed, 876 insertions(+), 415 deletions(-) delete mode 100644 src/scripts/test_data/test_data_generate.py create mode 100644 tests/data_for_tests/configs/configs.json rename tests/{test_data => data_for_tests}/configs/invalid_option_config.toml (100%) rename tests/{test_data => data_for_tests}/configs/invalid_section_config.toml (100%) rename tests/{test_data => data_for_tests}/configs/invalid_train_and_learncurve_config.toml (68%) create mode 100644 tests/data_for_tests/configs/teenytweetynet_eval_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_learncurve_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_predict_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_predict_audio_wav_annot_koumura.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_train_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_train_audio_wav_annot_koumura.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_train_spect_mat_annot_yarden.toml rename tests/{test_data/configs/test_eval_audio_cbin_annot_notmat.toml => 
data_for_tests/configs/tweetynet_eval_audio_cbin_annot_notmat.toml} (69%) rename tests/{test_data/configs/test_learncurve_audio_cbin_annot_notmat.toml => data_for_tests/configs/tweetynet_learncurve_audio_cbin_annot_notmat.toml} (58%) rename tests/{test_data/configs/test_predict_audio_cbin_annot_notmat.toml => data_for_tests/configs/tweetynet_predict_audio_cbin_annot_notmat.toml} (66%) rename tests/{test_data/configs/test_predict_audio_wav_annot_koumura.toml => data_for_tests/configs/tweetynet_predict_audio_wav_annot_koumura.toml} (66%) rename tests/{test_data/configs/test_train_audio_cbin_annot_notmat.toml => data_for_tests/configs/tweetynet_train_audio_cbin_annot_notmat.toml} (54%) rename tests/{test_data/configs/test_train_audio_wav_annot_koumura.toml => data_for_tests/configs/tweetynet_train_audio_wav_annot_koumura.toml} (51%) rename tests/{test_data/configs/test_train_spect_mat_annot_yarden.toml => data_for_tests/configs/tweetynet_train_spect_mat_annot_yarden.toml} (56%) rename tests/{test_data => data_for_tests}/generated/.gitkeep (100%) rename tests/{test_data => data_for_tests}/source/.gitkeep (100%) create mode 100644 tests/fixtures/model.py rename {src/scripts/test_data => tests/scripts}/fix_prep_csv_paths.py (85%) create mode 100644 tests/scripts/generate_data_for_tests.py delete mode 100644 tests/test_data/configs/configs.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c1cd382ae..f0d9938b3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - name: run tests run: | make test-data-download-source - make test-data-download-generate + make test-data-download-generated-ci poetry install - poetry run python ./src/scripts/test_data/fix_prep_csv_paths.py - poetry run pytest + poetry run python ./tests/scripts/fix_prep_csv_paths.py + poetry run pytest --models teenytweetynet diff --git a/.gitignore b/.gitignore index 0b2ad94a3..9f9c1c99a 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,6 
@@ build/ doc/_build/ # test data -tests/test_data/source/ -tests/test_data/generated/ +tests/data_for_tests/source/ +tests/data_for_tests/generated/ *.tar.gz diff --git a/Makefile b/Makefile index f66551fdb..309ffdd84 100644 --- a/Makefile +++ b/Makefile @@ -1,33 +1,57 @@ -SOURCE_TEST_DATA_TAR=tests/test_data/source/source_test_data.tar.gz -SOURCE_TEST_DATA_URL=https://osf.io/7ru4s/download +TEST_DATA_GENERATE_SCRIPT=./tests/scripts/generate_data_for_tests.py -TEST_DATA_GENERATE_SCRIPT=./src/scripts/test_data/test_data_generate.py -GENERATED_TEST_DATA_TAR=tests/test_data/generated/generated_test_data.tar.gz -GENERATED_TEST_DATA_URL=https://osf.io/q76xd/download -GENERATED_TEST_DATA_TOP_LEVEL_DIRS=tests/test_data/generated/configs tests/test_data/generated/prep tests/test_data/generated/results +DATA_FOR_TESTS_DIR=./tests/data_for_tests/ +GENERATED_TEST_DATA_DIR=${DATA_FOR_TESTS_DIR}generated/ +CONFIGS_DIR=${GENERATED_TEST_DATA_DIR}configs +PREP_DIR=${GENERATED_TEST_DATA_DIR}prep/ +RESULTS_DIR=${GENERATED_TEST_DATA_DIR}results/ +RESULTS_CI=$(shell ls -d ${RESULTS_DIR}*/*/teenytweetynet) +GENERATED_TEST_DATA_CI_DIRS=${CONFIGS_DIR} ${PREP_DIR} ${RESULTS_CI} +GENERATED_TEST_DATA_ALL_DIRS=${GENERATED_TEST_DATA_CI_DIRS} $(shell ls -d ${RESULTS_DIR}/*/*/tweetynet) + +SOURCE_TEST_DATA_TAR=${DATA_FOR_TESTS_DIR}source/source_test_data.tar.gz +GENERATED_TEST_DATA_CI_TAR=${GENERATED_TEST_DATA_DIR}generated_test_data.ci.tar.gz +GENERATED_TEST_DATA_ALL_TAR=${GENERATED_TEST_DATA_DIR}generated_test_data.tar.gz + +SOURCE_TEST_DATA_URL=https://osf.io/s85vh/download +GENERATED_TEST_DATA_ALL_URL=https://osf.io/gt5xw/download +GENERATED_TEST_DATA_CI_URL=https://osf.io/u64nt/download help: @echo 'Makefile for vak ' @echo ' ' @echo 'Usage: ' - @echo ' make test-data-clean-source remove source test data ' - @echo ' make test-data-download-source download source test data ' - @echo ' make test-data-generate generate vak files used by tests from source data ' - @echo ' make 
test-data-clean-generate remove generated test data ' - @echo ' make test-data-tar-generate place generated test data in compressed tar file ' - @echo ' make test-data-download-generate download generated test data .tar and expand ' - @echo ' make variables show variables defined for Makefile ' + @echo ' make test-data-clean-source remove source test data ' + @echo ' make test-data-download-source download source test data ' + @echo ' make test-data-generate generate vak files used by tests from source data ' + @echo ' make test-data-clean-generated remove generated test data ' + @echo ' make test-data-tar-generated-all place all generated test data in compressed tar file ' + @echo ' make test-data-tar-generated-ci place generated test data for CI in compressed tar file ' + @echo ' make test-data-download-generated-all download .tar with all generated test data and expand ' + @echo ' make test-data-download-generated-ci download .tar with generated test data for CI and expand ' + @echo ' make variables show variables defined for Makefile ' variables: - @echo ' SOURCE_TEST_DATA_TAR : $(GENERATED_TEST_DATA_TAR) ' - @echo ' SOURCE_TEST_DATA_URL : $(GENERATED_TEST_DATA_URL) ' - @echo ' TESTS_DATA_GENERATE_SCRIPT : $(TEST_DATA_GENERATE_SCRIPT) ' - @echo ' GENERATED_TEST_DATA_TAR : $(GENERATED_TEST_DATA_TAR) ' - @echo ' GENERATED_TEST_DATA_URL : $(GENERATED_TEST_DATA_URL) ' - @echo ' GENERATED_TEST_DATA_TOP_LEVEL_DIRS : $(GENERATED_TEST_DATA_TOP_LEVEL_DIRS) ' + @echo ' TESTS_DATA_GENERATE_SCRIPT : $(TEST_DATA_GENERATE_SCRIPT) ' + @echo '' + @echo ' DATA_FOR_TESTS_DIR : $(DATA_FOR_TESTS_DIR) ' + @echo ' GENERATED_TEST_DATA_DIR : $(GENERATED_TEST_DATA_DIR) ' + @echo ' PREP_DIR : $(PREP_DIR) ' + @echo ' RESULTS_DIR : $(RESULTS_DIR) ' + @echo ' RESULTS_CI : $(RESULTS_CI) ' + @echo ' GENERATED_TEST_DATA_CI_DIRS : $(GENERATED_TEST_DATA_CI_DIRS) ' + @echo ' GENERATED_TEST_DATA_ALL_DIRS : $(GENERATED_TEST_DATA_ALL_DIRS) ' + @echo '' + @echo ' SOURCE_TEST_DATA_TAR : 
$(SOURCE_TEST_DATA_TAR) ' + @echo ' GENERATED_TEST_DATA_CI_TAR : $(GENERATED_TEST_DATA_CI_TAR) ' + @echo ' GENERATED_TEST_DATA_ALL_TAR : $(GENERATED_TEST_DATA_ALL_TAR) ' + @echo '' + @echo ' SOURCE_TEST_DATA_URL : $(SOURCE_TEST_DATA_URL) ' + @echo ' GENERATED_TEST_DATA_ALL_URL : $(GENERATED_TEST_DATA_ALL_URL) ' + @echo ' GENERATED_TEST_DATA_CI_URL : $(GENERATED_TEST_DATA_CI_URL) ' test-data-clean-source: - rm -rfv ./tests/test_data/source/* + rm -rfv ./tests/data_for_tests/source/* test-data-download-source: wget -q $(SOURCE_TEST_DATA_URL) -O $(SOURCE_TEST_DATA_TAR) @@ -36,14 +60,25 @@ test-data-download-source: test-data-generate : $(TEST_DATA_GENERATE_SCRIPT) poetry run python $(TEST_DATA_GENERATE_SCRIPT) -test-data-clean-generate : - rm -rfv ./tests/test_data/generated/* +test-data-clean-generated : + rm -rfv ./tests/data_for_tests/generated/* + +test-data-tar-generated-all: + tar -czvf $(GENERATED_TEST_DATA_ALL_TAR) $(GENERATED_TEST_DATA_ALL_DIRS) + +test-data-tar-generated-ci: + tar -czvf $(GENERATED_TEST_DATA_CI_TAR) $(GENERATED_TEST_DATA_CI_DIRS) -test-data-tar-generate: - tar -czvf $(GENERATED_TEST_DATA_TAR) $(GENERATED_TEST_DATA_TOP_LEVEL_DIRS) +test-data-download-generated-all: + wget -q $(GENERATED_TEST_DATA_ALL_URL) -O $(GENERATED_TEST_DATA_ALL_TAR) + tar -xzf $(GENERATED_TEST_DATA_ALL_TAR) -test-data-download-generate: - wget -q $(GENERATED_TEST_DATA_URL) -O $(GENERATED_TEST_DATA_TAR) - tar -xzf $(GENERATED_TEST_DATA_TAR) +test-data-download-generated-ci: + wget -q $(GENERATED_TEST_DATA_CI_URL) -O $(GENERATED_TEST_DATA_CI_TAR) + tar -xzf $(GENERATED_TEST_DATA_CI_TAR) -.PHONY: help variables test-data-clean-source test-data-download-source test-data-generate test-data-clean-generate test-data-tar-generate test-data-download-generate +.PHONY: help variables \ + test-data-clean-source test-data-download-source \ + test-data-generate test-data-clean-generated \ + test-data-tar-generated-all test-data-tar-generated-all \ + test-data-download-generated-all 
test-data-download-generated-ci diff --git a/src/scripts/test_data/test_data_generate.py b/src/scripts/test_data/test_data_generate.py deleted file mode 100644 index 0d6a4038c..000000000 --- a/src/scripts/test_data/test_data_generate.py +++ /dev/null @@ -1,262 +0,0 @@ -"""script run by Makefile test-data-generate command - -makes all the 'generated' test data, i.e. files created by vak, -It's called 'generated' test data to distinguish it from the -'source' test data, i.e., files **not** created by vak, that is, -the input data used when vak does create files (csv files, logs, -neural network checkpoints, etc.) - -This script generates: -* temporary config.toml files used when generating results -* `prep`d (prepared) datasets, and results created with those datasets, - both of which were generated using the temporary config.toml files - -all the setup configs send output to one of two places: -for any prep command, the output goes to some child directory of ./tests/test_data/generated/prep -for any command run with a `prep`d dataset, the output goes to some child dir of ./tests/test_data/generated/results - -examples: - when we run `vak prep tests/test_data/generated/configs/test_train_audio_wav_annot_koumura.toml` - the `prep`d dataset will be in a new directory created in - `./tests/test_data/generated/prep/train/audio_wav_annot_koumura` - - when we run `vak train tests/test_data/genereated/configs/test_train_audio_wav_annot_koumura.toml` - it will use the `prep`d dataset csv that's now in - `./tests/test_data/generated/prep/train/audio_wav_annot_koumura`, that the temporary config.toml points to, - and the results will go to a new directory created in - `./tests/test_data/generated/results/train/audio_wav_annot_koumura` - -To set up this directory structure, we iterate through the constants defined below. 
- -The constants are: -- TOP_LEVEL_DIRS - name of sub-directories in .tests/test_data/generated that correspond to - either tempory config files, 'prep'd datasets, or `results` generated from those `prep`d datasets -- COMMAND_DIRS - names of sub-sub-directories in ./tests/test_data/generated/prep & - ./tests/test_data/generated/results that correspond to cli commands; - e.g., dataset from running `vak prep $A_TRAIN_CONFIG.TOML` will be in - ./tests/test_data/generated/prep/train/audio_{}_annot_{} -- DATA_DIRS - names of "subsub"directories in ./tests/test_data/$COMMAND that correspond to toy test data sets; - those sub-directories contain test data generated by $COMMAND using the specified toy test data set - -In other words, the parent directory for all the generated directories we need to remove -will have a path of the form: `./tests/test_data/$TOP_LEVEL_DIRS/COMMAND_DIRS/DATA_DIRS`. -For example: -The prep`d dataset from running `vak prep $AUDIO_CBIN_ANNOT_NOTMAT_CONFIG.TOML` will be in -`./tests/test_data/prep/train/audio_cbin_annot_notmat` -and corresponding results will be in -`./tests/test_data/results/train/audio_cbin_annot_notmat` -The directories will have names with timestamps like `prep_20201015_1115`. -Those are the generated directories we want to remove. 
-""" -from pathlib import Path -import shutil - -import toml -import vak - -HERE = Path(__file__).parent -TESTS_ROOT = HERE.joinpath('../../../tests') -TEST_DATA_ROOT = TESTS_ROOT.joinpath('test_data') -GENERATED_TEST_DATA = TEST_DATA_ROOT.joinpath('generated') -GENERATED_TEST_CONFIGS_ROOT = GENERATED_TEST_DATA.joinpath('configs') - -# convention is that all the config.toml files in tests/test_data/configs -# that should be run when generating test data -# have filenames of the form `test_{COMMAND}_audio_{FORMAT}_annot_{FORMAT}.toml' -# **or** `test_{COMMAND}_spect_{FORMAT}_annot_{FORMAT}_config.ini' -# e.g., 'test_learncurve_audio_cbin_annot_notmat.toml' -TEST_CONFIGS_ROOT = TEST_DATA_ROOT.joinpath('configs') -CONFIGS_TO_RUN = TEST_CONFIGS_ROOT.glob('test*.toml') - -# the sub-directories that will get made inside `./tests/test_data/generated` -TOP_LEVEL_DIRS = [ - 'configs', - 'prep', - 'results', -] - -# these sub-dirs get made in each of the TOP_LEVEL_DIRS (except for 'configs') -COMMAND_DIRS = [ - 'eval', - 'learncurve', - 'predict', - 'train', -] - -# these sub-dirs get made in each of the COMMAND_DIRS (except for 'configs') -DATA_DIRS = [ - 'audio_cbin_annot_notmat', - 'audio_wav_annot_koumura', - 'spect_mat_annot_yarden', -] - - -def make_subdirs_in_generated(): - """make sub-directories inside ./tests/test_data/generated - - first thing that has to get done before copying configs and - then using those configs to generate results - """ - for top_level_dir in TOP_LEVEL_DIRS: - if top_level_dir == 'configs': - subdir_to_make = GENERATED_TEST_DATA / top_level_dir - subdir_to_make.mkdir(parents=True) - else: - for command_dir in COMMAND_DIRS: - for data_dir in DATA_DIRS: - subdir_to_make = GENERATED_TEST_DATA / top_level_dir / command_dir / data_dir - subdir_to_make.mkdir(parents=True) - - -def copy_config_files(): - """copy config files from setup to test_data/configs - - the copied files are the ones that get modified when this setup script runs, - while the 
originals in this directory remain unchanged. - """ - for toml_path in CONFIGS_TO_RUN: - if not toml_path.exists(): - raise FileNotFoundError( - f'{toml_path} not found') - - dst = GENERATED_TEST_CONFIGS_ROOT.joinpath(toml_path.name) - print(f"\tcopying to {dst}") - shutil.copy(src=toml_path, dst=dst) - - -def run_prep(test_config_paths): - """run ``vak prep`` for all test configs""" - for test_config_path in test_config_paths: - if not test_config_path.exists(): - raise FileNotFoundError( - f'{test_config_path} not found') - print(f"re-running vak prep to set up for test, using config: {test_config_path.name}") - vak.cli.prep.prep(toml_path=test_config_path) - - -def run_results(test_config_paths): - """run ``vak {command}`` for all test configs, - where {command} is determined from the config file name - """ - for test_config_path in test_config_paths: - if 'train' in test_config_path.name: - vak.cli.train.train(toml_path=test_config_path) - elif 'eval' in test_config_path.name: - vak.cli.eval.eval(toml_path=test_config_path) - elif 'predict' in test_config_path.name: - vak.cli.predict.predict(toml_path=test_config_path) - elif 'learncurve' in test_config_path.name: - vak.cli.learncurve.learning_curve(toml_path=test_config_path) - else: - raise ValueError( - f'unable to determine command to run from config name:\n{test_config_path}' - ) - - -def fix_options_in_configs(test_config_paths, command): - """fix values assigned to options in predict and eval configs - - Need to do this because both predict and eval configs have options - that can only be assigned *after* running the corresponding `train` config - """ - # split configs into train and predict or eval configs - configs_to_fix = [test_config for test_config in test_config_paths if command in test_config.name] - train_configs = [test_config for test_config in test_config_paths if 'train' in test_config.name] - - for config_to_fix in configs_to_fix: - # figure out which 'train' config corresponds to this 
'predict' or 'eval' config - # by using 'suffix' of config file names. `train` suffix will match `predict`/'eval' suffix - prefix, suffix = config_to_fix.name.split(command) - train_config_to_use = [] - for train_config in train_configs: - train_prefix, train_suffix = train_config.name.split('train') - if train_suffix == suffix: - train_config_to_use.append(train_config) - if len(train_config_to_use) != 1: - raise ValueError( - f'did not find just a single train config that matches with predict config:\n' - f'{config_to_fix}' - f'Matches were: {train_config_to_use}' - ) - train_config_to_use = train_config_to_use[0] - - # now use the config to find the results dir and get the values for the options we need to set - # which are checkpoint_path, spect_scaler_path, and labelmap_path - with train_config_to_use.open('r') as fp: - train_config_toml = toml.load(fp) - root_results_dir = Path(train_config_toml['TRAIN']['root_results_dir']) - results_dir = sorted(root_results_dir.glob('results_*')) - if len(results_dir) != 1: - raise ValueError( - f'did not find just a single results directory in root_results_dir from train_config:\n' - f'{train_config_to_use}' - f'root_results_dir was: {root_results_dir}' - f'Matches for "results_*" were: {results_dir}' - ) - results_dir = results_dir[0] - # these are the only options whose values we need to change - # and they are the same for both predict and eval - checkpoint_path = sorted(results_dir.glob('**/checkpoints/checkpoint.pt'))[0] - spect_scaler_path = sorted(results_dir.glob('StandardizeSpect'))[0] - labelmap_path = sorted(results_dir.glob('labelmap.json'))[0] - - # now add these values to corresponding options in predict / eval config - with config_to_fix.open('r') as fp: - config_toml = toml.load(fp) - config_toml[command.upper()]['checkpoint_path'] = str(checkpoint_path) - config_toml[command.upper()]['spect_scaler_path'] = str(spect_scaler_path) - config_toml[command.upper()]['labelmap_path'] = str(labelmap_path) - with 
config_to_fix.open('w') as fp: - toml.dump(config_toml, fp) - - -# need to run 'train' config before we run 'predict' -# so we can add checkpoints, etc., from training to predict -COMMANDS = ( - 'train', - 'learncurve', - 'eval', - 'predict', -) - - -def main(): - print('making sub-directories in ./tests/test_data/generated/ where files generated by `vak` will go') - make_subdirs_in_generated() - - print('copying config files run to generate test data from ./tests/test_data/configs to ' - './tests/test_data/generated/configs') - copy_config_files() - - test_config_paths = sorted( - GENERATED_TEST_CONFIGS_ROOT.glob('test*toml') - ) - print( - f'will generate test data from these test config files: {test_config_paths}' - ) - for command in COMMANDS: - print( - f'running configs for command: {command}' - ) - command_config_paths = [test_config_path - for test_config_path in test_config_paths - if command in test_config_path.name] - print( - f'using the following configs:\n{command_config_paths}' - ) - if command == 'predict' or command == 'eval': - # fix values for required options in predict / eval configs - # using results from running the corresponding train configs. 
- # this only works if we ran the train configs already, - # which we should have because of ordering of COMMANDS constant above - fix_options_in_configs(test_config_paths, command) - - run_prep(test_config_paths=command_config_paths) - run_results(test_config_paths=command_config_paths) - - -if __name__ == '__main__': - main() diff --git a/tests/conftest.py b/tests/conftest.py index d4bb20901..362698856 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1 +1,18 @@ from .fixtures import * + + +def pytest_addoption(parser): + parser.addoption( + "--models", action="store", default="teenytweetynet", nargs='+', + help="vak models to test, space-separated list of names" + ) + + +def pytest_generate_tests(metafunc): + models = metafunc.config.option.models + if isinstance(models, str): + # wrap a single model name in a list + models = [models] + # **note!** fixture name is singular even though cmdopt is plural + if 'model' in metafunc.fixturenames and models is not None: + metafunc.parametrize("model", models) diff --git a/tests/data_for_tests/configs/configs.json b/tests/data_for_tests/configs/configs.json new file mode 100644 index 000000000..6daa30c59 --- /dev/null +++ b/tests/data_for_tests/configs/configs.json @@ -0,0 +1,116 @@ +{ + "configs": [ + { + "filename": "tweetynet_eval_audio_cbin_annot_notmat.toml", + "model": "tweetynet", + "config_type": "eval", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "tweetynet_learncurve_audio_cbin_annot_notmat.toml", + "model": "tweetynet", + "config_type": "learncurve", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "tweetynet_predict_audio_cbin_annot_notmat.toml", + "model": "tweetynet", + "config_type": "predict", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "tweetynet_predict_audio_wav_annot_koumura.toml", + "model": "tweetynet", + "config_type": "predict", 
+ "audio_format": "wav", + "spect_format": null, + "annot_format": "koumura" + }, + { + "filename": "tweetynet_train_audio_cbin_annot_notmat.toml", + "model": "tweetynet", + "config_type": "train", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "tweetynet_train_audio_wav_annot_koumura.toml", + "model": "tweetynet", + "config_type": "train", + "audio_format": "wav", + "spect_format": null, + "annot_format": "koumura" + }, + { + "filename": "tweetynet_train_spect_mat_annot_yarden.toml", + "model": "tweetynet", + "config_type": "train", + "audio_format": null, + "spect_format": "mat", + "annot_format": "yarden" + }, + { + "filename": "teenytweetynet_eval_audio_cbin_annot_notmat.toml", + "model": "teenytweetynet", + "config_type": "eval", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "teenytweetynet_learncurve_audio_cbin_annot_notmat.toml", + "model": "teenytweetynet", + "config_type": "learncurve", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "teenytweetynet_predict_audio_cbin_annot_notmat.toml", + "model": "teenytweetynet", + "config_type": "predict", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "teenytweetynet_predict_audio_wav_annot_koumura.toml", + "model": "teenytweetynet", + "config_type": "predict", + "audio_format": "wav", + "spect_format": null, + "annot_format": "koumura" + }, + { + "filename": "teenytweetynet_train_audio_cbin_annot_notmat.toml", + "model": "teenytweetynet", + "config_type": "train", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "teenytweetynet_train_audio_wav_annot_koumura.toml", + "model": "teenytweetynet", + "config_type": "train", + "audio_format": "wav", + "spect_format": null, + "annot_format": "koumura" + }, + { + "filename": 
"teenytweetynet_train_spect_mat_annot_yarden.toml", + "model": "teenytweetynet", + "config_type": "train", + "audio_format": null, + "spect_format": "mat", + "annot_format": "yarden" + } + ] +} \ No newline at end of file diff --git a/tests/test_data/configs/invalid_option_config.toml b/tests/data_for_tests/configs/invalid_option_config.toml similarity index 100% rename from tests/test_data/configs/invalid_option_config.toml rename to tests/data_for_tests/configs/invalid_option_config.toml diff --git a/tests/test_data/configs/invalid_section_config.toml b/tests/data_for_tests/configs/invalid_section_config.toml similarity index 100% rename from tests/test_data/configs/invalid_section_config.toml rename to tests/data_for_tests/configs/invalid_section_config.toml diff --git a/tests/test_data/configs/invalid_train_and_learncurve_config.toml b/tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml similarity index 68% rename from tests/test_data/configs/invalid_train_and_learncurve_config.toml rename to tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml index 130e0f888..40d27e9a1 100644 --- a/tests/test_data/configs/invalid_train_and_learncurve_config.toml +++ b/tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml @@ -1,6 +1,6 @@ [PREP] -data_dir = "./tests/test_data/source/cbins/gy6or6/032312" -output_dir = "./tests/test_data/generated/prep/train/audio_cbin_annot_notmat" +data_dir = "./tests/data_for_tests/source/cbins/gy6or6/032312" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" audio_format = "cbin" annot_format = "notmat" labelset = "iabcdefghjk" @@ -30,7 +30,7 @@ ckpt_step = 200 patience = 4 num_workers = 4 device = "cuda" -root_results_dir = "./tests/test_data/generated/results/train/audio_cbin_annot_notmat" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat" [LEARNCURVE] models = 'TweetyNet' @@ -44,7 +44,7 @@ num_workers = 4 
train_set_durs = [ 4, 6 ] num_replicates = 2 device = "cuda" -root_results_dir = './tests/test_data/generated/results/learncurve/audio_cbin_annot_notmat' +root_results_dir = './tests/data_for_tests/generated/results/learncurve/audio_cbin_annot_notmat' [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_eval_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/teenytweetynet_eval_audio_cbin_annot_notmat.toml new file mode 100644 index 000000000..4d2567940 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_eval_audio_cbin_annot_notmat.toml @@ -0,0 +1,30 @@ +[PREP] +labelset = "iabcdefghjk" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" +output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat" +audio_format = "cbin" +annot_format = "notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat" + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [ 500, 10000,] +thresh = 6.25 +transform_type = "log_spect" + +[DATALOADER] +window_size = 44 + +[EVAL] +checkpoint_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/TeenyTweetyNet/checkpoints/max-val-acc-checkpoint.pt" +labelmap_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/labelmap.json" +models = "TeenyTweetyNet" +batch_size = 4 +num_workers = 4 +device = "cuda" +spect_scaler_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" +output_dir = "./tests/data_for_tests/generated/results/eval/audio_cbin_annot_notmat/teenytweetynet" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_learncurve_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/teenytweetynet_learncurve_audio_cbin_annot_notmat.toml new file mode 
100644 index 000000000..ff85ca497 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_learncurve_audio_cbin_annot_notmat.toml @@ -0,0 +1,37 @@ +[PREP] +data_dir = './tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312' +output_dir = './tests/data_for_tests/generated/prep/learncurve/audio_cbin_annot_notmat' +audio_format = 'cbin' +annot_format = 'notmat' +spect_output_dir = "./tests/data_for_tests/generated/prep/learncurve/audio_cbin_annot_notmat" +labelset = 'iabcdefghjk' +train_dur = 50 +val_dur = 15 +test_dur = 30 + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [ 500, 10000 ] +thresh = 6.25 +transform_type = 'log_spect' + +[DATALOADER] +window_size = 44 + +[LEARNCURVE] +models = 'TeenyTweetyNet' +normalize_spectrograms = true +batch_size = 4 +num_epochs = 2 +val_step = 50 +ckpt_step = 200 +patience = 3 +num_workers = 4 +train_set_durs = [ 4, 6 ] +num_replicates = 2 +device = "cuda" +root_results_dir = './tests/data_for_tests/generated/results/learncurve/audio_cbin_annot_notmat/teenytweetynet' + +[TeenyTweetyNet] +optimizer.lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_predict_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/teenytweetynet_predict_audio_cbin_annot_notmat.toml new file mode 100644 index 000000000..8ecd749d2 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_predict_audio_cbin_annot_notmat.toml @@ -0,0 +1,29 @@ +[PREP] +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" +output_dir = "./tests/data_for_tests/generated/prep/predict/audio_cbin_annot_notmat" +audio_format = "cbin" +spect_output_dir = "./tests/data_for_tests/generated/prep/predict/audio_cbin_annot_notmat" + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [ 500, 10000 ] +thresh = 6.25 +transform_type = 'log_spect' + +[DATALOADER] +window_size = 44 + +[PREDICT] +spect_scaler_path = "/home/user/results_181014_194418/spect_scaler" +checkpoint_path = 
"~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/TeenyTweetyNet/checkpoints/max-val-acc-checkpoint.pt" +labelmap_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/labelmap.json" +models = "TeenyTweetyNet" +batch_size = 4 +num_workers = 4 +device = "cuda" +output_dir = "./tests/data_for_tests/generated/results/predict/audio_cbin_annot_notmat/teenytweetynet" +annot_csv_filename = "bl26lb16.041912.annot.csv" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_predict_audio_wav_annot_koumura.toml b/tests/data_for_tests/configs/teenytweetynet_predict_audio_wav_annot_koumura.toml new file mode 100644 index 000000000..432877e48 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_predict_audio_wav_annot_koumura.toml @@ -0,0 +1,29 @@ +[PREP] +data_dir = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0" +output_dir = "./tests/data_for_tests/generated/prep/predict/audio_wav_annot_koumura" +audio_format = "wav" +spect_output_dir = "./tests/data_for_tests/generated/prep/predict/audio_wav_annot_koumura" + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [ 500, 10000 ] +thresh = 6.25 +transform_type = 'log_spect' + +[DATALOADER] +window_size = 44 + +[PREDICT] +spect_scaler_path = "/home/user/results_181014_194418/spect_scaler" +checkpoint_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/TeenyTweetyNet/checkpoints/max-val-acc-checkpoint.pt" +labelmap_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/labelmap.json" +models = "TeenyTweetyNet" +batch_size = 4 +num_workers = 4 +device = "cuda" +output_dir = "./tests/data_for_tests/generated/results/predict/audio_wav_annot_koumura/teenytweetynet" +annot_csv_filename = "Bird0.annot.csv" + 
+[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_train_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/teenytweetynet_train_audio_cbin_annot_notmat.toml new file mode 100644 index 000000000..341af0c04 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_train_audio_cbin_annot_notmat.toml @@ -0,0 +1,35 @@ +[PREP] +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" +audio_format = "cbin" +annot_format = "notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" +labelset = "iabcdefghjk" +train_dur = 50 +val_dur = 15 +test_dur = 30 + +[SPECT_PARAMS] +fft_size=512 +step_size=64 +freq_cutoffs = [500, 10000] +thresh = 6.25 +transform_type = "log_spect" + +[DATALOADER] +window_size = 44 + +[TRAIN] +models = "TeenyTweetyNet" +normalize_spectrograms = true +batch_size = 4 +num_epochs = 2 +val_step = 50 +ckpt_step = 200 +patience = 3 +num_workers = 4 +device = "cuda" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/teenytweetynet" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_train_audio_wav_annot_koumura.toml b/tests/data_for_tests/configs/teenytweetynet_train_audio_wav_annot_koumura.toml new file mode 100644 index 000000000..7306ee7bf --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_train_audio_wav_annot_koumura.toml @@ -0,0 +1,36 @@ +[PREP] +labelset = '012345678' +data_dir = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura" +audio_format = "wav" +spect_output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura" +annot_format = "koumura" +annot_file = 
"./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0/Annotation.xml" +test_dur = 50 +train_dur = 15 +val_dur = 30 + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [500, 10000,] +thresh = 6.25 +transform_type = "log_spect" + +[DATALOADER] +window_size = 44 + +[TRAIN] +models = "TeenyTweetyNet" +normalize_spectrograms = true +batch_size = 4 +num_epochs = 2 +val_step = 50 +ckpt_step = 200 +patience = 3 +num_workers = 4 +device = "cuda" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_wav_annot_koumura/teenytweetynet" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_train_spect_mat_annot_yarden.toml b/tests/data_for_tests/configs/teenytweetynet_train_spect_mat_annot_yarden.toml new file mode 100644 index 000000000..db5a720cd --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_train_spect_mat_annot_yarden.toml @@ -0,0 +1,34 @@ +[PREP] +data_dir = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/spect" +output_dir = "./tests/data_for_tests/generated/prep/train/spect_mat_annot_yarden" +spect_format = "mat" +annot_format = "yarden" +annot_file = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/llb3_annot_subset.mat" +labelset = "range: 1-3,6-14,17-19" +train_dur = 213 +val_dur = 213 + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [500, 10000] +thresh = 6.25 +transform_type = "log_spect" + +[DATALOADER] +window_size = 44 + +[TRAIN] +models = "TeenyTweetyNet" +normalize_spectrograms = false +batch_size = 4 +num_epochs = 2 +val_step = 50 +ckpt_step = 200 +patience = 3 +num_workers = 4 +device = "cuda" +root_results_dir = "./tests/data_for_tests/generated/results/train/spect_mat_annot_yarden/teenytweetynet" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/test_data/configs/test_eval_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/tweetynet_eval_audio_cbin_annot_notmat.toml similarity index 69% rename from 
tests/test_data/configs/test_eval_audio_cbin_annot_notmat.toml rename to tests/data_for_tests/configs/tweetynet_eval_audio_cbin_annot_notmat.toml index 465d46b7b..601295acb 100644 --- a/tests/test_data/configs/test_eval_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/tweetynet_eval_audio_cbin_annot_notmat.toml @@ -1,10 +1,10 @@ [PREP] labelset = "iabcdefghjk" -data_dir = "./tests/test_data/source/audio_cbin_annot_notmat/gy6or6/032412" -output_dir = "./tests/test_data/generated/prep/eval/audio_cbin_annot_notmat" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" +output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat" audio_format = "cbin" annot_format = "notmat" -spect_output_dir = "./tests/test_data/generated/prep/eval/audio_cbin_annot_notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat" [SPECT_PARAMS] fft_size = 512 @@ -24,7 +24,7 @@ batch_size = 11 num_workers = 4 device = "cuda" spect_scaler_path = "~/Documents/repos/coding/birdsong/tweetynet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" -output_dir = "./tests/test_data/generated/results/eval/audio_cbin_annot_notmat" +output_dir = "./tests/data_for_tests/generated/results/eval/audio_cbin_annot_notmat/tweetynet" [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/test_data/configs/test_learncurve_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/tweetynet_learncurve_audio_cbin_annot_notmat.toml similarity index 58% rename from tests/test_data/configs/test_learncurve_audio_cbin_annot_notmat.toml rename to tests/data_for_tests/configs/tweetynet_learncurve_audio_cbin_annot_notmat.toml index 85df5935d..d4e2766ba 100644 --- a/tests/test_data/configs/test_learncurve_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/tweetynet_learncurve_audio_cbin_annot_notmat.toml @@ -1,9 +1,9 @@ [PREP] -data_dir = 
'./tests/test_data/source/audio_cbin_annot_notmat/gy6or6/032312' -output_dir = './tests/test_data/generated/prep/learncurve/audio_cbin_annot_notmat' +data_dir = './tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312' +output_dir = './tests/data_for_tests/generated/prep/learncurve/audio_cbin_annot_notmat' audio_format = 'cbin' annot_format = 'notmat' -spect_output_dir = "./tests/test_data/generated/prep/learncurve/audio_cbin_annot_notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/learncurve/audio_cbin_annot_notmat" labelset = 'iabcdefghjk' train_dur = 50 val_dur = 15 @@ -31,7 +31,7 @@ num_workers = 4 train_set_durs = [ 4, 6 ] num_replicates = 2 device = "cuda" -root_results_dir = './tests/test_data/generated/results/learncurve/audio_cbin_annot_notmat' +root_results_dir = './tests/data_for_tests/generated/results/learncurve/audio_cbin_annot_notmat/tweetynet' [TweetyNet] optimizer.lr = 0.001 diff --git a/tests/test_data/configs/test_predict_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/tweetynet_predict_audio_cbin_annot_notmat.toml similarity index 66% rename from tests/test_data/configs/test_predict_audio_cbin_annot_notmat.toml rename to tests/data_for_tests/configs/tweetynet_predict_audio_cbin_annot_notmat.toml index e04cbac25..221b6eb24 100644 --- a/tests/test_data/configs/test_predict_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/tweetynet_predict_audio_cbin_annot_notmat.toml @@ -1,8 +1,8 @@ [PREP] -data_dir = "./tests/test_data/source/audio_cbin_annot_notmat/gy6or6/032412" -output_dir = "./tests/test_data/generated/prep/predict/audio_cbin_annot_notmat" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" +output_dir = "./tests/data_for_tests/generated/prep/predict/audio_cbin_annot_notmat" audio_format = "cbin" -spect_output_dir = "./tests/test_data/generated/prep/predict/audio_cbin_annot_notmat" +spect_output_dir = 
"./tests/data_for_tests/generated/prep/predict/audio_cbin_annot_notmat" [SPECT_PARAMS] fft_size = 512 @@ -22,7 +22,7 @@ models = "TweetyNet" batch_size = 11 num_workers = 4 device = "cuda" -output_dir = "./tests/test_data/generated/results/predict/audio_cbin_annot_notmat" +output_dir = "./tests/data_for_tests/generated/results/predict/audio_cbin_annot_notmat/tweetynet" annot_csv_filename = "bl26lb16.041912.annot.csv" [TweetyNet.optimizer] diff --git a/tests/test_data/configs/test_predict_audio_wav_annot_koumura.toml b/tests/data_for_tests/configs/tweetynet_predict_audio_wav_annot_koumura.toml similarity index 66% rename from tests/test_data/configs/test_predict_audio_wav_annot_koumura.toml rename to tests/data_for_tests/configs/tweetynet_predict_audio_wav_annot_koumura.toml index 2ff8df2bf..d96dbdd05 100644 --- a/tests/test_data/configs/test_predict_audio_wav_annot_koumura.toml +++ b/tests/data_for_tests/configs/tweetynet_predict_audio_wav_annot_koumura.toml @@ -1,8 +1,8 @@ [PREP] -data_dir = "./tests/test_data/source/audio_wav_annot_koumura/Bird0" -output_dir = "./tests/test_data/generated/prep/predict/audio_wav_annot_koumura" +data_dir = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0" +output_dir = "./tests/data_for_tests/generated/prep/predict/audio_wav_annot_koumura" audio_format = "wav" -spect_output_dir = "./tests/test_data/generated/prep/predict/audio_wav_annot_koumura" +spect_output_dir = "./tests/data_for_tests/generated/prep/predict/audio_wav_annot_koumura" [SPECT_PARAMS] fft_size = 512 @@ -22,7 +22,7 @@ models = "TweetyNet" batch_size = 11 num_workers = 4 device = "cuda" -output_dir = "./tests/test_data/generated/results/predict/audio_wav_annot_koumura" +output_dir = "./tests/data_for_tests/generated/results/predict/audio_wav_annot_koumura/tweetynet" annot_csv_filename = "Bird0.annot.csv" [TweetyNet.optimizer] diff --git a/tests/test_data/configs/test_train_audio_cbin_annot_notmat.toml 
b/tests/data_for_tests/configs/tweetynet_train_audio_cbin_annot_notmat.toml similarity index 54% rename from tests/test_data/configs/test_train_audio_cbin_annot_notmat.toml rename to tests/data_for_tests/configs/tweetynet_train_audio_cbin_annot_notmat.toml index 2429a8f66..9a93ac5bb 100644 --- a/tests/test_data/configs/test_train_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/tweetynet_train_audio_cbin_annot_notmat.toml @@ -1,9 +1,9 @@ [PREP] -data_dir = "./tests/test_data/source/audio_cbin_annot_notmat/gy6or6/032312" -output_dir = "./tests/test_data/generated/prep/train/audio_cbin_annot_notmat" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" audio_format = "cbin" annot_format = "notmat" -spect_output_dir = "./tests/test_data/generated/prep/train/audio_cbin_annot_notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" labelset = "iabcdefghjk" train_dur = 50 val_dur = 15 @@ -29,7 +29,7 @@ ckpt_step = 200 patience = 4 num_workers = 4 device = "cuda" -root_results_dir = "./tests/test_data/generated/results/train/audio_cbin_annot_notmat" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/tweetynet" [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/test_data/configs/test_train_audio_wav_annot_koumura.toml b/tests/data_for_tests/configs/tweetynet_train_audio_wav_annot_koumura.toml similarity index 51% rename from tests/test_data/configs/test_train_audio_wav_annot_koumura.toml rename to tests/data_for_tests/configs/tweetynet_train_audio_wav_annot_koumura.toml index f9536d139..3fd42fa65 100644 --- a/tests/test_data/configs/test_train_audio_wav_annot_koumura.toml +++ b/tests/data_for_tests/configs/tweetynet_train_audio_wav_annot_koumura.toml @@ -1,11 +1,11 @@ [PREP] labelset = '012345678' -data_dir = "./tests/test_data/source/audio_wav_annot_koumura/Bird0" 
-output_dir = "./tests/test_data/generated/prep/train/audio_wav_annot_koumura" +data_dir = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura" audio_format = "wav" -spect_output_dir = "./tests/test_data/generated/prep/train/audio_wav_annot_koumura" +spect_output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura" annot_format = "koumura" -annot_file = "./tests/test_data/source/audio_wav_annot_koumura/Bird0/Annotation.xml" +annot_file = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0/Annotation.xml" test_dur = 50 train_dur = 15 val_dur = 30 @@ -30,7 +30,7 @@ ckpt_step = 200 patience = 4 num_workers = 4 device = "cuda" -root_results_dir = "./tests/test_data/generated/results/train/audio_wav_annot_koumura" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_wav_annot_koumura/tweetynet" [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/test_data/configs/test_train_spect_mat_annot_yarden.toml b/tests/data_for_tests/configs/tweetynet_train_spect_mat_annot_yarden.toml similarity index 56% rename from tests/test_data/configs/test_train_spect_mat_annot_yarden.toml rename to tests/data_for_tests/configs/tweetynet_train_spect_mat_annot_yarden.toml index 724e9ce86..f3d73b24a 100644 --- a/tests/test_data/configs/test_train_spect_mat_annot_yarden.toml +++ b/tests/data_for_tests/configs/tweetynet_train_spect_mat_annot_yarden.toml @@ -1,9 +1,9 @@ [PREP] -data_dir = "./tests/test_data/source/spect_mat_annot_yarden/llb3/spect" -output_dir = "./tests/test_data/generated/prep/train/spect_mat_annot_yarden" +data_dir = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/spect" +output_dir = "./tests/data_for_tests/generated/prep/train/spect_mat_annot_yarden" spect_format = "mat" annot_format = "yarden" -annot_file = "./tests/test_data/source/spect_mat_annot_yarden/llb3/llb3_annot_subset.mat" +annot_file = 
"./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/llb3_annot_subset.mat" labelset = "range: 1-3,6-14,17-19" train_dur = 213 val_dur = 213 @@ -28,7 +28,7 @@ ckpt_step = 200 patience = 4 num_workers = 4 device = "cuda" -root_results_dir = "./tests/test_data/generated/results/train/spect_mat_annot_yarden" +root_results_dir = "./tests/data_for_tests/generated/results/train/spect_mat_annot_yarden/tweetynet" [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/test_data/generated/.gitkeep b/tests/data_for_tests/generated/.gitkeep similarity index 100% rename from tests/test_data/generated/.gitkeep rename to tests/data_for_tests/generated/.gitkeep diff --git a/tests/test_data/source/.gitkeep b/tests/data_for_tests/source/.gitkeep similarity index 100% rename from tests/test_data/source/.gitkeep rename to tests/data_for_tests/source/.gitkeep diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 4642b04fb..eadbad0c2 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -3,6 +3,7 @@ from .config import * from .dataframe import * from .device import * +from .model import * from .path import * from .spect import * from .split import * diff --git a/tests/fixtures/config.py b/tests/fixtures/config.py index a18a70c8f..468285197 100644 --- a/tests/fixtures/config.py +++ b/tests/fixtures/config.py @@ -8,11 +8,12 @@ @pytest.fixture def test_configs_root(test_data_root): - """Path that points to test_data/configs + """Path that points to data_for_tests/configs Two types of config files in this directory: - 1) those used by the src/scripts/test_data/test_data_generate.py script. - All configs that start with ``test_`` prefix. + 1) those used by the tests/scripts/generate_data_for_tests.py script. + Will be listed in configs.json. See ``specific_config`` fixture below + for details about types of configs. 2) those used by tests that are static, e.g., ``invalid_section_config.toml`` This fixture facilitates access to type (2), e.g. 
in test_config/test_parse @@ -72,7 +73,7 @@ def generated_test_configs_root(generated_test_data_root): # ---- path to config files ---- @pytest.fixture def all_generated_configs(generated_test_configs_root): - return sorted(generated_test_configs_root.glob('test*toml')) + return sorted(generated_test_configs_root.glob('*toml')) @pytest.fixture @@ -98,6 +99,7 @@ def specific_config(generated_test_configs_root, e.g. to the ``tmp_path`` fixture used by unit tests """ def _specific_config(config_type, + model, annot_format, audio_format=None, spect_format=None, @@ -131,6 +133,7 @@ def _specific_config(config_type, if all( [ schematized_config['config_type'] == config_type, + schematized_config['model'] == model, schematized_config['annot_format'] == annot_format, schematized_config['audio_format'] == audio_format, schematized_config['spect_format'] == spect_format, @@ -208,12 +211,14 @@ def specific_config_toml(specific_config): `config_type`, `audio_format`, `spect_format`, `annot_format` """ def _specific_config_toml(config_type, + model, annot_format, audio_format=None, spect_format=None, ): config_path = specific_config( config_type, + model, annot_format, audio_format, spect_format diff --git a/tests/fixtures/dataframe.py b/tests/fixtures/dataframe.py index 76ce3029f..3176c86ca 100644 --- a/tests/fixtures/dataframe.py +++ b/tests/fixtures/dataframe.py @@ -13,12 +13,14 @@ def specific_csv_path(specific_config_toml): `config_type`, `audio_format`, `spect_format`, `annot_format` """ def _specific_csv_path(config_type, + model, annot_format, audio_format=None, spect_format=None, ): config_toml = specific_config_toml( config_type, + model, annot_format, audio_format, spect_format @@ -38,12 +40,14 @@ def specific_dataframe(specific_csv_path): `config_type`, `audio_format`, `spect_format`, `annot_format` """ def _specific_dataframe(config_type, - annot_format, - audio_format=None, - spect_format=None, - ): + model, + annot_format, + audio_format=None, + 
spect_format=None, + ): csv_path = specific_csv_path( config_type, + model, annot_format, audio_format, spect_format diff --git a/tests/fixtures/model.py b/tests/fixtures/model.py new file mode 100644 index 000000000..c9ddab49f --- /dev/null +++ b/tests/fixtures/model.py @@ -0,0 +1,12 @@ +import pytest + + +@pytest.fixture +def default_model(): + """default model used whenever a model is needed to run a test. + Should work regardless of where the test is run, i.e. both on + CI platform and locally. + + currently ``teenytweetynet`` + """ + return 'teenytweetynet' diff --git a/tests/fixtures/path.py b/tests/fixtures/path.py index 3be3c3a51..531e88819 100644 --- a/tests/fixtures/path.py +++ b/tests/fixtures/path.py @@ -8,13 +8,17 @@ @pytest.fixture -def previous_run_path(generated_test_data_root): - learncurve_results_root = generated_test_data_root.joinpath( - 'results/learncurve/audio_cbin_annot_notmat' - ) - results_dirs = sorted(learncurve_results_root.glob(f'{RESULTS_DIR_PREFIX}*')) - assert len(results_dirs) >= 1 - return results_dirs[-1] +def previous_run_path_factory(generated_test_data_root): + + def _previous_run_path(model): + learncurve_results_root = generated_test_data_root.joinpath( + f'results/learncurve/audio_cbin_annot_notmat/{model}' + ) + results_dirs = sorted(learncurve_results_root.glob(f'{RESULTS_DIR_PREFIX}*')) + assert len(results_dirs) >= 1 + return results_dirs[-1] + + return _previous_run_path @pytest.fixture diff --git a/tests/fixtures/test_data.py b/tests/fixtures/test_data.py index c5c786c85..d95c50b7d 100644 --- a/tests/fixtures/test_data.py +++ b/tests/fixtures/test_data.py @@ -7,8 +7,8 @@ @pytest.fixture def test_data_root(): - """Path that points to root of test_data directory""" - return HERE.joinpath('..', 'test_data') + """Path that points to root of data_for_tests directory""" + return HERE.joinpath('..', 'data_for_tests') @pytest.fixture diff --git a/src/scripts/test_data/fix_prep_csv_paths.py 
b/tests/scripts/fix_prep_csv_paths.py similarity index 85% rename from src/scripts/test_data/fix_prep_csv_paths.py rename to tests/scripts/fix_prep_csv_paths.py index 0514d70d4..7af686821 100644 --- a/src/scripts/test_data/fix_prep_csv_paths.py +++ b/tests/scripts/fix_prep_csv_paths.py @@ -3,9 +3,9 @@ import pandas as pd HERE = Path(__file__).parent -PROJ_ROOT = HERE / '..' / '..' / '..' -PROJ_ROOT_ABS = PROJ_ROOT.resolve() -GENERATED_TEST_DATA = PROJ_ROOT / 'tests' / 'test_data' / 'generated' +PROJ_ROOT = HERE / '..' / '..' +PROJ_ROOT_ABS = PROJ_ROOT.resolve() # <- used to fix paths!!! +GENERATED_TEST_DATA = PROJ_ROOT / 'tests' / 'data_for_tests' / 'generated' def main(): diff --git a/tests/scripts/generate_data_for_tests.py b/tests/scripts/generate_data_for_tests.py new file mode 100644 index 000000000..8f203030d --- /dev/null +++ b/tests/scripts/generate_data_for_tests.py @@ -0,0 +1,301 @@ +"""script run by Makefile test-data-generate command + +makes all the 'generated' test data, i.e. files created by vak, +It's called 'generated' test data to distinguish it from the +'source' test data, i.e., files **not** created by vak, that is, +the input data used when vak does create files (csv files, logs, +neural network checkpoints, etc.) 
+ +This script generates: +* temporary config.toml files used when generating results +* `prep`d (prepared) datasets, and results created with those datasets, + both of which were generated using the temporary config.toml files + +all the setup configs send output to one of two places: +for any prep command, the output goes to some child directory of ./tests/data_for_tests/generated/prep +for any command run with a `prep`d dataset, the output goes to some child dir of ./tests/data_for_tests/generated/results + +examples: + when we run `vak prep tests/data_for_tests/generated/configs/tweetynet_train_audio_wav_annot_koumura.toml` + the `prep`d dataset will be in a new directory created in + `./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura` + + when we run `vak train tests/data_for_tests/generated/configs/tweetynet_train_audio_wav_annot_koumura.toml` + it will use the `prep`d dataset csv that's now in + `./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura`, that the temporary config.toml points to, + and the results will go to a new directory created in + `./tests/data_for_tests/generated/results/train/audio_wav_annot_koumura` + +To set up this directory structure, we iterate through the constants defined below.
+ +The constants are: +- TOP_LEVEL_DIRS + name of sub-directories in ./tests/data_for_tests/generated that correspond to + either temporary config files, 'prep'd datasets, or `results` generated from those `prep`d datasets +- COMMAND_DIRS + names of sub-sub-directories in ./tests/data_for_tests/generated/prep & + ./tests/data_for_tests/generated/results that correspond to cli commands; + e.g., dataset from running `vak prep $A_TRAIN_CONFIG.TOML` will be in + ./tests/data_for_tests/generated/prep/train/audio_{}_annot_{} +- DATA_DIRS + names of sub-sub-directories in ./tests/data_for_tests/$COMMAND that correspond to toy test data sets; + those sub-directories contain test data generated by $COMMAND using the specified toy test data set + +In other words, the parent directory for all the directories this script generates +will have a path of the form: `./tests/data_for_tests/$TOP_LEVEL_DIRS/COMMAND_DIRS/DATA_DIRS`. +For example: +The `prep`d dataset from running `vak prep $AUDIO_CBIN_ANNOT_NOTMAT_CONFIG.TOML` will be in +`./tests/data_for_tests/prep/train/audio_cbin_annot_notmat` +and corresponding results will be in +`./tests/data_for_tests/results/train/audio_cbin_annot_notmat` +The directories will have names with timestamps like `prep_20201015_1115`. +Those are the directories this script generates. +""" +from pathlib import Path +import shutil + +import toml +import vak + +HERE = Path(__file__).parent +TEST_DATA_ROOT = HERE / '..' / 'data_for_tests' +GENERATED_TEST_DATA = TEST_DATA_ROOT / 'generated' +GENERATED_TEST_CONFIGS_ROOT = GENERATED_TEST_DATA / 'configs' + +# convention is that all the config.toml files in tests/data_for_tests/configs +# that should be run when generating test data +# have filenames of the form `{MODEL}_{COMMAND}_audio_{FORMAT}_annot_{FORMAT}.toml' +# **or** `{MODEL}_{COMMAND}_spect_{FORMAT}_annot_{FORMAT}_config.ini' +# e.g., 'tweetynet_learncurve_audio_cbin_annot_notmat.toml'.
+# Below, we iterate over model names +# so glob doesn't pick up static configs that are just used for testing, +# like 'invalid_option_config.toml` +TEST_CONFIGS_ROOT = TEST_DATA_ROOT.joinpath('configs') +CONFIGS_TO_RUN = [] +MODELS = ('teenytweetynet', 'tweetynet') +for model in MODELS: + CONFIGS_TO_RUN.extend(sorted(TEST_CONFIGS_ROOT.glob(f'{model}*.toml'))) + +# the sub-directories that will get made inside `./tests/data_for_tests/generated` +TOP_LEVEL_DIRS = [ + 'configs', + 'prep', + 'results', +] + +# these sub-dirs get made in each of the TOP_LEVEL_DIRS (except for 'configs') +COMMAND_DIRS = [ + 'eval', + 'learncurve', + 'predict', + 'train', +] + +# these sub-dirs get made in each of the COMMAND_DIRS (except for 'configs') +DATA_DIRS = [ + 'audio_cbin_annot_notmat', + 'audio_wav_annot_koumura', + 'spect_mat_annot_yarden', +] + + +def make_subdirs_in_generated(): + """make sub-directories inside ./tests/data_for_tests/generated + + first thing that has to get done before copying configs and + then using those configs to generate results + + makes three directories in data_for_tests/generated: + configs, prep, and results. + prep has one sub-directory for every data "type". + results does also, but in addition will have sub-directories + within those for models. 
+ """ + for top_level_dir in TOP_LEVEL_DIRS: + if top_level_dir == 'configs': + subdir_to_make = GENERATED_TEST_DATA / top_level_dir + subdir_to_make.mkdir(parents=True) + else: + for command_dir in COMMAND_DIRS: + for data_dir in DATA_DIRS: + if top_level_dir == 'prep': + subdir_to_make = GENERATED_TEST_DATA / top_level_dir / command_dir / data_dir + subdir_to_make.mkdir(parents=True) + else: + for model in MODELS: + subdir_to_make = GENERATED_TEST_DATA / top_level_dir / command_dir / data_dir / model + subdir_to_make.mkdir(parents=True) + + +def copy_config_files(): + """copy config files from setup to data_for_tests/configs + + the copied files are the ones that get modified when this setup script runs, + while the originals in this directory remain unchanged. + """ + copied_configs = [] + + for toml_path in CONFIGS_TO_RUN: + if not toml_path.exists(): + raise FileNotFoundError( + f'{toml_path} not found') + + dst = GENERATED_TEST_CONFIGS_ROOT.joinpath(toml_path.name) + print(f"\tcopying to {dst}") + shutil.copy(src=toml_path, dst=dst) + copied_configs.append(dst) + + return copied_configs + + +def run_prep(config_paths): + """run ``vak prep`` to generate data for testing""" + for config_path in config_paths: + if not config_path.exists(): + raise FileNotFoundError( + f'{config_path} not found') + print(f"running vak prep to generate data for tests test, using config: {config_path.name}") + vak.cli.prep.prep(toml_path=config_path) + + +def fix_options_in_configs(config_paths, command): + """fix values assigned to options in predict and eval configs + + Need to do this because both predict and eval configs have options + that can only be assigned *after* running the corresponding `train` config + """ + # split configs into train and predict or eval configs + configs_to_fix = [config for config in config_paths if command in config.name] + train_configs = [config for config in config_paths if 'train' in config.name] + + for config_to_fix in configs_to_fix: + # 
figure out which 'train' config corresponds to this 'predict' or 'eval' config + # by using 'suffix' of config file names. `train` suffix will match `predict`/'eval' suffix + prefix, suffix = config_to_fix.name.split(command) + train_config_to_use = [] + for train_config in train_configs: + train_prefix, train_suffix = train_config.name.split('train') + if train_suffix == suffix: + train_config_to_use.append(train_config) + if len(train_config_to_use) != 1: + raise ValueError( + f'did not find just a single train config that matches with predict config:\n' + f'{config_to_fix}' + f'Matches were: {train_config_to_use}' + ) + train_config_to_use = train_config_to_use[0] + + # now use the config to find the results dir and get the values for the options we need to set + # which are checkpoint_path, spect_scaler_path, and labelmap_path + with train_config_to_use.open('r') as fp: + train_config_toml = toml.load(fp) + root_results_dir = Path(train_config_toml['TRAIN']['root_results_dir']) + results_dir = sorted(root_results_dir.glob('results_*')) + if len(results_dir) != 1: + raise ValueError( + f'did not find just a single results directory in root_results_dir from train_config:\n' + f'{train_config_to_use}' + f'root_results_dir was: {root_results_dir}' + f'Matches for "results_*" were: {results_dir}' + ) + results_dir = results_dir[0] + # these are the only options whose values we need to change + # and they are the same for both predict and eval + checkpoint_path = sorted(results_dir.glob('**/checkpoints/checkpoint.pt'))[0] + spect_scaler_path = sorted(results_dir.glob('StandardizeSpect'))[0] + labelmap_path = sorted(results_dir.glob('labelmap.json'))[0] + + # now add these values to corresponding options in predict / eval config + with config_to_fix.open('r') as fp: + config_toml = toml.load(fp) + config_toml[command.upper()]['checkpoint_path'] = str(checkpoint_path) + config_toml[command.upper()]['spect_scaler_path'] = str(spect_scaler_path) + 
config_toml[command.upper()]['labelmap_path'] = str(labelmap_path) + with config_to_fix.open('w') as fp: + toml.dump(config_toml, fp) + + +# need to run 'train' config before we run 'predict' +# so we can add checkpoints, etc., from training to predict +COMMANDS = ( + 'train', + 'learncurve', + 'eval', + 'predict', +) + + +def main(): + print('making sub-directories in ./tests/data_for_tests/generated/ where files generated by `vak` will go') + make_subdirs_in_generated() + + print('copying config files run to generate test data from ./tests/data_for_tests/configs to ' + './tests/data_for_tests/generated/configs') + config_paths = copy_config_files() + + print( + f'will generate test data from these config files: {config_paths}' + ) + + # ---- only need to run prep once, since prep'd data is the same regardless of model ---- + prep_config_paths = [config_path + for config_path in config_paths + if config_path.name.startswith(MODELS[0])] + run_prep(config_paths=prep_config_paths) + # now add the prep csv from those configs to the corresponding config + # from all the other models + for model in MODELS[1:]: + model_config_paths = [config_path + for config_path in config_paths + if config_path.name.startswith(model)] + for model_config_path in model_config_paths: + # we want the same prep config for MODEL[0] which will have the + # exact same name, but with a different model name as the "prefix" + stem_minus_model = model_config_path.stem.replace(model, '') + prep_config_path = [prep_config_path + for prep_config_path in prep_config_paths + if prep_config_path.stem.endswith(stem_minus_model)] + assert len(prep_config_path) == 1 + prep_config_path = prep_config_path[0] + with prep_config_path.open('r') as fp: + prep_config_toml = toml.load(fp) + with model_config_path.open('r') as fp: + model_config_toml = toml.load(fp) + # find the section that `vak prep` added the `csv_path` to, + # and set `csv_path` for model config to the same value in + # the same section for 
this model config + for section_name, options_dict in prep_config_toml.items(): + if 'csv_path' in options_dict: + model_config_toml[section_name]['csv_path'] = options_dict['csv_path'] + with model_config_path.open('w') as fp: + toml.dump(model_config_toml, fp) + + for model in MODELS: + for command in COMMANDS: + if command == 'prep': + continue # already ran 'prep' + print( + f'running configs for command: {command}' + ) + command_config_paths = [config_path + for config_path in config_paths + if config_path.name.startswith(model) and command in config_path.name] + print( + f'using the following configs:\n{command_config_paths}' + ) + if command == 'predict' or command == 'eval': + # fix values for required options in predict / eval configs + # using results from running the corresponding train configs. + # this only works if we ran the train configs already, + # which we should have because of ordering of COMMANDS constant above + copied_config_paths_this_model = [config_path + for config_path in config_paths + if config_path.name.startswith(model)] + fix_options_in_configs(copied_config_paths_this_model, command) + + for config_path in command_config_paths: + vak.cli.cli.cli(command, config_path) + + +if __name__ == '__main__': + main() diff --git a/tests/test_cli/test_eval.py b/tests/test_cli/test_eval.py index 73cfa373f..d4cbec8f0 100644 --- a/tests/test_cli/test_eval.py +++ b/tests/test_cli/test_eval.py @@ -21,6 +21,7 @@ def test_eval(audio_format, annot_format, specific_config, tmp_path, + model, device): output_dir = tmp_path.joinpath(f'test_eval_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -35,6 +36,7 @@ def test_eval(audio_format, ] toml_path = specific_config(config_type='eval', + model=model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_cli/test_learncurve.py b/tests/test_cli/test_learncurve.py index f61a94324..e4175cd99 100644 --- a/tests/test_cli/test_learncurve.py 
+++ b/tests/test_cli/test_learncurve.py @@ -11,6 +11,7 @@ def test_learncurve(specific_config, tmp_path, + model, device): root_results_dir = tmp_path.joinpath('test_learncurve_root_results_dir') root_results_dir.mkdir() @@ -25,6 +26,7 @@ def test_learncurve(specific_config, ] toml_path = specific_config(config_type='learncurve', + model=model, audio_format='cbin', annot_format='notmat', options_to_change=options_to_change) @@ -51,8 +53,9 @@ def test_learncurve(specific_config, ) def test_learncurve_previous_run_path(specific_config, tmp_path, + model, device, - previous_run_path, + previous_run_path_factory, window_size): root_results_dir = tmp_path.joinpath('test_learncurve_root_results_dir') root_results_dir.mkdir() @@ -66,13 +69,14 @@ def test_learncurve_previous_run_path(specific_config, 'value': device}, {'section': 'LEARNCURVE', 'option': 'previous_run_path', - 'value': str(previous_run_path)}, + 'value': str(previous_run_path_factory(model))}, {'section': 'DATALOADER', 'option': 'window_size', 'value': window_size} ] toml_path = specific_config(config_type='learncurve', + model=model, audio_format='cbin', annot_format='notmat', options_to_change=options_to_change) diff --git a/tests/test_cli/test_predict.py b/tests/test_cli/test_predict.py index e9d86c277..24df42c59 100644 --- a/tests/test_cli/test_predict.py +++ b/tests/test_cli/test_predict.py @@ -22,6 +22,7 @@ def test_predict(audio_format, annot_format, specific_config, tmp_path, + model, device): output_dir = tmp_path.joinpath(f'test_predict_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -36,6 +37,7 @@ def test_predict(audio_format, ] toml_path = specific_config(config_type='predict', + model=model, audio_format=audio_format, annot_format=annot_format, options_to_change=options_to_change) diff --git a/tests/test_cli/test_prep.py b/tests/test_cli/test_prep.py index 5b5eb8d5e..69a1c1362 100644 --- a/tests/test_cli/test_prep.py +++ b/tests/test_cli/test_prep.py @@ -28,8 +28,10 @@ def 
test_purpose_from_toml(config_type, spect_format, annot_format, specific_config, + default_model, tmp_path): toml_path = specific_config(config_type=config_type, + model=default_model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format) @@ -55,6 +57,7 @@ def test_prep(config_type, spect_format, annot_format, specific_config, + default_model, tmp_path): output_dir = tmp_path.joinpath(f'test_prep_{config_type}_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -73,6 +76,7 @@ def test_prep(config_type, }, ] toml_path = specific_config(config_type=config_type, + model=default_model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, @@ -107,6 +111,7 @@ def test_prep_csv_path_raises(config_type, spect_format, annot_format, specific_config, + default_model, tmp_path): output_dir = tmp_path.joinpath(f'test_prep_{config_type}_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -119,6 +124,7 @@ def test_prep_csv_path_raises(config_type, }, ] toml_path = specific_config(config_type=config_type, + model=default_model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_cli/test_train.py b/tests/test_cli/test_train.py index 3f0b43258..a2788cfc3 100644 --- a/tests/test_cli/test_train.py +++ b/tests/test_cli/test_train.py @@ -23,6 +23,7 @@ def test_train(audio_format, annot_format, specific_config, tmp_path, + model, device): root_results_dir = tmp_path.joinpath('test_train_root_results_dir') root_results_dir.mkdir() @@ -37,6 +38,7 @@ def test_train(audio_format, ] toml_path = specific_config(config_type='train', + model=model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_config/test_config.py b/tests/test_config/test_config.py index 771add883..f64b7382b 100644 --- a/tests/test_config/test_config.py +++ b/tests/test_config/test_config.py @@ -2,11 +2,15 @@ def 
test_config_attrs_class( - all_generated_configs_toml_path_pairs + all_generated_configs_toml_path_pairs, + default_model, ): """test that instantiating Config class works as expected""" for config_toml, toml_path in all_generated_configs_toml_path_pairs: - # this is basically the body of the ``config.parse.from_toml`` function. + if default_model not in str(toml_path): + continue # only need to check configs for one model + # also avoids FileNotFoundError on CI + # this is basically the body of the ``config.parse.from_toml`` function. config_dict = {} for section_name in list(vak.config.parse.SECTION_CLASSES.keys()): if section_name in config_toml: diff --git a/tests/test_config/test_parse.py b/tests/test_config/test_parse.py index a14945a5d..a52f9a6a0 100644 --- a/tests/test_config/test_parse.py +++ b/tests/test_config/test_parse.py @@ -24,11 +24,15 @@ ) def test_parse_config_section_returns_attrs_class( section_name, - all_generated_configs_toml_path_pairs + all_generated_configs_toml_path_pairs, + default_model, ): """test that ``vak.config.parse.parse_config_section`` returns an instance of ``vak.config.learncurve.LearncurveConfig``""" for config_toml, toml_path in all_generated_configs_toml_path_pairs: + if default_model not in str(toml_path): + continue # only need to check configs for one model + # also avoids FileNotFoundError on CI if section_name in config_toml: config_section_obj = vak.config.parse.parse_config_section( config_toml=config_toml, @@ -52,18 +56,25 @@ def test_parse_config_section_returns_attrs_class( ) def test_parse_config_section_missing_options_raises( section_name, - all_generated_configs_toml_path_pairs + all_generated_configs_toml_path_pairs, + default_model, ): """test that configs without the required options in a section raise KeyError""" if vak.config.parse.REQUIRED_OPTIONS[section_name] is None: pytest.skip(f'no required options to test for section: {section_name}') + # in comprehensions below, filter by default model + # 
because we only need to check configs for one model + # also avoids FileNotFoundError on CI if section_name == 'PREP': - configs_toml_path_pairs = all_generated_configs_toml_path_pairs + configs_toml_path_pairs = ((config_toml, toml_path) + for config_toml, toml_path in all_generated_configs_toml_path_pairs + if default_model in str(toml_path)) else: configs_toml_path_pairs = ((config_toml, toml_path) for config_toml, toml_path in all_generated_configs_toml_path_pairs - if section_name.lower() in toml_path.name) + if section_name.lower() in toml_path.name and default_model in str(toml_path)) + for config_toml, toml_path in configs_toml_path_pairs: if section_name in config_toml: for option in vak.config.parse.REQUIRED_OPTIONS[section_name]: @@ -232,8 +243,11 @@ def test_load_from_toml_path_raises_when_config_doesnt_exist(config_that_doesnt_ vak.config.parse._load_toml_from_path(config_that_doesnt_exist) -def test_from_toml_path_returns_instance_of_config(all_generated_configs): +def test_from_toml_path_returns_instance_of_config(all_generated_configs, default_model): for toml_path in all_generated_configs: + if default_model not in str(toml_path): + continue # only need to check configs for one model + # also avoids FileNotFoundError on CI config_obj = vak.config.parse.from_toml_path(toml_path) assert isinstance(config_obj, vak.config.parse.Config) @@ -243,8 +257,11 @@ def test_from_toml_path_raises_when_config_doesnt_exist(config_that_doesnt_exist vak.config.parse.from_toml_path(config_that_doesnt_exist) -def test_from_toml(all_generated_configs_toml_path_pairs): +def test_from_toml(all_generated_configs_toml_path_pairs, default_model): for config_toml, toml_path in all_generated_configs_toml_path_pairs: + if default_model not in str(toml_path): + continue # only need to check configs for one model + # also avoids FileNotFoundError on CI config_obj = vak.config.parse.from_toml(config_toml, toml_path) assert isinstance(config_obj, vak.config.parse.Config) diff 
--git a/tests/test_core/test_eval.py b/tests/test_core/test_eval.py index a34e70a47..532f2e34d 100644 --- a/tests/test_core/test_eval.py +++ b/tests/test_core/test_eval.py @@ -28,6 +28,7 @@ def test_eval(audio_format, annot_format, specific_config, tmp_path, + model, device): output_dir = tmp_path.joinpath(f'test_eval_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -42,6 +43,7 @@ def test_eval(audio_format, ] toml_path = specific_config(config_type='eval', + model=model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_core/test_learncurve.py b/tests/test_core/test_learncurve.py index 9162b2163..112c6fcf4 100644 --- a/tests/test_core/test_learncurve.py +++ b/tests/test_core/test_learncurve.py @@ -51,6 +51,7 @@ def learncurve_output_matches_expected(cfg, def test_learncurve(specific_config, tmp_path, + model, device): options_to_change = { 'section': 'LEARNCURVE', @@ -59,6 +60,7 @@ def test_learncurve(specific_config, } toml_path = specific_config(config_type='learncurve', + model=model, audio_format='cbin', annot_format='notmat', options_to_change=options_to_change) @@ -98,6 +100,7 @@ def test_learncurve(specific_config, def test_learncurve_no_results_path(specific_config, tmp_path, + model, device): root_results_dir = tmp_path.joinpath('test_learncurve_no_results_path') root_results_dir.mkdir() @@ -116,6 +119,7 @@ def test_learncurve_no_results_path(specific_config, ] toml_path = specific_config(config_type='learncurve', + model=model, audio_format='cbin', annot_format='notmat', options_to_change=options_to_change) diff --git a/tests/test_core/test_predict.py b/tests/test_core/test_predict.py index 421731a05..14128a707 100644 --- a/tests/test_core/test_predict.py +++ b/tests/test_core/test_predict.py @@ -32,6 +32,7 @@ def test_predict(audio_format, save_net_outputs, specific_config, tmp_path, + model, device): output_dir = 
tmp_path.joinpath(f'test_predict_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -48,6 +49,7 @@ def test_predict(audio_format, 'value': save_net_outputs} ] toml_path = specific_config(config_type='predict', + model=model, audio_format=audio_format, annot_format=annot_format, options_to_change=options_to_change) diff --git a/tests/test_core/test_prep.py b/tests/test_core/test_prep.py index 3f972c925..8a70c5b2d 100644 --- a/tests/test_core/test_prep.py +++ b/tests/test_core/test_prep.py @@ -45,6 +45,7 @@ def test_prep(config_type, spect_format, annot_format, specific_config, + default_model, tmp_path): output_dir = tmp_path.joinpath(f'test_prep_{config_type}_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -55,6 +56,7 @@ def test_prep(config_type, 'value': str(output_dir) } toml_path = specific_config(config_type=config_type, + model=default_model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_core/test_train.py b/tests/test_core/test_train.py index 679d8b0ef..f7944d7f4 100644 --- a/tests/test_core/test_train.py +++ b/tests/test_core/test_train.py @@ -44,6 +44,7 @@ def test_train(audio_format, annot_format, specific_config, tmp_path, + model, device): options_to_change = { 'section': 'TRAIN', @@ -51,6 +52,7 @@ def test_train(audio_format, 'value': device } toml_path = specific_config(config_type='train', + model=model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_data/configs/configs.json b/tests/test_data/configs/configs.json deleted file mode 100644 index 0bf924980..000000000 --- a/tests/test_data/configs/configs.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "configs": [ - { - "filename": "test_eval_audio_cbin_annot_notmat.toml", - "config_type": "eval", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat" - }, - { - "filename": "test_learncurve_audio_cbin_annot_notmat.toml", - 
"config_type": "learncurve", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat" - }, - { - "filename": "test_predict_audio_cbin_annot_notmat.toml", - "config_type": "predict", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat" - }, - { - "filename": "test_predict_audio_wav_annot_koumura.toml", - "config_type": "predict", - "audio_format": "wav", - "spect_format": null, - "annot_format": "koumura" - }, - { - "filename": "test_train_audio_cbin_annot_notmat.toml", - "config_type": "train", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat" - }, - { - "filename": "test_train_audio_wav_annot_koumura.toml", - "config_type": "train", - "audio_format": "wav", - "spect_format": null, - "annot_format": "koumura" - }, - { - "filename": "test_train_spect_mat_annot_yarden.toml", - "config_type": "train", - "audio_format": null, - "spect_format": "mat", - "annot_format": "yarden" - } - ] -} \ No newline at end of file diff --git a/tests/test_files/test_files.py b/tests/test_files/test_files.py index ba7999f61..d32c370cc 100644 --- a/tests/test_files/test_files.py +++ b/tests/test_files/test_files.py @@ -29,8 +29,8 @@ def test_files_from_dir_with_cbin(audio_dir_cbin, @pytest.mark.parametrize( ('dir_path', 'ext'), - [('./tests/test_data/source/audio_wav_annot_textgrid/AGBk/', 'WAV'), - ('./tests/test_data/source/audio_wav_annot_koumura/Bird0/Wave', 'wav'), + [('./tests/data_for_tests/source/audio_wav_annot_textgrid/AGBk/', 'WAV'), + ('./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0/Wave', 'wav'), ] ) def test_from_dir_is_case_insensitive(dir_path, ext): @@ -43,8 +43,8 @@ def test_from_dir_is_case_insensitive(dir_path, ext): @pytest.mark.parametrize( ('dir_path', 'ext'), - [('./tests/test_data/source/audio_wav_annot_textgrid/', 'WAV'), - ('./tests/test_data/source/audio_wav_annot_koumura/Bird0', 'wav'), + [('./tests/data_for_tests/source/audio_wav_annot_textgrid/', 'WAV'), + 
('./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0', 'wav'), ] ) def test_from_dir_searches_child_dir(dir_path, ext): diff --git a/tests/test_labeled_timebins.py b/tests/test_labeled_timebins.py index 2a1b32870..9d872aea1 100644 --- a/tests/test_labeled_timebins.py +++ b/tests/test_labeled_timebins.py @@ -136,9 +136,11 @@ def test_lbl_tb2segments_recovers_onsets_offsets_labels(): def test_lbl_tb2segments_recovers_onsets_offsets_labels_from_real_data( specific_dataframe, labelset_yarden, + model, ): """test that ``lbl_tb2segments`` recovers onsets and offsets from real data""" vak_df = specific_dataframe(config_type='train', + model=model, spect_format='mat', annot_format='yarden') labelmap = vak.labels.to_map(