From b7bddc5b30e0afe6dc1209339af79f902bf37562 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 15 Mar 2021 22:30:33 -0400 Subject: [PATCH] TST/CI: refactor test suite to use TeenyTweetyNet, fix #330 generating test data: - rename tweetynet configs - add teenytweetynet configs - modify test_data_generate.py script - so it finds all configs - have it make model subdirectories in results/ - rewrite Makefile - differentiate 'all' and 'ci' generated test data - change commands, variables, urls to data refactoring tests: - add `model` parameters to fixtures: - to `specific_config` fixture - to `specific_config_toml` fixture - to `dataframe` fixtures - change `previous_run_path` fixture to `previous_run_path_factory` - use `model` as argument to `_previous_run_path` function returned by factory - add `models` command-line option for pytest + that will parametrize any test that specifies `models` fixture with whatever arguments are passed in at command line - use `model` fixture in tests - add model fixture to unit tests in test_core/ and test_cli/ - use `model` fixture in a test in test_labeled_timebins - use `previous_run_path_factory` with `model` in test_cli/test_learncurve.py - also add `default_model` fixture that is used whenever a model name is needed by other fixtures (e.g. `specific_config`) but the model shouldn't actually matter + idea is this model should work no matter where tests are run, CI v. 
locally + this is a code smell to me -- if tests don't depend on model then why do I need a "dummy model" + but not obvious to me right now how to disentangle them, and I just want to get the damn CI working CI: - have ci.yml download just test data generated for ci - have ci run pytest using command-line option models, and specifying only teenytweetynet for now (default but make it explicit anyway) - fix the `fix_prep_csv_paths` script so that it correctly finds prep csv files with the new test data directory names and structure other refactoring - move src/scripts to tests/scripts - rename test_data to data_for_tests + so it doesn't look like a sub-package of tests to `pytest` --- .github/workflows/ci.yml | 6 +- .gitignore | 4 +- Makefile | 91 ++++-- src/scripts/test_data/test_data_generate.py | 262 --------------- tests/conftest.py | 17 + tests/data_for_tests/configs/configs.json | 116 +++++++ .../configs/invalid_option_config.toml | 0 .../configs/invalid_section_config.toml | 0 .../invalid_train_and_learncurve_config.toml | 8 +- ...weetynet_eval_audio_cbin_annot_notmat.toml | 30 ++ ...et_learncurve_audio_cbin_annot_notmat.toml | 37 +++ ...tynet_predict_audio_cbin_annot_notmat.toml | 29 ++ ...tynet_predict_audio_wav_annot_koumura.toml | 29 ++ ...eetynet_train_audio_cbin_annot_notmat.toml | 35 ++ ...eetynet_train_audio_wav_annot_koumura.toml | 36 +++ ...weetynet_train_spect_mat_annot_yarden.toml | 34 ++ ...eetynet_eval_audio_cbin_annot_notmat.toml} | 8 +- ...t_learncurve_audio_cbin_annot_notmat.toml} | 8 +- ...ynet_predict_audio_cbin_annot_notmat.toml} | 8 +- ...ynet_predict_audio_wav_annot_koumura.toml} | 8 +- ...etynet_train_audio_cbin_annot_notmat.toml} | 8 +- ...etynet_train_audio_wav_annot_koumura.toml} | 10 +- ...eetynet_train_spect_mat_annot_yarden.toml} | 8 +- .../generated/.gitkeep | 0 .../source/.gitkeep | 0 tests/fixtures/__init__.py | 1 + tests/fixtures/config.py | 13 +- tests/fixtures/dataframe.py | 12 +- tests/fixtures/model.py | 12 + 
tests/fixtures/path.py | 18 +- tests/fixtures/test_data.py | 4 +- .../scripts}/fix_prep_csv_paths.py | 6 +- tests/scripts/generate_data_for_tests.py | 301 ++++++++++++++++++ tests/test_cli/test_eval.py | 2 + tests/test_cli/test_learncurve.py | 8 +- tests/test_cli/test_predict.py | 2 + tests/test_cli/test_prep.py | 6 + tests/test_cli/test_train.py | 2 + tests/test_config/test_config.py | 8 +- tests/test_config/test_parse.py | 29 +- tests/test_core/test_eval.py | 2 + tests/test_core/test_learncurve.py | 4 + tests/test_core/test_predict.py | 2 + tests/test_core/test_prep.py | 2 + tests/test_core/test_train.py | 2 + tests/test_data/configs/configs.json | 53 --- tests/test_files/test_files.py | 8 +- tests/test_labeled_timebins.py | 2 + 48 files changed, 876 insertions(+), 415 deletions(-) delete mode 100644 src/scripts/test_data/test_data_generate.py create mode 100644 tests/data_for_tests/configs/configs.json rename tests/{test_data => data_for_tests}/configs/invalid_option_config.toml (100%) rename tests/{test_data => data_for_tests}/configs/invalid_section_config.toml (100%) rename tests/{test_data => data_for_tests}/configs/invalid_train_and_learncurve_config.toml (68%) create mode 100644 tests/data_for_tests/configs/teenytweetynet_eval_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_learncurve_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_predict_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_predict_audio_wav_annot_koumura.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_train_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_train_audio_wav_annot_koumura.toml create mode 100644 tests/data_for_tests/configs/teenytweetynet_train_spect_mat_annot_yarden.toml rename tests/{test_data/configs/test_eval_audio_cbin_annot_notmat.toml => 
data_for_tests/configs/tweetynet_eval_audio_cbin_annot_notmat.toml} (69%) rename tests/{test_data/configs/test_learncurve_audio_cbin_annot_notmat.toml => data_for_tests/configs/tweetynet_learncurve_audio_cbin_annot_notmat.toml} (58%) rename tests/{test_data/configs/test_predict_audio_cbin_annot_notmat.toml => data_for_tests/configs/tweetynet_predict_audio_cbin_annot_notmat.toml} (66%) rename tests/{test_data/configs/test_predict_audio_wav_annot_koumura.toml => data_for_tests/configs/tweetynet_predict_audio_wav_annot_koumura.toml} (66%) rename tests/{test_data/configs/test_train_audio_cbin_annot_notmat.toml => data_for_tests/configs/tweetynet_train_audio_cbin_annot_notmat.toml} (54%) rename tests/{test_data/configs/test_train_audio_wav_annot_koumura.toml => data_for_tests/configs/tweetynet_train_audio_wav_annot_koumura.toml} (51%) rename tests/{test_data/configs/test_train_spect_mat_annot_yarden.toml => data_for_tests/configs/tweetynet_train_spect_mat_annot_yarden.toml} (56%) rename tests/{test_data => data_for_tests}/generated/.gitkeep (100%) rename tests/{test_data => data_for_tests}/source/.gitkeep (100%) create mode 100644 tests/fixtures/model.py rename {src/scripts/test_data => tests/scripts}/fix_prep_csv_paths.py (85%) create mode 100644 tests/scripts/generate_data_for_tests.py delete mode 100644 tests/test_data/configs/configs.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c1cd382ae..f0d9938b3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - name: run tests run: | make test-data-download-source - make test-data-download-generate + make test-data-download-generated-ci poetry install - poetry run python ./src/scripts/test_data/fix_prep_csv_paths.py - poetry run pytest + poetry run python ./tests/scripts/fix_prep_csv_paths.py + poetry run pytest --models teenytweetynet diff --git a/.gitignore b/.gitignore index 0b2ad94a3..9f9c1c99a 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,6 
@@ build/ doc/_build/ # test data -tests/test_data/source/ -tests/test_data/generated/ +tests/data_for_tests/source/ +tests/data_for_tests/generated/ *.tar.gz diff --git a/Makefile b/Makefile index f66551fdb..309ffdd84 100644 --- a/Makefile +++ b/Makefile @@ -1,33 +1,57 @@ -SOURCE_TEST_DATA_TAR=tests/test_data/source/source_test_data.tar.gz -SOURCE_TEST_DATA_URL=https://osf.io/7ru4s/download +TEST_DATA_GENERATE_SCRIPT=./tests/scripts/generate_data_for_tests.py -TEST_DATA_GENERATE_SCRIPT=./src/scripts/test_data/test_data_generate.py -GENERATED_TEST_DATA_TAR=tests/test_data/generated/generated_test_data.tar.gz -GENERATED_TEST_DATA_URL=https://osf.io/q76xd/download -GENERATED_TEST_DATA_TOP_LEVEL_DIRS=tests/test_data/generated/configs tests/test_data/generated/prep tests/test_data/generated/results +DATA_FOR_TESTS_DIR=./tests/data_for_tests/ +GENERATED_TEST_DATA_DIR=${DATA_FOR_TESTS_DIR}generated/ +CONFIGS_DIR=${GENERATED_TEST_DATA_DIR}configs +PREP_DIR=${GENERATED_TEST_DATA_DIR}prep/ +RESULTS_DIR=${GENERATED_TEST_DATA_DIR}results/ +RESULTS_CI=$(shell ls -d ${RESULTS_DIR}*/*/teenytweetynet) +GENERATED_TEST_DATA_CI_DIRS=${CONFIGS_DIR} ${PREP_DIR} ${RESULTS_CI} +GENERATED_TEST_DATA_ALL_DIRS=${GENERATED_TEST_DATA_CI_DIRS} $(shell ls -d ${RESULTS_DIR}/*/*/tweetynet) + +SOURCE_TEST_DATA_TAR=${DATA_FOR_TESTS_DIR}source/source_test_data.tar.gz +GENERATED_TEST_DATA_CI_TAR=${GENERATED_TEST_DATA_DIR}generated_test_data.ci.tar.gz +GENERATED_TEST_DATA_ALL_TAR=${GENERATED_TEST_DATA_DIR}generated_test_data.tar.gz + +SOURCE_TEST_DATA_URL=https://osf.io/s85vh/download +GENERATED_TEST_DATA_ALL_URL=https://osf.io/gt5xw/download +GENERATED_TEST_DATA_CI_URL=https://osf.io/u64nt/download help: @echo 'Makefile for vak ' @echo ' ' @echo 'Usage: ' - @echo ' make test-data-clean-source remove source test data ' - @echo ' make test-data-download-source download source test data ' - @echo ' make test-data-generate generate vak files used by tests from source data ' - @echo ' make 
test-data-clean-generate remove generated test data ' - @echo ' make test-data-tar-generate place generated test data in compressed tar file ' - @echo ' make test-data-download-generate download generated test data .tar and expand ' - @echo ' make variables show variables defined for Makefile ' + @echo ' make test-data-clean-source remove source test data ' + @echo ' make test-data-download-source download source test data ' + @echo ' make test-data-generate generate vak files used by tests from source data ' + @echo ' make test-data-clean-generated remove generated test data ' + @echo ' make test-data-tar-generated-all place all generated test data in compressed tar file ' + @echo ' make test-data-tar-generated-ci place generated test data for CI in compressed tar file ' + @echo ' make test-data-download-generated-all download .tar with all generated test data and expand ' + @echo ' make test-data-download-generated-ci download .tar with generated test data for CI and expand ' + @echo ' make variables show variables defined for Makefile ' variables: - @echo ' SOURCE_TEST_DATA_TAR : $(GENERATED_TEST_DATA_TAR) ' - @echo ' SOURCE_TEST_DATA_URL : $(GENERATED_TEST_DATA_URL) ' - @echo ' TESTS_DATA_GENERATE_SCRIPT : $(TEST_DATA_GENERATE_SCRIPT) ' - @echo ' GENERATED_TEST_DATA_TAR : $(GENERATED_TEST_DATA_TAR) ' - @echo ' GENERATED_TEST_DATA_URL : $(GENERATED_TEST_DATA_URL) ' - @echo ' GENERATED_TEST_DATA_TOP_LEVEL_DIRS : $(GENERATED_TEST_DATA_TOP_LEVEL_DIRS) ' + @echo ' TESTS_DATA_GENERATE_SCRIPT : $(TEST_DATA_GENERATE_SCRIPT) ' + @echo '' + @echo ' DATA_FOR_TESTS_DIR : $(DATA_FOR_TESTS_DIR) ' + @echo ' GENERATED_TEST_DATA_DIR : $(GENERATED_TEST_DATA_DIR) ' + @echo ' PREP_DIR : $(PREP_DIR) ' + @echo ' RESULTS_DIR : $(RESULTS_DIR) ' + @echo ' RESULTS_CI : $(RESULTS_CI) ' + @echo ' GENERATED_TEST_DATA_CI_DIRS : $(GENERATED_TEST_DATA_CI_DIRS) ' + @echo ' GENERATED_TEST_DATA_ALL_DIRS : $(GENERATED_TEST_DATA_ALL_DIRS) ' + @echo '' + @echo ' SOURCE_TEST_DATA_TAR : 
$(SOURCE_TEST_DATA_TAR) ' + @echo ' GENERATED_TEST_DATA_CI_TAR : $(GENERATED_TEST_DATA_CI_TAR) ' + @echo ' GENERATED_TEST_DATA_ALL_TAR : $(GENERATED_TEST_DATA_ALL_TAR) ' + @echo '' + @echo ' SOURCE_TEST_DATA_URL : $(SOURCE_TEST_DATA_URL) ' + @echo ' GENERATED_TEST_DATA_ALL_URL : $(GENERATED_TEST_DATA_ALL_URL) ' + @echo ' GENERATED_TEST_DATA_CI_URL : $(GENERATED_TEST_DATA_CI_URL) ' test-data-clean-source: - rm -rfv ./tests/test_data/source/* + rm -rfv ./tests/data_for_tests/source/* test-data-download-source: wget -q $(SOURCE_TEST_DATA_URL) -O $(SOURCE_TEST_DATA_TAR) @@ -36,14 +60,25 @@ test-data-download-source: test-data-generate : $(TEST_DATA_GENERATE_SCRIPT) poetry run python $(TEST_DATA_GENERATE_SCRIPT) -test-data-clean-generate : - rm -rfv ./tests/test_data/generated/* +test-data-clean-generated : + rm -rfv ./tests/data_for_tests/generated/* + +test-data-tar-generated-all: + tar -czvf $(GENERATED_TEST_DATA_ALL_TAR) $(GENERATED_TEST_DATA_ALL_DIRS) + +test-data-tar-generated-ci: + tar -czvf $(GENERATED_TEST_DATA_CI_TAR) $(GENERATED_TEST_DATA_CI_DIRS) -test-data-tar-generate: - tar -czvf $(GENERATED_TEST_DATA_TAR) $(GENERATED_TEST_DATA_TOP_LEVEL_DIRS) +test-data-download-generated-all: + wget -q $(GENERATED_TEST_DATA_ALL_URL) -O $(GENERATED_TEST_DATA_ALL_TAR) + tar -xzf $(GENERATED_TEST_DATA_ALL_TAR) -test-data-download-generate: - wget -q $(GENERATED_TEST_DATA_URL) -O $(GENERATED_TEST_DATA_TAR) - tar -xzf $(GENERATED_TEST_DATA_TAR) +test-data-download-generated-ci: + wget -q $(GENERATED_TEST_DATA_CI_URL) -O $(GENERATED_TEST_DATA_CI_TAR) + tar -xzf $(GENERATED_TEST_DATA_CI_TAR) -.PHONY: help variables test-data-clean-source test-data-download-source test-data-generate test-data-clean-generate test-data-tar-generate test-data-download-generate +.PHONY: help variables \ + test-data-clean-source test-data-download-source \ + test-data-generate test-data-clean-generated \ + test-data-tar-generated-all test-data-tar-generated-all \ + test-data-download-generated-all 
test-data-download-generated-ci diff --git a/src/scripts/test_data/test_data_generate.py b/src/scripts/test_data/test_data_generate.py deleted file mode 100644 index 0d6a4038c..000000000 --- a/src/scripts/test_data/test_data_generate.py +++ /dev/null @@ -1,262 +0,0 @@ -"""script run by Makefile test-data-generate command - -makes all the 'generated' test data, i.e. files created by vak, -It's called 'generated' test data to distinguish it from the -'source' test data, i.e., files **not** created by vak, that is, -the input data used when vak does create files (csv files, logs, -neural network checkpoints, etc.) - -This script generates: -* temporary config.toml files used when generating results -* `prep`d (prepared) datasets, and results created with those datasets, - both of which were generated using the temporary config.toml files - -all the setup configs send output to one of two places: -for any prep command, the output goes to some child directory of ./tests/test_data/generated/prep -for any command run with a `prep`d dataset, the output goes to some child dir of ./tests/test_data/generated/results - -examples: - when we run `vak prep tests/test_data/generated/configs/test_train_audio_wav_annot_koumura.toml` - the `prep`d dataset will be in a new directory created in - `./tests/test_data/generated/prep/train/audio_wav_annot_koumura` - - when we run `vak train tests/test_data/genereated/configs/test_train_audio_wav_annot_koumura.toml` - it will use the `prep`d dataset csv that's now in - `./tests/test_data/generated/prep/train/audio_wav_annot_koumura`, that the temporary config.toml points to, - and the results will go to a new directory created in - `./tests/test_data/generated/results/train/audio_wav_annot_koumura` - -To set up this directory structure, we iterate through the constants defined below. 
- -The constants are: -- TOP_LEVEL_DIRS - name of sub-directories in .tests/test_data/generated that correspond to - either tempory config files, 'prep'd datasets, or `results` generated from those `prep`d datasets -- COMMAND_DIRS - names of sub-sub-directories in ./tests/test_data/generated/prep & - ./tests/test_data/generated/results that correspond to cli commands; - e.g., dataset from running `vak prep $A_TRAIN_CONFIG.TOML` will be in - ./tests/test_data/generated/prep/train/audio_{}_annot_{} -- DATA_DIRS - names of "subsub"directories in ./tests/test_data/$COMMAND that correspond to toy test data sets; - those sub-directories contain test data generated by $COMMAND using the specified toy test data set - -In other words, the parent directory for all the generated directories we need to remove -will have a path of the form: `./tests/test_data/$TOP_LEVEL_DIRS/COMMAND_DIRS/DATA_DIRS`. -For example: -The prep`d dataset from running `vak prep $AUDIO_CBIN_ANNOT_NOTMAT_CONFIG.TOML` will be in -`./tests/test_data/prep/train/audio_cbin_annot_notmat` -and corresponding results will be in -`./tests/test_data/results/train/audio_cbin_annot_notmat` -The directories will have names with timestamps like `prep_20201015_1115`. -Those are the generated directories we want to remove. 
-""" -from pathlib import Path -import shutil - -import toml -import vak - -HERE = Path(__file__).parent -TESTS_ROOT = HERE.joinpath('../../../tests') -TEST_DATA_ROOT = TESTS_ROOT.joinpath('test_data') -GENERATED_TEST_DATA = TEST_DATA_ROOT.joinpath('generated') -GENERATED_TEST_CONFIGS_ROOT = GENERATED_TEST_DATA.joinpath('configs') - -# convention is that all the config.toml files in tests/test_data/configs -# that should be run when generating test data -# have filenames of the form `test_{COMMAND}_audio_{FORMAT}_annot_{FORMAT}.toml' -# **or** `test_{COMMAND}_spect_{FORMAT}_annot_{FORMAT}_config.ini' -# e.g., 'test_learncurve_audio_cbin_annot_notmat.toml' -TEST_CONFIGS_ROOT = TEST_DATA_ROOT.joinpath('configs') -CONFIGS_TO_RUN = TEST_CONFIGS_ROOT.glob('test*.toml') - -# the sub-directories that will get made inside `./tests/test_data/generated` -TOP_LEVEL_DIRS = [ - 'configs', - 'prep', - 'results', -] - -# these sub-dirs get made in each of the TOP_LEVEL_DIRS (except for 'configs') -COMMAND_DIRS = [ - 'eval', - 'learncurve', - 'predict', - 'train', -] - -# these sub-dirs get made in each of the COMMAND_DIRS (except for 'configs') -DATA_DIRS = [ - 'audio_cbin_annot_notmat', - 'audio_wav_annot_koumura', - 'spect_mat_annot_yarden', -] - - -def make_subdirs_in_generated(): - """make sub-directories inside ./tests/test_data/generated - - first thing that has to get done before copying configs and - then using those configs to generate results - """ - for top_level_dir in TOP_LEVEL_DIRS: - if top_level_dir == 'configs': - subdir_to_make = GENERATED_TEST_DATA / top_level_dir - subdir_to_make.mkdir(parents=True) - else: - for command_dir in COMMAND_DIRS: - for data_dir in DATA_DIRS: - subdir_to_make = GENERATED_TEST_DATA / top_level_dir / command_dir / data_dir - subdir_to_make.mkdir(parents=True) - - -def copy_config_files(): - """copy config files from setup to test_data/configs - - the copied files are the ones that get modified when this setup script runs, - while the 
originals in this directory remain unchanged. - """ - for toml_path in CONFIGS_TO_RUN: - if not toml_path.exists(): - raise FileNotFoundError( - f'{toml_path} not found') - - dst = GENERATED_TEST_CONFIGS_ROOT.joinpath(toml_path.name) - print(f"\tcopying to {dst}") - shutil.copy(src=toml_path, dst=dst) - - -def run_prep(test_config_paths): - """run ``vak prep`` for all test configs""" - for test_config_path in test_config_paths: - if not test_config_path.exists(): - raise FileNotFoundError( - f'{test_config_path} not found') - print(f"re-running vak prep to set up for test, using config: {test_config_path.name}") - vak.cli.prep.prep(toml_path=test_config_path) - - -def run_results(test_config_paths): - """run ``vak {command}`` for all test configs, - where {command} is determined from the config file name - """ - for test_config_path in test_config_paths: - if 'train' in test_config_path.name: - vak.cli.train.train(toml_path=test_config_path) - elif 'eval' in test_config_path.name: - vak.cli.eval.eval(toml_path=test_config_path) - elif 'predict' in test_config_path.name: - vak.cli.predict.predict(toml_path=test_config_path) - elif 'learncurve' in test_config_path.name: - vak.cli.learncurve.learning_curve(toml_path=test_config_path) - else: - raise ValueError( - f'unable to determine command to run from config name:\n{test_config_path}' - ) - - -def fix_options_in_configs(test_config_paths, command): - """fix values assigned to options in predict and eval configs - - Need to do this because both predict and eval configs have options - that can only be assigned *after* running the corresponding `train` config - """ - # split configs into train and predict or eval configs - configs_to_fix = [test_config for test_config in test_config_paths if command in test_config.name] - train_configs = [test_config for test_config in test_config_paths if 'train' in test_config.name] - - for config_to_fix in configs_to_fix: - # figure out which 'train' config corresponds to this 
'predict' or 'eval' config - # by using 'suffix' of config file names. `train` suffix will match `predict`/'eval' suffix - prefix, suffix = config_to_fix.name.split(command) - train_config_to_use = [] - for train_config in train_configs: - train_prefix, train_suffix = train_config.name.split('train') - if train_suffix == suffix: - train_config_to_use.append(train_config) - if len(train_config_to_use) != 1: - raise ValueError( - f'did not find just a single train config that matches with predict config:\n' - f'{config_to_fix}' - f'Matches were: {train_config_to_use}' - ) - train_config_to_use = train_config_to_use[0] - - # now use the config to find the results dir and get the values for the options we need to set - # which are checkpoint_path, spect_scaler_path, and labelmap_path - with train_config_to_use.open('r') as fp: - train_config_toml = toml.load(fp) - root_results_dir = Path(train_config_toml['TRAIN']['root_results_dir']) - results_dir = sorted(root_results_dir.glob('results_*')) - if len(results_dir) != 1: - raise ValueError( - f'did not find just a single results directory in root_results_dir from train_config:\n' - f'{train_config_to_use}' - f'root_results_dir was: {root_results_dir}' - f'Matches for "results_*" were: {results_dir}' - ) - results_dir = results_dir[0] - # these are the only options whose values we need to change - # and they are the same for both predict and eval - checkpoint_path = sorted(results_dir.glob('**/checkpoints/checkpoint.pt'))[0] - spect_scaler_path = sorted(results_dir.glob('StandardizeSpect'))[0] - labelmap_path = sorted(results_dir.glob('labelmap.json'))[0] - - # now add these values to corresponding options in predict / eval config - with config_to_fix.open('r') as fp: - config_toml = toml.load(fp) - config_toml[command.upper()]['checkpoint_path'] = str(checkpoint_path) - config_toml[command.upper()]['spect_scaler_path'] = str(spect_scaler_path) - config_toml[command.upper()]['labelmap_path'] = str(labelmap_path) - with 
config_to_fix.open('w') as fp: - toml.dump(config_toml, fp) - - -# need to run 'train' config before we run 'predict' -# so we can add checkpoints, etc., from training to predict -COMMANDS = ( - 'train', - 'learncurve', - 'eval', - 'predict', -) - - -def main(): - print('making sub-directories in ./tests/test_data/generated/ where files generated by `vak` will go') - make_subdirs_in_generated() - - print('copying config files run to generate test data from ./tests/test_data/configs to ' - './tests/test_data/generated/configs') - copy_config_files() - - test_config_paths = sorted( - GENERATED_TEST_CONFIGS_ROOT.glob('test*toml') - ) - print( - f'will generate test data from these test config files: {test_config_paths}' - ) - for command in COMMANDS: - print( - f'running configs for command: {command}' - ) - command_config_paths = [test_config_path - for test_config_path in test_config_paths - if command in test_config_path.name] - print( - f'using the following configs:\n{command_config_paths}' - ) - if command == 'predict' or command == 'eval': - # fix values for required options in predict / eval configs - # using results from running the corresponding train configs. 
- # this only works if we ran the train configs already, - # which we should have because of ordering of COMMANDS constant above - fix_options_in_configs(test_config_paths, command) - - run_prep(test_config_paths=command_config_paths) - run_results(test_config_paths=command_config_paths) - - -if __name__ == '__main__': - main() diff --git a/tests/conftest.py b/tests/conftest.py index d4bb20901..362698856 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1 +1,18 @@ from .fixtures import * + + +def pytest_addoption(parser): + parser.addoption( + "--models", action="store", default="teenytweetynet", nargs='+', + help="vak models to test, space-separated list of names" + ) + + +def pytest_generate_tests(metafunc): + models = metafunc.config.option.models + if isinstance(models, str): + # wrap a single model name in a list + models = [models] + # **note!** fixture name is singular even though cmdopt is plural + if 'model' in metafunc.fixturenames and models is not None: + metafunc.parametrize("model", models) diff --git a/tests/data_for_tests/configs/configs.json b/tests/data_for_tests/configs/configs.json new file mode 100644 index 000000000..6daa30c59 --- /dev/null +++ b/tests/data_for_tests/configs/configs.json @@ -0,0 +1,116 @@ +{ + "configs": [ + { + "filename": "tweetynet_eval_audio_cbin_annot_notmat.toml", + "model": "tweetynet", + "config_type": "eval", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "tweetynet_learncurve_audio_cbin_annot_notmat.toml", + "model": "tweetynet", + "config_type": "learncurve", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "tweetynet_predict_audio_cbin_annot_notmat.toml", + "model": "tweetynet", + "config_type": "predict", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "tweetynet_predict_audio_wav_annot_koumura.toml", + "model": "tweetynet", + "config_type": "predict", 
+ "audio_format": "wav", + "spect_format": null, + "annot_format": "koumura" + }, + { + "filename": "tweetynet_train_audio_cbin_annot_notmat.toml", + "model": "tweetynet", + "config_type": "train", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "tweetynet_train_audio_wav_annot_koumura.toml", + "model": "tweetynet", + "config_type": "train", + "audio_format": "wav", + "spect_format": null, + "annot_format": "koumura" + }, + { + "filename": "tweetynet_train_spect_mat_annot_yarden.toml", + "model": "tweetynet", + "config_type": "train", + "audio_format": null, + "spect_format": "mat", + "annot_format": "yarden" + }, + { + "filename": "teenytweetynet_eval_audio_cbin_annot_notmat.toml", + "model": "teenytweetynet", + "config_type": "eval", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "teenytweetynet_learncurve_audio_cbin_annot_notmat.toml", + "model": "teenytweetynet", + "config_type": "learncurve", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "teenytweetynet_predict_audio_cbin_annot_notmat.toml", + "model": "teenytweetynet", + "config_type": "predict", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "teenytweetynet_predict_audio_wav_annot_koumura.toml", + "model": "teenytweetynet", + "config_type": "predict", + "audio_format": "wav", + "spect_format": null, + "annot_format": "koumura" + }, + { + "filename": "teenytweetynet_train_audio_cbin_annot_notmat.toml", + "model": "teenytweetynet", + "config_type": "train", + "audio_format": "cbin", + "spect_format": null, + "annot_format": "notmat" + }, + { + "filename": "teenytweetynet_train_audio_wav_annot_koumura.toml", + "model": "teenytweetynet", + "config_type": "train", + "audio_format": "wav", + "spect_format": null, + "annot_format": "koumura" + }, + { + "filename": 
"teenytweetynet_train_spect_mat_annot_yarden.toml", + "model": "teenytweetynet", + "config_type": "train", + "audio_format": null, + "spect_format": "mat", + "annot_format": "yarden" + } + ] +} \ No newline at end of file diff --git a/tests/test_data/configs/invalid_option_config.toml b/tests/data_for_tests/configs/invalid_option_config.toml similarity index 100% rename from tests/test_data/configs/invalid_option_config.toml rename to tests/data_for_tests/configs/invalid_option_config.toml diff --git a/tests/test_data/configs/invalid_section_config.toml b/tests/data_for_tests/configs/invalid_section_config.toml similarity index 100% rename from tests/test_data/configs/invalid_section_config.toml rename to tests/data_for_tests/configs/invalid_section_config.toml diff --git a/tests/test_data/configs/invalid_train_and_learncurve_config.toml b/tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml similarity index 68% rename from tests/test_data/configs/invalid_train_and_learncurve_config.toml rename to tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml index 130e0f888..40d27e9a1 100644 --- a/tests/test_data/configs/invalid_train_and_learncurve_config.toml +++ b/tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml @@ -1,6 +1,6 @@ [PREP] -data_dir = "./tests/test_data/source/cbins/gy6or6/032312" -output_dir = "./tests/test_data/generated/prep/train/audio_cbin_annot_notmat" +data_dir = "./tests/data_for_tests/source/cbins/gy6or6/032312" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" audio_format = "cbin" annot_format = "notmat" labelset = "iabcdefghjk" @@ -30,7 +30,7 @@ ckpt_step = 200 patience = 4 num_workers = 4 device = "cuda" -root_results_dir = "./tests/test_data/generated/results/train/audio_cbin_annot_notmat" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat" [LEARNCURVE] models = 'TweetyNet' @@ -44,7 +44,7 @@ num_workers = 4 
train_set_durs = [ 4, 6 ] num_replicates = 2 device = "cuda" -root_results_dir = './tests/test_data/generated/results/learncurve/audio_cbin_annot_notmat' +root_results_dir = './tests/data_for_tests/generated/results/learncurve/audio_cbin_annot_notmat' [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_eval_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/teenytweetynet_eval_audio_cbin_annot_notmat.toml new file mode 100644 index 000000000..4d2567940 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_eval_audio_cbin_annot_notmat.toml @@ -0,0 +1,30 @@ +[PREP] +labelset = "iabcdefghjk" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" +output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat" +audio_format = "cbin" +annot_format = "notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat" + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [ 500, 10000,] +thresh = 6.25 +transform_type = "log_spect" + +[DATALOADER] +window_size = 44 + +[EVAL] +checkpoint_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/TeenyTweetyNet/checkpoints/max-val-acc-checkpoint.pt" +labelmap_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/labelmap.json" +models = "TeenyTweetyNet" +batch_size = 4 +num_workers = 4 +device = "cuda" +spect_scaler_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" +output_dir = "./tests/data_for_tests/generated/results/eval/audio_cbin_annot_notmat/teenytweetynet" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_learncurve_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/teenytweetynet_learncurve_audio_cbin_annot_notmat.toml new file mode 
100644 index 000000000..ff85ca497 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_learncurve_audio_cbin_annot_notmat.toml @@ -0,0 +1,37 @@ +[PREP] +data_dir = './tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312' +output_dir = './tests/data_for_tests/generated/prep/learncurve/audio_cbin_annot_notmat' +audio_format = 'cbin' +annot_format = 'notmat' +spect_output_dir = "./tests/data_for_tests/generated/prep/learncurve/audio_cbin_annot_notmat" +labelset = 'iabcdefghjk' +train_dur = 50 +val_dur = 15 +test_dur = 30 + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [ 500, 10000 ] +thresh = 6.25 +transform_type = 'log_spect' + +[DATALOADER] +window_size = 44 + +[LEARNCURVE] +models = 'TeenyTweetyNet' +normalize_spectrograms = true +batch_size = 4 +num_epochs = 2 +val_step = 50 +ckpt_step = 200 +patience = 3 +num_workers = 4 +train_set_durs = [ 4, 6 ] +num_replicates = 2 +device = "cuda" +root_results_dir = './tests/data_for_tests/generated/results/learncurve/audio_cbin_annot_notmat/teenytweetynet' + +[TeenyTweetyNet] +optimizer.lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_predict_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/teenytweetynet_predict_audio_cbin_annot_notmat.toml new file mode 100644 index 000000000..8ecd749d2 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_predict_audio_cbin_annot_notmat.toml @@ -0,0 +1,29 @@ +[PREP] +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" +output_dir = "./tests/data_for_tests/generated/prep/predict/audio_cbin_annot_notmat" +audio_format = "cbin" +spect_output_dir = "./tests/data_for_tests/generated/prep/predict/audio_cbin_annot_notmat" + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [ 500, 10000 ] +thresh = 6.25 +transform_type = 'log_spect' + +[DATALOADER] +window_size = 44 + +[PREDICT] +spect_scaler_path = "/home/user/results_181014_194418/spect_scaler" +checkpoint_path = 
"~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/TeenyTweetyNet/checkpoints/max-val-acc-checkpoint.pt" +labelmap_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/labelmap.json" +models = "TeenyTweetyNet" +batch_size = 4 +num_workers = 4 +device = "cuda" +output_dir = "./tests/data_for_tests/generated/results/predict/audio_cbin_annot_notmat/teenytweetynet" +annot_csv_filename = "bl26lb16.041912.annot.csv" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_predict_audio_wav_annot_koumura.toml b/tests/data_for_tests/configs/teenytweetynet_predict_audio_wav_annot_koumura.toml new file mode 100644 index 000000000..432877e48 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_predict_audio_wav_annot_koumura.toml @@ -0,0 +1,29 @@ +[PREP] +data_dir = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0" +output_dir = "./tests/data_for_tests/generated/prep/predict/audio_wav_annot_koumura" +audio_format = "wav" +spect_output_dir = "./tests/data_for_tests/generated/prep/predict/audio_wav_annot_koumura" + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [ 500, 10000 ] +thresh = 6.25 +transform_type = 'log_spect' + +[DATALOADER] +window_size = 44 + +[PREDICT] +spect_scaler_path = "/home/user/results_181014_194418/spect_scaler" +checkpoint_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/TeenyTweetyNet/checkpoints/max-val-acc-checkpoint.pt" +labelmap_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/labelmap.json" +models = "TeenyTweetyNet" +batch_size = 4 +num_workers = 4 +device = "cuda" +output_dir = "./tests/data_for_tests/generated/results/predict/audio_wav_annot_koumura/teenytweetynet" +annot_csv_filename = "Bird0.annot.csv" + 
+[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_train_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/teenytweetynet_train_audio_cbin_annot_notmat.toml new file mode 100644 index 000000000..341af0c04 --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_train_audio_cbin_annot_notmat.toml @@ -0,0 +1,35 @@ +[PREP] +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" +audio_format = "cbin" +annot_format = "notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" +labelset = "iabcdefghjk" +train_dur = 50 +val_dur = 15 +test_dur = 30 + +[SPECT_PARAMS] +fft_size=512 +step_size=64 +freq_cutoffs = [500, 10000] +thresh = 6.25 +transform_type = "log_spect" + +[DATALOADER] +window_size = 44 + +[TRAIN] +models = "TeenyTweetyNet" +normalize_spectrograms = true +batch_size = 4 +num_epochs = 2 +val_step = 50 +ckpt_step = 200 +patience = 3 +num_workers = 4 +device = "cuda" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/teenytweetynet" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_train_audio_wav_annot_koumura.toml b/tests/data_for_tests/configs/teenytweetynet_train_audio_wav_annot_koumura.toml new file mode 100644 index 000000000..7306ee7bf --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_train_audio_wav_annot_koumura.toml @@ -0,0 +1,36 @@ +[PREP] +labelset = '012345678' +data_dir = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura" +audio_format = "wav" +spect_output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura" +annot_format = "koumura" +annot_file = 
"./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0/Annotation.xml" +test_dur = 50 +train_dur = 15 +val_dur = 30 + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [500, 10000,] +thresh = 6.25 +transform_type = "log_spect" + +[DATALOADER] +window_size = 44 + +[TRAIN] +models = "TeenyTweetyNet" +normalize_spectrograms = true +batch_size = 4 +num_epochs = 2 +val_step = 50 +ckpt_step = 200 +patience = 3 +num_workers = 4 +device = "cuda" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_wav_annot_koumura/teenytweetynet" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/teenytweetynet_train_spect_mat_annot_yarden.toml b/tests/data_for_tests/configs/teenytweetynet_train_spect_mat_annot_yarden.toml new file mode 100644 index 000000000..db5a720cd --- /dev/null +++ b/tests/data_for_tests/configs/teenytweetynet_train_spect_mat_annot_yarden.toml @@ -0,0 +1,34 @@ +[PREP] +data_dir = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/spect" +output_dir = "./tests/data_for_tests/generated/prep/train/spect_mat_annot_yarden" +spect_format = "mat" +annot_format = "yarden" +annot_file = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/llb3_annot_subset.mat" +labelset = "range: 1-3,6-14,17-19" +train_dur = 213 +val_dur = 213 + +[SPECT_PARAMS] +fft_size = 512 +step_size = 64 +freq_cutoffs = [500, 10000] +thresh = 6.25 +transform_type = "log_spect" + +[DATALOADER] +window_size = 44 + +[TRAIN] +models = "TeenyTweetyNet" +normalize_spectrograms = false +batch_size = 4 +num_epochs = 2 +val_step = 50 +ckpt_step = 200 +patience = 3 +num_workers = 4 +device = "cuda" +root_results_dir = "./tests/data_for_tests/generated/results/train/spect_mat_annot_yarden/teenytweetynet" + +[TeenyTweetyNet.optimizer] +lr = 0.001 diff --git a/tests/test_data/configs/test_eval_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/tweetynet_eval_audio_cbin_annot_notmat.toml similarity index 69% rename from 
tests/test_data/configs/test_eval_audio_cbin_annot_notmat.toml rename to tests/data_for_tests/configs/tweetynet_eval_audio_cbin_annot_notmat.toml index 465d46b7b..601295acb 100644 --- a/tests/test_data/configs/test_eval_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/tweetynet_eval_audio_cbin_annot_notmat.toml @@ -1,10 +1,10 @@ [PREP] labelset = "iabcdefghjk" -data_dir = "./tests/test_data/source/audio_cbin_annot_notmat/gy6or6/032412" -output_dir = "./tests/test_data/generated/prep/eval/audio_cbin_annot_notmat" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" +output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat" audio_format = "cbin" annot_format = "notmat" -spect_output_dir = "./tests/test_data/generated/prep/eval/audio_cbin_annot_notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat" [SPECT_PARAMS] fft_size = 512 @@ -24,7 +24,7 @@ batch_size = 11 num_workers = 4 device = "cuda" spect_scaler_path = "~/Documents/repos/coding/birdsong/tweetynet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" -output_dir = "./tests/test_data/generated/results/eval/audio_cbin_annot_notmat" +output_dir = "./tests/data_for_tests/generated/results/eval/audio_cbin_annot_notmat/tweetynet" [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/test_data/configs/test_learncurve_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/tweetynet_learncurve_audio_cbin_annot_notmat.toml similarity index 58% rename from tests/test_data/configs/test_learncurve_audio_cbin_annot_notmat.toml rename to tests/data_for_tests/configs/tweetynet_learncurve_audio_cbin_annot_notmat.toml index 85df5935d..d4e2766ba 100644 --- a/tests/test_data/configs/test_learncurve_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/tweetynet_learncurve_audio_cbin_annot_notmat.toml @@ -1,9 +1,9 @@ [PREP] -data_dir = 
'./tests/test_data/source/audio_cbin_annot_notmat/gy6or6/032312' -output_dir = './tests/test_data/generated/prep/learncurve/audio_cbin_annot_notmat' +data_dir = './tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312' +output_dir = './tests/data_for_tests/generated/prep/learncurve/audio_cbin_annot_notmat' audio_format = 'cbin' annot_format = 'notmat' -spect_output_dir = "./tests/test_data/generated/prep/learncurve/audio_cbin_annot_notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/learncurve/audio_cbin_annot_notmat" labelset = 'iabcdefghjk' train_dur = 50 val_dur = 15 @@ -31,7 +31,7 @@ num_workers = 4 train_set_durs = [ 4, 6 ] num_replicates = 2 device = "cuda" -root_results_dir = './tests/test_data/generated/results/learncurve/audio_cbin_annot_notmat' +root_results_dir = './tests/data_for_tests/generated/results/learncurve/audio_cbin_annot_notmat/tweetynet' [TweetyNet] optimizer.lr = 0.001 diff --git a/tests/test_data/configs/test_predict_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/tweetynet_predict_audio_cbin_annot_notmat.toml similarity index 66% rename from tests/test_data/configs/test_predict_audio_cbin_annot_notmat.toml rename to tests/data_for_tests/configs/tweetynet_predict_audio_cbin_annot_notmat.toml index e04cbac25..221b6eb24 100644 --- a/tests/test_data/configs/test_predict_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/tweetynet_predict_audio_cbin_annot_notmat.toml @@ -1,8 +1,8 @@ [PREP] -data_dir = "./tests/test_data/source/audio_cbin_annot_notmat/gy6or6/032412" -output_dir = "./tests/test_data/generated/prep/predict/audio_cbin_annot_notmat" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" +output_dir = "./tests/data_for_tests/generated/prep/predict/audio_cbin_annot_notmat" audio_format = "cbin" -spect_output_dir = "./tests/test_data/generated/prep/predict/audio_cbin_annot_notmat" +spect_output_dir = 
"./tests/data_for_tests/generated/prep/predict/audio_cbin_annot_notmat" [SPECT_PARAMS] fft_size = 512 @@ -22,7 +22,7 @@ models = "TweetyNet" batch_size = 11 num_workers = 4 device = "cuda" -output_dir = "./tests/test_data/generated/results/predict/audio_cbin_annot_notmat" +output_dir = "./tests/data_for_tests/generated/results/predict/audio_cbin_annot_notmat/tweetynet" annot_csv_filename = "bl26lb16.041912.annot.csv" [TweetyNet.optimizer] diff --git a/tests/test_data/configs/test_predict_audio_wav_annot_koumura.toml b/tests/data_for_tests/configs/tweetynet_predict_audio_wav_annot_koumura.toml similarity index 66% rename from tests/test_data/configs/test_predict_audio_wav_annot_koumura.toml rename to tests/data_for_tests/configs/tweetynet_predict_audio_wav_annot_koumura.toml index 2ff8df2bf..d96dbdd05 100644 --- a/tests/test_data/configs/test_predict_audio_wav_annot_koumura.toml +++ b/tests/data_for_tests/configs/tweetynet_predict_audio_wav_annot_koumura.toml @@ -1,8 +1,8 @@ [PREP] -data_dir = "./tests/test_data/source/audio_wav_annot_koumura/Bird0" -output_dir = "./tests/test_data/generated/prep/predict/audio_wav_annot_koumura" +data_dir = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0" +output_dir = "./tests/data_for_tests/generated/prep/predict/audio_wav_annot_koumura" audio_format = "wav" -spect_output_dir = "./tests/test_data/generated/prep/predict/audio_wav_annot_koumura" +spect_output_dir = "./tests/data_for_tests/generated/prep/predict/audio_wav_annot_koumura" [SPECT_PARAMS] fft_size = 512 @@ -22,7 +22,7 @@ models = "TweetyNet" batch_size = 11 num_workers = 4 device = "cuda" -output_dir = "./tests/test_data/generated/results/predict/audio_wav_annot_koumura" +output_dir = "./tests/data_for_tests/generated/results/predict/audio_wav_annot_koumura/tweetynet" annot_csv_filename = "Bird0.annot.csv" [TweetyNet.optimizer] diff --git a/tests/test_data/configs/test_train_audio_cbin_annot_notmat.toml 
b/tests/data_for_tests/configs/tweetynet_train_audio_cbin_annot_notmat.toml similarity index 54% rename from tests/test_data/configs/test_train_audio_cbin_annot_notmat.toml rename to tests/data_for_tests/configs/tweetynet_train_audio_cbin_annot_notmat.toml index 2429a8f66..9a93ac5bb 100644 --- a/tests/test_data/configs/test_train_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/tweetynet_train_audio_cbin_annot_notmat.toml @@ -1,9 +1,9 @@ [PREP] -data_dir = "./tests/test_data/source/audio_cbin_annot_notmat/gy6or6/032312" -output_dir = "./tests/test_data/generated/prep/train/audio_cbin_annot_notmat" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" audio_format = "cbin" annot_format = "notmat" -spect_output_dir = "./tests/test_data/generated/prep/train/audio_cbin_annot_notmat" +spect_output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat" labelset = "iabcdefghjk" train_dur = 50 val_dur = 15 @@ -29,7 +29,7 @@ ckpt_step = 200 patience = 4 num_workers = 4 device = "cuda" -root_results_dir = "./tests/test_data/generated/results/train/audio_cbin_annot_notmat" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/tweetynet" [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/test_data/configs/test_train_audio_wav_annot_koumura.toml b/tests/data_for_tests/configs/tweetynet_train_audio_wav_annot_koumura.toml similarity index 51% rename from tests/test_data/configs/test_train_audio_wav_annot_koumura.toml rename to tests/data_for_tests/configs/tweetynet_train_audio_wav_annot_koumura.toml index f9536d139..3fd42fa65 100644 --- a/tests/test_data/configs/test_train_audio_wav_annot_koumura.toml +++ b/tests/data_for_tests/configs/tweetynet_train_audio_wav_annot_koumura.toml @@ -1,11 +1,11 @@ [PREP] labelset = '012345678' -data_dir = "./tests/test_data/source/audio_wav_annot_koumura/Bird0" 
-output_dir = "./tests/test_data/generated/prep/train/audio_wav_annot_koumura" +data_dir = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura" audio_format = "wav" -spect_output_dir = "./tests/test_data/generated/prep/train/audio_wav_annot_koumura" +spect_output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura" annot_format = "koumura" -annot_file = "./tests/test_data/source/audio_wav_annot_koumura/Bird0/Annotation.xml" +annot_file = "./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0/Annotation.xml" test_dur = 50 train_dur = 15 val_dur = 30 @@ -30,7 +30,7 @@ ckpt_step = 200 patience = 4 num_workers = 4 device = "cuda" -root_results_dir = "./tests/test_data/generated/results/train/audio_wav_annot_koumura" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_wav_annot_koumura/tweetynet" [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/test_data/configs/test_train_spect_mat_annot_yarden.toml b/tests/data_for_tests/configs/tweetynet_train_spect_mat_annot_yarden.toml similarity index 56% rename from tests/test_data/configs/test_train_spect_mat_annot_yarden.toml rename to tests/data_for_tests/configs/tweetynet_train_spect_mat_annot_yarden.toml index 724e9ce86..f3d73b24a 100644 --- a/tests/test_data/configs/test_train_spect_mat_annot_yarden.toml +++ b/tests/data_for_tests/configs/tweetynet_train_spect_mat_annot_yarden.toml @@ -1,9 +1,9 @@ [PREP] -data_dir = "./tests/test_data/source/spect_mat_annot_yarden/llb3/spect" -output_dir = "./tests/test_data/generated/prep/train/spect_mat_annot_yarden" +data_dir = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/spect" +output_dir = "./tests/data_for_tests/generated/prep/train/spect_mat_annot_yarden" spect_format = "mat" annot_format = "yarden" -annot_file = "./tests/test_data/source/spect_mat_annot_yarden/llb3/llb3_annot_subset.mat" +annot_file = 
"./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/llb3_annot_subset.mat" labelset = "range: 1-3,6-14,17-19" train_dur = 213 val_dur = 213 @@ -28,7 +28,7 @@ ckpt_step = 200 patience = 4 num_workers = 4 device = "cuda" -root_results_dir = "./tests/test_data/generated/results/train/spect_mat_annot_yarden" +root_results_dir = "./tests/data_for_tests/generated/results/train/spect_mat_annot_yarden/tweetynet" [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/test_data/generated/.gitkeep b/tests/data_for_tests/generated/.gitkeep similarity index 100% rename from tests/test_data/generated/.gitkeep rename to tests/data_for_tests/generated/.gitkeep diff --git a/tests/test_data/source/.gitkeep b/tests/data_for_tests/source/.gitkeep similarity index 100% rename from tests/test_data/source/.gitkeep rename to tests/data_for_tests/source/.gitkeep diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 4642b04fb..eadbad0c2 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -3,6 +3,7 @@ from .config import * from .dataframe import * from .device import * +from .model import * from .path import * from .spect import * from .split import * diff --git a/tests/fixtures/config.py b/tests/fixtures/config.py index a18a70c8f..468285197 100644 --- a/tests/fixtures/config.py +++ b/tests/fixtures/config.py @@ -8,11 +8,12 @@ @pytest.fixture def test_configs_root(test_data_root): - """Path that points to test_data/configs + """Path that points to data_for_tests/configs Two types of config files in this directory: - 1) those used by the src/scripts/test_data/test_data_generate.py script. - All configs that start with ``test_`` prefix. + 1) those used by the tests/scripts/generate_data_for_tests.py script. + Will be listed in configs.json. See ``specific_config`` fixture below + for details about types of configs. 2) those used by tests that are static, e.g., ``invalid_section_config.toml`` This fixture facilitates access to type (2), e.g. 
in test_config/test_parse @@ -72,7 +73,7 @@ def generated_test_configs_root(generated_test_data_root): # ---- path to config files ---- @pytest.fixture def all_generated_configs(generated_test_configs_root): - return sorted(generated_test_configs_root.glob('test*toml')) + return sorted(generated_test_configs_root.glob('*toml')) @pytest.fixture @@ -98,6 +99,7 @@ def specific_config(generated_test_configs_root, e.g. to the ``tmp_path`` fixture used by unit tests """ def _specific_config(config_type, + model, annot_format, audio_format=None, spect_format=None, @@ -131,6 +133,7 @@ def _specific_config(config_type, if all( [ schematized_config['config_type'] == config_type, + schematized_config['model'] == model, schematized_config['annot_format'] == annot_format, schematized_config['audio_format'] == audio_format, schematized_config['spect_format'] == spect_format, @@ -208,12 +211,14 @@ def specific_config_toml(specific_config): `config_type`, `audio_format`, `spect_format`, `annot_format` """ def _specific_config_toml(config_type, + model, annot_format, audio_format=None, spect_format=None, ): config_path = specific_config( config_type, + model, annot_format, audio_format, spect_format diff --git a/tests/fixtures/dataframe.py b/tests/fixtures/dataframe.py index 76ce3029f..3176c86ca 100644 --- a/tests/fixtures/dataframe.py +++ b/tests/fixtures/dataframe.py @@ -13,12 +13,14 @@ def specific_csv_path(specific_config_toml): `config_type`, `audio_format`, `spect_format`, `annot_format` """ def _specific_csv_path(config_type, + model, annot_format, audio_format=None, spect_format=None, ): config_toml = specific_config_toml( config_type, + model, annot_format, audio_format, spect_format @@ -38,12 +40,14 @@ def specific_dataframe(specific_csv_path): `config_type`, `audio_format`, `spect_format`, `annot_format` """ def _specific_dataframe(config_type, - annot_format, - audio_format=None, - spect_format=None, - ): + model, + annot_format, + audio_format=None, + 
spect_format=None, + ): csv_path = specific_csv_path( config_type, + model, annot_format, audio_format, spect_format diff --git a/tests/fixtures/model.py b/tests/fixtures/model.py new file mode 100644 index 000000000..c9ddab49f --- /dev/null +++ b/tests/fixtures/model.py @@ -0,0 +1,12 @@ +import pytest + + +@pytest.fixture +def default_model(): + """default model used whenever a model is needed to run a test. + Should work regardless of where the test is run, i.e. both on + CI platform and locally. + + currently ``teenytweetynet`` + """ + return 'teenytweetynet' diff --git a/tests/fixtures/path.py b/tests/fixtures/path.py index 3be3c3a51..531e88819 100644 --- a/tests/fixtures/path.py +++ b/tests/fixtures/path.py @@ -8,13 +8,17 @@ @pytest.fixture -def previous_run_path(generated_test_data_root): - learncurve_results_root = generated_test_data_root.joinpath( - 'results/learncurve/audio_cbin_annot_notmat' - ) - results_dirs = sorted(learncurve_results_root.glob(f'{RESULTS_DIR_PREFIX}*')) - assert len(results_dirs) >= 1 - return results_dirs[-1] +def previous_run_path_factory(generated_test_data_root): + + def _previous_run_path(model): + learncurve_results_root = generated_test_data_root.joinpath( + f'results/learncurve/audio_cbin_annot_notmat/{model}' + ) + results_dirs = sorted(learncurve_results_root.glob(f'{RESULTS_DIR_PREFIX}*')) + assert len(results_dirs) >= 1 + return results_dirs[-1] + + return _previous_run_path @pytest.fixture diff --git a/tests/fixtures/test_data.py b/tests/fixtures/test_data.py index c5c786c85..d95c50b7d 100644 --- a/tests/fixtures/test_data.py +++ b/tests/fixtures/test_data.py @@ -7,8 +7,8 @@ @pytest.fixture def test_data_root(): - """Path that points to root of test_data directory""" - return HERE.joinpath('..', 'test_data') + """Path that points to root of data_for_tests directory""" + return HERE.joinpath('..', 'data_for_tests') @pytest.fixture diff --git a/src/scripts/test_data/fix_prep_csv_paths.py 
b/tests/scripts/fix_prep_csv_paths.py similarity index 85% rename from src/scripts/test_data/fix_prep_csv_paths.py rename to tests/scripts/fix_prep_csv_paths.py index 0514d70d4..7af686821 100644 --- a/src/scripts/test_data/fix_prep_csv_paths.py +++ b/tests/scripts/fix_prep_csv_paths.py @@ -3,9 +3,9 @@ import pandas as pd HERE = Path(__file__).parent -PROJ_ROOT = HERE / '..' / '..' / '..' -PROJ_ROOT_ABS = PROJ_ROOT.resolve() -GENERATED_TEST_DATA = PROJ_ROOT / 'tests' / 'test_data' / 'generated' +PROJ_ROOT = HERE / '..' / '..' +PROJ_ROOT_ABS = PROJ_ROOT.resolve() # <- used to fix paths!!! +GENERATED_TEST_DATA = PROJ_ROOT / 'tests' / 'data_for_tests' / 'generated' def main(): diff --git a/tests/scripts/generate_data_for_tests.py b/tests/scripts/generate_data_for_tests.py new file mode 100644 index 000000000..8f203030d --- /dev/null +++ b/tests/scripts/generate_data_for_tests.py @@ -0,0 +1,301 @@ +"""script run by Makefile test-data-generate command + +makes all the 'generated' test data, i.e. files created by vak, +It's called 'generated' test data to distinguish it from the +'source' test data, i.e., files **not** created by vak, that is, +the input data used when vak does create files (csv files, logs, +neural network checkpoints, etc.) 
+ +This script generates: +* temporary config.toml files used when generating results +* `prep`d (prepared) datasets, and results created with those datasets, + both of which were generated using the temporary config.toml files + +all the setup configs send output to one of two places: +for any prep command, the output goes to some child directory of ./tests/data_for_tests/generated/prep +for any command run with a `prep`d dataset, the output goes to some child dir of ./tests/data_for_tests/generated/results + +examples: + when we run `vak prep tests/data_for_tests/generated/configs/tweetynet_train_audio_wav_annot_koumura.toml` + the `prep`d dataset will be in a new directory created in + `./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura` + + when we run `vak train tests/data_for_tests/generated/configs/tweetynet_train_audio_wav_annot_koumura.toml` + it will use the `prep`d dataset csv that's now in + `./tests/data_for_tests/generated/prep/train/audio_wav_annot_koumura`, that the temporary config.toml points to, + and the results will go to a new directory created in + `./tests/data_for_tests/generated/results/train/audio_wav_annot_koumura` + +To set up this directory structure, we iterate through the constants defined below.
+ +The constants are: +- TOP_LEVEL_DIRS + name of sub-directories in ./tests/data_for_tests/generated that correspond to + either temporary config files, 'prep'd datasets, or `results` generated from those `prep`d datasets +- COMMAND_DIRS + names of sub-sub-directories in ./tests/data_for_tests/generated/prep & + ./tests/data_for_tests/generated/results that correspond to cli commands; + e.g., dataset from running `vak prep $A_TRAIN_CONFIG.TOML` will be in + ./tests/data_for_tests/generated/prep/train/audio_{}_annot_{} +- DATA_DIRS + names of sub-sub-directories in ./tests/data_for_tests/$COMMAND that correspond to toy test data sets; + those sub-directories contain test data generated by $COMMAND using the specified toy test data set + +In other words, the parent directory for all the directories this script generates +will have a path of the form: `./tests/data_for_tests/$TOP_LEVEL_DIRS/COMMAND_DIRS/DATA_DIRS`. +For example: +The `prep`d dataset from running `vak prep $AUDIO_CBIN_ANNOT_NOTMAT_CONFIG.TOML` will be in +`./tests/data_for_tests/prep/train/audio_cbin_annot_notmat` +and corresponding results will be in +`./tests/data_for_tests/results/train/audio_cbin_annot_notmat` +The directories will have names with timestamps like `prep_20201015_1115`. +Those are the directories this script generates. +""" +from pathlib import Path +import shutil + +import toml +import vak + +HERE = Path(__file__).parent +TEST_DATA_ROOT = HERE / '..' / 'data_for_tests' +GENERATED_TEST_DATA = TEST_DATA_ROOT / 'generated' +GENERATED_TEST_CONFIGS_ROOT = GENERATED_TEST_DATA / 'configs' + +# convention is that all the config.toml files in tests/data_for_tests/configs +# that should be run when generating test data +# have filenames of the form `{MODEL}_{COMMAND}_audio_{FORMAT}_annot_{FORMAT}.toml' +# **or** `{MODEL}_{COMMAND}_spect_{FORMAT}_annot_{FORMAT}_config.ini' +# e.g., 'tweetynet_learncurve_audio_cbin_annot_notmat.toml'.
+# Below, we iterate over model names +# so glob doesn't pick up static configs that are just used for testing, +# like 'invalid_option_config.toml` +TEST_CONFIGS_ROOT = TEST_DATA_ROOT.joinpath('configs') +CONFIGS_TO_RUN = [] +MODELS = ('teenytweetynet', 'tweetynet') +for model in MODELS: + CONFIGS_TO_RUN.extend(sorted(TEST_CONFIGS_ROOT.glob(f'{model}*.toml'))) + +# the sub-directories that will get made inside `./tests/data_for_tests/generated` +TOP_LEVEL_DIRS = [ + 'configs', + 'prep', + 'results', +] + +# these sub-dirs get made in each of the TOP_LEVEL_DIRS (except for 'configs') +COMMAND_DIRS = [ + 'eval', + 'learncurve', + 'predict', + 'train', +] + +# these sub-dirs get made in each of the COMMAND_DIRS (except for 'configs') +DATA_DIRS = [ + 'audio_cbin_annot_notmat', + 'audio_wav_annot_koumura', + 'spect_mat_annot_yarden', +] + + +def make_subdirs_in_generated(): + """make sub-directories inside ./tests/data_for_tests/generated + + first thing that has to get done before copying configs and + then using those configs to generate results + + makes three directories in data_for_tests/generated: + configs, prep, and results. + prep has one sub-directory for every data "type". + results does also, but in addition will have sub-directories + within those for models. 
+ """ + for top_level_dir in TOP_LEVEL_DIRS: + if top_level_dir == 'configs': + subdir_to_make = GENERATED_TEST_DATA / top_level_dir + subdir_to_make.mkdir(parents=True) + else: + for command_dir in COMMAND_DIRS: + for data_dir in DATA_DIRS: + if top_level_dir == 'prep': + subdir_to_make = GENERATED_TEST_DATA / top_level_dir / command_dir / data_dir + subdir_to_make.mkdir(parents=True) + else: + for model in MODELS: + subdir_to_make = GENERATED_TEST_DATA / top_level_dir / command_dir / data_dir / model + subdir_to_make.mkdir(parents=True) + + +def copy_config_files(): + """copy config files from setup to data_for_tests/configs + + the copied files are the ones that get modified when this setup script runs, + while the originals in this directory remain unchanged. + """ + copied_configs = [] + + for toml_path in CONFIGS_TO_RUN: + if not toml_path.exists(): + raise FileNotFoundError( + f'{toml_path} not found') + + dst = GENERATED_TEST_CONFIGS_ROOT.joinpath(toml_path.name) + print(f"\tcopying to {dst}") + shutil.copy(src=toml_path, dst=dst) + copied_configs.append(dst) + + return copied_configs + + +def run_prep(config_paths): + """run ``vak prep`` to generate data for testing""" + for config_path in config_paths: + if not config_path.exists(): + raise FileNotFoundError( + f'{config_path} not found') + print(f"running vak prep to generate data for tests test, using config: {config_path.name}") + vak.cli.prep.prep(toml_path=config_path) + + +def fix_options_in_configs(config_paths, command): + """fix values assigned to options in predict and eval configs + + Need to do this because both predict and eval configs have options + that can only be assigned *after* running the corresponding `train` config + """ + # split configs into train and predict or eval configs + configs_to_fix = [config for config in config_paths if command in config.name] + train_configs = [config for config in config_paths if 'train' in config.name] + + for config_to_fix in configs_to_fix: + # 
figure out which 'train' config corresponds to this 'predict' or 'eval' config + # by using 'suffix' of config file names. `train` suffix will match `predict`/'eval' suffix + prefix, suffix = config_to_fix.name.split(command) + train_config_to_use = [] + for train_config in train_configs: + train_prefix, train_suffix = train_config.name.split('train') + if train_suffix == suffix: + train_config_to_use.append(train_config) + if len(train_config_to_use) != 1: + raise ValueError( + f'did not find just a single train config that matches with predict config:\n' + f'{config_to_fix}' + f'Matches were: {train_config_to_use}' + ) + train_config_to_use = train_config_to_use[0] + + # now use the config to find the results dir and get the values for the options we need to set + # which are checkpoint_path, spect_scaler_path, and labelmap_path + with train_config_to_use.open('r') as fp: + train_config_toml = toml.load(fp) + root_results_dir = Path(train_config_toml['TRAIN']['root_results_dir']) + results_dir = sorted(root_results_dir.glob('results_*')) + if len(results_dir) != 1: + raise ValueError( + f'did not find just a single results directory in root_results_dir from train_config:\n' + f'{train_config_to_use}' + f'root_results_dir was: {root_results_dir}' + f'Matches for "results_*" were: {results_dir}' + ) + results_dir = results_dir[0] + # these are the only options whose values we need to change + # and they are the same for both predict and eval + checkpoint_path = sorted(results_dir.glob('**/checkpoints/checkpoint.pt'))[0] + spect_scaler_path = sorted(results_dir.glob('StandardizeSpect'))[0] + labelmap_path = sorted(results_dir.glob('labelmap.json'))[0] + + # now add these values to corresponding options in predict / eval config + with config_to_fix.open('r') as fp: + config_toml = toml.load(fp) + config_toml[command.upper()]['checkpoint_path'] = str(checkpoint_path) + config_toml[command.upper()]['spect_scaler_path'] = str(spect_scaler_path) + 
config_toml[command.upper()]['labelmap_path'] = str(labelmap_path) + with config_to_fix.open('w') as fp: + toml.dump(config_toml, fp) + + +# need to run 'train' config before we run 'predict' +# so we can add checkpoints, etc., from training to predict +COMMANDS = ( + 'train', + 'learncurve', + 'eval', + 'predict', +) + + +def main(): + print('making sub-directories in ./tests/data_for_tests/generated/ where files generated by `vak` will go') + make_subdirs_in_generated() + + print('copying config files run to generate test data from ./tests/data_for_tests/configs to ' + './tests/data_for_tests/generated/configs') + config_paths = copy_config_files() + + print( + f'will generate test data from these config files: {config_paths}' + ) + + # ---- only need to run prep once, since prep'd data is the same regardless of model ---- + prep_config_paths = [config_path + for config_path in config_paths + if config_path.name.startswith(MODELS[0])] + run_prep(config_paths=prep_config_paths) + # now add the prep csv from those configs to the corresponding config + # from all the other models + for model in MODELS[1:]: + model_config_paths = [config_path + for config_path in config_paths + if config_path.name.startswith(model)] + for model_config_path in model_config_paths: + # we want the same prep config for MODEL[0] which will have the + # exact same name, but with a different model name as the "prefix" + stem_minus_model = model_config_path.stem.replace(model, '') + prep_config_path = [prep_config_path + for prep_config_path in prep_config_paths + if prep_config_path.stem.endswith(stem_minus_model)] + assert len(prep_config_path) == 1 + prep_config_path = prep_config_path[0] + with prep_config_path.open('r') as fp: + prep_config_toml = toml.load(fp) + with model_config_path.open('r') as fp: + model_config_toml = toml.load(fp) + # find the section that `vak prep` added the `csv_path` to, + # and set `csv_path` for model config to the same value in + # the same section for 
this model config + for section_name, options_dict in prep_config_toml.items(): + if 'csv_path' in options_dict: + model_config_toml[section_name]['csv_path'] = options_dict['csv_path'] + with model_config_path.open('w') as fp: + toml.dump(model_config_toml, fp) + + for model in MODELS: + for command in COMMANDS: + if command == 'prep': + continue # already ran 'prep' + print( + f'running configs for command: {command}' + ) + command_config_paths = [config_path + for config_path in config_paths + if config_path.name.startswith(model) and command in config_path.name] + print( + f'using the following configs:\n{command_config_paths}' + ) + if command == 'predict' or command == 'eval': + # fix values for required options in predict / eval configs + # using results from running the corresponding train configs. + # this only works if we ran the train configs already, + # which we should have because of ordering of COMMANDS constant above + copied_config_paths_this_model = [config_path + for config_path in config_paths + if config_path.name.startswith(model)] + fix_options_in_configs(copied_config_paths_this_model, command) + + for config_path in command_config_paths: + vak.cli.cli.cli(command, config_path) + + +if __name__ == '__main__': + main() diff --git a/tests/test_cli/test_eval.py b/tests/test_cli/test_eval.py index 73cfa373f..d4cbec8f0 100644 --- a/tests/test_cli/test_eval.py +++ b/tests/test_cli/test_eval.py @@ -21,6 +21,7 @@ def test_eval(audio_format, annot_format, specific_config, tmp_path, + model, device): output_dir = tmp_path.joinpath(f'test_eval_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -35,6 +36,7 @@ def test_eval(audio_format, ] toml_path = specific_config(config_type='eval', + model=model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_cli/test_learncurve.py b/tests/test_cli/test_learncurve.py index f61a94324..e4175cd99 100644 --- a/tests/test_cli/test_learncurve.py 
+++ b/tests/test_cli/test_learncurve.py @@ -11,6 +11,7 @@ def test_learncurve(specific_config, tmp_path, + model, device): root_results_dir = tmp_path.joinpath('test_learncurve_root_results_dir') root_results_dir.mkdir() @@ -25,6 +26,7 @@ def test_learncurve(specific_config, ] toml_path = specific_config(config_type='learncurve', + model=model, audio_format='cbin', annot_format='notmat', options_to_change=options_to_change) @@ -51,8 +53,9 @@ def test_learncurve(specific_config, ) def test_learncurve_previous_run_path(specific_config, tmp_path, + model, device, - previous_run_path, + previous_run_path_factory, window_size): root_results_dir = tmp_path.joinpath('test_learncurve_root_results_dir') root_results_dir.mkdir() @@ -66,13 +69,14 @@ def test_learncurve_previous_run_path(specific_config, 'value': device}, {'section': 'LEARNCURVE', 'option': 'previous_run_path', - 'value': str(previous_run_path)}, + 'value': str(previous_run_path_factory(model))}, {'section': 'DATALOADER', 'option': 'window_size', 'value': window_size} ] toml_path = specific_config(config_type='learncurve', + model=model, audio_format='cbin', annot_format='notmat', options_to_change=options_to_change) diff --git a/tests/test_cli/test_predict.py b/tests/test_cli/test_predict.py index e9d86c277..24df42c59 100644 --- a/tests/test_cli/test_predict.py +++ b/tests/test_cli/test_predict.py @@ -22,6 +22,7 @@ def test_predict(audio_format, annot_format, specific_config, tmp_path, + model, device): output_dir = tmp_path.joinpath(f'test_predict_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -36,6 +37,7 @@ def test_predict(audio_format, ] toml_path = specific_config(config_type='predict', + model=model, audio_format=audio_format, annot_format=annot_format, options_to_change=options_to_change) diff --git a/tests/test_cli/test_prep.py b/tests/test_cli/test_prep.py index 5b5eb8d5e..69a1c1362 100644 --- a/tests/test_cli/test_prep.py +++ b/tests/test_cli/test_prep.py @@ -28,8 +28,10 @@ def 
test_purpose_from_toml(config_type, spect_format, annot_format, specific_config, + default_model, tmp_path): toml_path = specific_config(config_type=config_type, + model=default_model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format) @@ -55,6 +57,7 @@ def test_prep(config_type, spect_format, annot_format, specific_config, + default_model, tmp_path): output_dir = tmp_path.joinpath(f'test_prep_{config_type}_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -73,6 +76,7 @@ def test_prep(config_type, }, ] toml_path = specific_config(config_type=config_type, + model=default_model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, @@ -107,6 +111,7 @@ def test_prep_csv_path_raises(config_type, spect_format, annot_format, specific_config, + default_model, tmp_path): output_dir = tmp_path.joinpath(f'test_prep_{config_type}_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -119,6 +124,7 @@ def test_prep_csv_path_raises(config_type, }, ] toml_path = specific_config(config_type=config_type, + model=default_model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_cli/test_train.py b/tests/test_cli/test_train.py index 3f0b43258..a2788cfc3 100644 --- a/tests/test_cli/test_train.py +++ b/tests/test_cli/test_train.py @@ -23,6 +23,7 @@ def test_train(audio_format, annot_format, specific_config, tmp_path, + model, device): root_results_dir = tmp_path.joinpath('test_train_root_results_dir') root_results_dir.mkdir() @@ -37,6 +38,7 @@ def test_train(audio_format, ] toml_path = specific_config(config_type='train', + model=model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_config/test_config.py b/tests/test_config/test_config.py index 771add883..f64b7382b 100644 --- a/tests/test_config/test_config.py +++ b/tests/test_config/test_config.py @@ -2,11 +2,15 @@ def 
test_config_attrs_class( - all_generated_configs_toml_path_pairs + all_generated_configs_toml_path_pairs, + default_model, ): """test that instantiating Config class works as expected""" for config_toml, toml_path in all_generated_configs_toml_path_pairs: - # this is basically the body of the ``config.parse.from_toml`` function. + if default_model not in str(toml_path): + continue # only need to check configs for one model + # also avoids FileNotFoundError on CI + # this is basically the body of the ``config.parse.from_toml`` function. config_dict = {} for section_name in list(vak.config.parse.SECTION_CLASSES.keys()): if section_name in config_toml: diff --git a/tests/test_config/test_parse.py b/tests/test_config/test_parse.py index a14945a5d..a52f9a6a0 100644 --- a/tests/test_config/test_parse.py +++ b/tests/test_config/test_parse.py @@ -24,11 +24,15 @@ ) def test_parse_config_section_returns_attrs_class( section_name, - all_generated_configs_toml_path_pairs + all_generated_configs_toml_path_pairs, + default_model, ): """test that ``vak.config.parse.parse_config_section`` returns an instance of ``vak.config.learncurve.LearncurveConfig``""" for config_toml, toml_path in all_generated_configs_toml_path_pairs: + if default_model not in str(toml_path): + continue # only need to check configs for one model + # also avoids FileNotFoundError on CI if section_name in config_toml: config_section_obj = vak.config.parse.parse_config_section( config_toml=config_toml, @@ -52,18 +56,25 @@ def test_parse_config_section_returns_attrs_class( ) def test_parse_config_section_missing_options_raises( section_name, - all_generated_configs_toml_path_pairs + all_generated_configs_toml_path_pairs, + default_model, ): """test that configs without the required options in a section raise KeyError""" if vak.config.parse.REQUIRED_OPTIONS[section_name] is None: pytest.skip(f'no required options to test for section: {section_name}') + # in comprehensions below, filter by default model + # 
because we only need to check configs for one model + # also avoids FileNotFoundError on CI if section_name == 'PREP': - configs_toml_path_pairs = all_generated_configs_toml_path_pairs + configs_toml_path_pairs = ((config_toml, toml_path) + for config_toml, toml_path in all_generated_configs_toml_path_pairs + if default_model in str(toml_path)) else: configs_toml_path_pairs = ((config_toml, toml_path) for config_toml, toml_path in all_generated_configs_toml_path_pairs - if section_name.lower() in toml_path.name) + if section_name.lower() in toml_path.name and default_model in str(toml_path)) + for config_toml, toml_path in configs_toml_path_pairs: if section_name in config_toml: for option in vak.config.parse.REQUIRED_OPTIONS[section_name]: @@ -232,8 +243,11 @@ def test_load_from_toml_path_raises_when_config_doesnt_exist(config_that_doesnt_ vak.config.parse._load_toml_from_path(config_that_doesnt_exist) -def test_from_toml_path_returns_instance_of_config(all_generated_configs): +def test_from_toml_path_returns_instance_of_config(all_generated_configs, default_model): for toml_path in all_generated_configs: + if default_model not in str(toml_path): + continue # only need to check configs for one model + # also avoids FileNotFoundError on CI config_obj = vak.config.parse.from_toml_path(toml_path) assert isinstance(config_obj, vak.config.parse.Config) @@ -243,8 +257,11 @@ def test_from_toml_path_raises_when_config_doesnt_exist(config_that_doesnt_exist vak.config.parse.from_toml_path(config_that_doesnt_exist) -def test_from_toml(all_generated_configs_toml_path_pairs): +def test_from_toml(all_generated_configs_toml_path_pairs, default_model): for config_toml, toml_path in all_generated_configs_toml_path_pairs: + if default_model not in str(toml_path): + continue # only need to check configs for one model + # also avoids FileNotFoundError on CI config_obj = vak.config.parse.from_toml(config_toml, toml_path) assert isinstance(config_obj, vak.config.parse.Config) diff 
--git a/tests/test_core/test_eval.py b/tests/test_core/test_eval.py index a34e70a47..532f2e34d 100644 --- a/tests/test_core/test_eval.py +++ b/tests/test_core/test_eval.py @@ -28,6 +28,7 @@ def test_eval(audio_format, annot_format, specific_config, tmp_path, + model, device): output_dir = tmp_path.joinpath(f'test_eval_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -42,6 +43,7 @@ def test_eval(audio_format, ] toml_path = specific_config(config_type='eval', + model=model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_core/test_learncurve.py b/tests/test_core/test_learncurve.py index 9162b2163..112c6fcf4 100644 --- a/tests/test_core/test_learncurve.py +++ b/tests/test_core/test_learncurve.py @@ -51,6 +51,7 @@ def learncurve_output_matches_expected(cfg, def test_learncurve(specific_config, tmp_path, + model, device): options_to_change = { 'section': 'LEARNCURVE', @@ -59,6 +60,7 @@ def test_learncurve(specific_config, } toml_path = specific_config(config_type='learncurve', + model=model, audio_format='cbin', annot_format='notmat', options_to_change=options_to_change) @@ -98,6 +100,7 @@ def test_learncurve(specific_config, def test_learncurve_no_results_path(specific_config, tmp_path, + model, device): root_results_dir = tmp_path.joinpath('test_learncurve_no_results_path') root_results_dir.mkdir() @@ -116,6 +119,7 @@ def test_learncurve_no_results_path(specific_config, ] toml_path = specific_config(config_type='learncurve', + model=model, audio_format='cbin', annot_format='notmat', options_to_change=options_to_change) diff --git a/tests/test_core/test_predict.py b/tests/test_core/test_predict.py index 421731a05..14128a707 100644 --- a/tests/test_core/test_predict.py +++ b/tests/test_core/test_predict.py @@ -32,6 +32,7 @@ def test_predict(audio_format, save_net_outputs, specific_config, tmp_path, + model, device): output_dir = 
tmp_path.joinpath(f'test_predict_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -48,6 +49,7 @@ def test_predict(audio_format, 'value': save_net_outputs} ] toml_path = specific_config(config_type='predict', + model=model, audio_format=audio_format, annot_format=annot_format, options_to_change=options_to_change) diff --git a/tests/test_core/test_prep.py b/tests/test_core/test_prep.py index 3f972c925..8a70c5b2d 100644 --- a/tests/test_core/test_prep.py +++ b/tests/test_core/test_prep.py @@ -45,6 +45,7 @@ def test_prep(config_type, spect_format, annot_format, specific_config, + default_model, tmp_path): output_dir = tmp_path.joinpath(f'test_prep_{config_type}_{audio_format}_{spect_format}_{annot_format}') output_dir.mkdir() @@ -55,6 +56,7 @@ def test_prep(config_type, 'value': str(output_dir) } toml_path = specific_config(config_type=config_type, + model=default_model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_core/test_train.py b/tests/test_core/test_train.py index 679d8b0ef..f7944d7f4 100644 --- a/tests/test_core/test_train.py +++ b/tests/test_core/test_train.py @@ -44,6 +44,7 @@ def test_train(audio_format, annot_format, specific_config, tmp_path, + model, device): options_to_change = { 'section': 'TRAIN', @@ -51,6 +52,7 @@ def test_train(audio_format, 'value': device } toml_path = specific_config(config_type='train', + model=model, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, diff --git a/tests/test_data/configs/configs.json b/tests/test_data/configs/configs.json deleted file mode 100644 index 0bf924980..000000000 --- a/tests/test_data/configs/configs.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "configs": [ - { - "filename": "test_eval_audio_cbin_annot_notmat.toml", - "config_type": "eval", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat" - }, - { - "filename": "test_learncurve_audio_cbin_annot_notmat.toml", - 
"config_type": "learncurve", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat" - }, - { - "filename": "test_predict_audio_cbin_annot_notmat.toml", - "config_type": "predict", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat" - }, - { - "filename": "test_predict_audio_wav_annot_koumura.toml", - "config_type": "predict", - "audio_format": "wav", - "spect_format": null, - "annot_format": "koumura" - }, - { - "filename": "test_train_audio_cbin_annot_notmat.toml", - "config_type": "train", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat" - }, - { - "filename": "test_train_audio_wav_annot_koumura.toml", - "config_type": "train", - "audio_format": "wav", - "spect_format": null, - "annot_format": "koumura" - }, - { - "filename": "test_train_spect_mat_annot_yarden.toml", - "config_type": "train", - "audio_format": null, - "spect_format": "mat", - "annot_format": "yarden" - } - ] -} \ No newline at end of file diff --git a/tests/test_files/test_files.py b/tests/test_files/test_files.py index ba7999f61..d32c370cc 100644 --- a/tests/test_files/test_files.py +++ b/tests/test_files/test_files.py @@ -29,8 +29,8 @@ def test_files_from_dir_with_cbin(audio_dir_cbin, @pytest.mark.parametrize( ('dir_path', 'ext'), - [('./tests/test_data/source/audio_wav_annot_textgrid/AGBk/', 'WAV'), - ('./tests/test_data/source/audio_wav_annot_koumura/Bird0/Wave', 'wav'), + [('./tests/data_for_tests/source/audio_wav_annot_textgrid/AGBk/', 'WAV'), + ('./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0/Wave', 'wav'), ] ) def test_from_dir_is_case_insensitive(dir_path, ext): @@ -43,8 +43,8 @@ def test_from_dir_is_case_insensitive(dir_path, ext): @pytest.mark.parametrize( ('dir_path', 'ext'), - [('./tests/test_data/source/audio_wav_annot_textgrid/', 'WAV'), - ('./tests/test_data/source/audio_wav_annot_koumura/Bird0', 'wav'), + [('./tests/data_for_tests/source/audio_wav_annot_textgrid/', 'WAV'), + 
('./tests/data_for_tests/source/audio_wav_annot_koumura/Bird0', 'wav'), ] ) def test_from_dir_searches_child_dir(dir_path, ext): diff --git a/tests/test_labeled_timebins.py b/tests/test_labeled_timebins.py index 2a1b32870..9d872aea1 100644 --- a/tests/test_labeled_timebins.py +++ b/tests/test_labeled_timebins.py @@ -136,9 +136,11 @@ def test_lbl_tb2segments_recovers_onsets_offsets_labels(): def test_lbl_tb2segments_recovers_onsets_offsets_labels_from_real_data( specific_dataframe, labelset_yarden, + model, ): """test that ``lbl_tb2segments`` recovers onsets and offsets from real data""" vak_df = specific_dataframe(config_type='train', + model=model, spect_format='mat', annot_format='yarden') labelmap = vak.labels.to_map(