From 96d2f21e023f7b81c57ad4e7b5767946c89f6ebe Mon Sep 17 00:00:00 2001
From: Josh Joseph
Date: Thu, 13 Feb 2020 19:56:55 -0500
Subject: [PATCH 1/7] replaced Pipfile with requirements.txt

---
 Pipfile          | 31 -------------------------------
 requirements.txt | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 31 deletions(-)
 delete mode 100644 Pipfile
 create mode 100644 requirements.txt

diff --git a/Pipfile b/Pipfile
deleted file mode 100644
index 214f584..0000000
--- a/Pipfile
+++ /dev/null
@@ -1,31 +0,0 @@
-[[source]]
-name = "pypi"
-url = "https://pypi.org/simple"
-verify_ssl = true
-
-[dev-packages]
-
-[packages]
-numpy = "*"
-tensorflow-gpu = "*"
-opencv-python = "*"
-scikit-image = "*"
-sklearn = "*"
-progress = "*"
-Keras = "*"
-ipython = "*"
-segmentation-models = "*"
-pytz = "*"
-tensorboard = "*"
-pillow = "*"
-pandas = "*"
-google-cloud-storage = "*"
-pyyaml = "*"
-jupyter = "*"
-crcmod = "*"
-gitpython = "*"
-matplotlib = "*"
-ipykernel = "*"
-
-[requires]
-python_version = "3.5"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c545f33
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,20 @@
+numpy
+tensorflow-gpu
+opencv-python
+scikit-image
+sklearn
+progress
+Keras
+ipython
+segmentation-models
+pytz
+tensorboard
+pillow
+pandas
+google-cloud-storage
+pyyaml
+jupyter
+crcmod
+gitpython
+matplotlib
+ipykernel
\ No newline at end of file
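The Pipfile that pipenv paired with a lock file is gone, and the new requirements.txt is unpinned, so each provisioning run installs whatever versions are current on PyPI. A minimal sketch of how one could pin the environment after a verified install (the freeze step is a suggestion, not part of this series):

    pip3 install -r requirements.txt
    # capture the exact versions that were installed, for reproducible rebuilds
    pip3 freeze > requirements.txt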
From ccaca00cf9747bb1261c04fa1a6000483332a443 Mon Sep 17 00:00:00 2001
From: Josh Joseph
Date: Thu, 13 Feb 2020 19:57:19 -0500
Subject: [PATCH 2/7] removed references to pipenv in docs

---
 docs/data_ingestion.md      | 2 +-
 docs/dataset_preparation.md | 2 +-
 docs/inference.md           | 2 +-
 docs/testing.md             | 2 +-
 docs/training.md            | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/data_ingestion.md b/docs/data_ingestion.md
index a83fcfe..c9d12dc 100644
--- a/docs/data_ingestion.md
+++ b/docs/data_ingestion.md
@@ -19,7 +19,7 @@ Infrastructure that will be used:
 1. When this completes, you should see your stack in `gs://<gcp_bucket_name>/raw-data/<zipped_stack_name>`.
 1. Use Terraform to start the appropriate GCP virtual machine (`terraform apply` or `terraform apply -lock=false`).
 1. Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been created named `<project_name>-<username>` where `<project_name>` is the name of your GCP project and `<username>` is your GCP user name.
-1. SSH into the GCP virtual machine, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and process a single zip file by running the command: `pipenv run python3 ingest_raw_data.py --gcp-bucket gs://<gcp_bucket_name> --zipped-stack gs://<gcp_bucket_name>/raw-data/<zipped_stack_name>`. Alternatively, to process an entire folder of zipped stacks, use `pipenv run python3 ingest_raw_data.py --gcp-bucket gs://<gcp_bucket_name>` (excluding the `--zipped-stack` argument), which will process all of the files in `gs://<gcp_bucket_name>/raw-data` (`ingest_raw_data.py` knows to process only `/raw-data`).
+1. SSH into the GCP virtual machine, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and process a single zip file by running the command: `python3 ingest_raw_data.py --gcp-bucket gs://<gcp_bucket_name> --zipped-stack gs://<gcp_bucket_name>/raw-data/<zipped_stack_name>`. Alternatively, to process an entire folder of zipped stacks, use `python3 ingest_raw_data.py --gcp-bucket gs://<gcp_bucket_name>` (excluding the `--zipped-stack` argument), which will process all of the files in `gs://<gcp_bucket_name>/raw-data` (`ingest_raw_data.py` knows to process only `/raw-data`).
 1. When this completes, you should see your stack in `gs://<gcp_bucket_name>/processed-data/<stack_name>`.
 1. Use Terraform to terminate the appropriate GCP virtual machine (`terraform destroy`). Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been destroyed.
diff --git a/docs/dataset_preparation.md b/docs/dataset_preparation.md
index b10c3f5..bc05e39 100644
--- a/docs/dataset_preparation.md
+++ b/docs/dataset_preparation.md
@@ -15,6 +15,6 @@ Infrastructure that will be used:
 1. Either edit the configuration file `configs/data_preparation.yaml` or create your own configuration file and place it in the `configs` folder.
 1. Use Terraform to start the appropriate GCP virtual machine (`terraform apply`). This will copy the current code base from your local machine to the GCP machine so make sure any changes to the configuration file are saved before this step is run.
 1. Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been created named `<project_name>-<username>` where `<project_name>` is the name of your GCP project and `<username>` is your GCP user name.
-1. To create a dataset, SSH into the virtual machine `<project_name>-<username>`, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and run `pipenv run python3 prepare_dataset.py --gcp-bucket <gcp_bucket_name> --config-file configs/<config_file>.yaml`.
+1. To create a dataset, SSH into the virtual machine `<project_name>-<username>`, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and run `python3 prepare_dataset.py --gcp-bucket <gcp_bucket_name> --config-file configs/<config_file>.yaml`.
 1. Once dataset preparation has finished, you should see the folder `<gcp_bucket_name>/datasets/<dataset_name>` has been created and populated, where `<dataset_name>` was defined in `configs/data_preparation.yaml`.
 1. Use Terraform to terminate the appropriate GCP virtual machine (`terraform destroy`). Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been destroyed.
diff --git a/docs/inference.md b/docs/inference.md
index 47de087..8aaaca1 100644
--- a/docs/inference.md
+++ b/docs/inference.md
@@ -14,7 +14,7 @@ Infrastructure that will be used:
 1. If the unsegmented stacks are not in a GCP bucket, see the previous workflow `Copying the raw data into the cloud for storage and usage`.
 1. Use Terraform to start the appropriate GCP virtual machine (`terraform apply`).
 1. Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been created named `<project_name>-<username>` where `<project_name>` is the name of your GCP project and `<username>` is your GCP user name.
-1. To infer (segment) the damage of the stacks, SSH into the virtual machine `<project_name>-<username>`, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and run `pipenv run python3 infer_segmentation.py --gcp-bucket <gcp_bucket_name> --stack-id <stack_id> --model-id <model_id>`.
+1. To infer (segment) the damage of the stacks, SSH into the virtual machine `<project_name>-<username>`, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and run `python3 infer_segmentation.py --gcp-bucket <gcp_bucket_name> --stack-id <stack_id> --model-id <model_id>`.
 1. Once inference has finished, you should see the folder `<gcp_bucket_name>/inferences/<inference_id>` has been created and populated, where `<inference_id>` is `<stack_id>_<model_id>`.
 1. Use Terraform to terminate the appropriate GCP virtual machine (`terraform destroy`). Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been destroyed.
diff --git a/docs/testing.md b/docs/testing.md
index e32a62c..f73f7ba 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -13,6 +13,6 @@ Infrastructure that will be used:
 1. If the stacks are not in a GCP bucket, see the previous workflow `Copying the raw data into the cloud for storage and usage`.
 1. Use Terraform to start the appropriate GCP virtual machine (`terraform apply`).
 1. Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been created named `<project_name>-<username>` where `<project_name>` is the name of your GCP project and `<username>` is your GCP user name.
-1. To create a dataset, SSH into the virtual machine `<project_name>-<username>`, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and run `pipenv run python3 test_segmentation_model.py --gcp-bucket <gcp_bucket_name> --dataset-id <dataset_id> --model-id <model_id>`.
+1. To create a dataset, SSH into the virtual machine `<project_name>-<username>`, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and run `python3 test_segmentation_model.py --gcp-bucket <gcp_bucket_name> --dataset-id <dataset_id> --model-id <model_id>`.
 1. Once dataset preparation has finished, you should see the folder `<gcp_bucket_name>/tests/<test_id>` has been created and populated, where `<test_id>` is `<dataset_id>_<model_id>`.
 1. Use Terraform to terminate the appropriate GCP virtual machine (`terraform destroy`). Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been destroyed.
diff --git a/docs/training.md b/docs/training.md
index 2a34334..e9d727c 100644
--- a/docs/training.md
+++ b/docs/training.md
@@ -14,7 +14,7 @@ Infrastructure that will be used:
 1. Either edit the configuration file `configs/train_config.yaml` or create your own configuration file and place it in the `configs` folder.
 1. Use Terraform to start the appropriate GCP virtual machine (`terraform apply`). This will copy the current code base from your local machine to the GCP machine so make sure any changes to the configuration file are saved before this step is run.
 1. Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been created named `<project_name>-<username>` where `<project_name>` is the name of your GCP project and `<username>` is your GCP user name.
-1. To create a dataset, SSH into the virtual machine `<project_name>-<username>`, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and run `pipenv run python3 train_segmentation_model.py --gcp-bucket <gcp_bucket_name> --config-file configs/<config_file>.yaml`.
+1. To create a dataset, SSH into the virtual machine `<project_name>-<username>`, start tmux (`tmux`), `cd` into the code directory (`cd necstlab-damage-segmentation`), and run `python3 train_segmentation_model.py --gcp-bucket <gcp_bucket_name> --config-file configs/<config_file>.yaml`.
 1. Once dataset preparation has finished, you should see the folder `<gcp_bucket_name>/models/<model_name>-<timestamp>` has been created and populated, where `<model_name>` was defined in `configs/train_config.yaml`.
 1. Use Terraform to terminate the appropriate GCP virtual machine (`terraform destroy`). Once Terraform finishes, you can check the GCP virtual machine console to ensure a virtual machine has been destroyed.
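With patches 2 and 3 applied, nothing under docs/ or scripts/ should mention pipenv anymore. A quick way to confirm that, assuming grep is available on the VM or in a local clone:

    # prints nothing (and exits non-zero) once all pipenv references are gone
    grep -rn pipenv docs/ scripts/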
From 401103d745007debf294fb37ec62a1483629abe7 Mon Sep 17 00:00:00 2001
From: Josh Joseph
Date: Thu, 13 Feb 2020 19:57:35 -0500
Subject: [PATCH 3/7] fixed uses of pipenv in scripts

---
 scripts/resource-creation.sh |  8 +++-----
 scripts/run_all_large.sh     | 12 ++++++------
 scripts/run_all_small.sh     | 12 ++++++------
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/scripts/resource-creation.sh b/scripts/resource-creation.sh
index 950cdfd..09b76d5 100644
--- a/scripts/resource-creation.sh
+++ b/scripts/resource-creation.sh
@@ -34,8 +34,6 @@ sudo apt-get install -y cmake \
 	tree \
 	p7zip-full
 
-sudo pip3 uninstall crcmod
-sudo pip3 install pipenv
-sudo pip3 install --no-cache-dir -U crcmod
-
-cd necstlab-damage-segmentation && pipenv install
\ No newline at end of file
+pip3 uninstall crcmod
+pip3 install --no-cache-dir -U crcmod
+pip3 install -r requirements.txt
diff --git a/scripts/run_all_large.sh b/scripts/run_all_large.sh
index 1332d8b..bbe57e6 100644
--- a/scripts/run_all_large.sh
+++ b/scripts/run_all_large.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 
 
-pipenv run python3 ingest_raw_data.py --gcp-bucket gs://necstlab-sandbox
-pipenv run python3 prepare_dataset.py --gcp-bucket gs://necstlab-sandbox --config-file configs/dataset-large.yaml
-pipenv run python3 train_segmentation_model.py --gcp-bucket gs://necstlab-sandbox --config-file configs/train-large.yaml
-pipenv run python3 test_segmentation_model.py --gcp-bucket gs://necstlab-sandbox --dataset-id dataset-large --model-id segmentation-model-large_20190924T180419Z
-pipenv run python3 infer_segmentation.py --gcp-bucket gs://necstlab-sandbox --model-id segmentation-model-large_20190924T180419Z --stack-id THIN_REF_S2_P1_L3_2496_1563_2159
-pipenv run python3 infer_segmentation.py --gcp-bucket gs://necstlab-sandbox --model-id segmentation-model-large_20190924T180419Z --stack-id 8bit_AS4_S2_P1_L6_2560_1750_2160
\ No newline at end of file
+python3 ingest_raw_data.py --gcp-bucket gs://necstlab-sandbox
+python3 prepare_dataset.py --gcp-bucket gs://necstlab-sandbox --config-file configs/dataset-large.yaml
+python3 train_segmentation_model.py --gcp-bucket gs://necstlab-sandbox --config-file configs/train-large.yaml
+python3 test_segmentation_model.py --gcp-bucket gs://necstlab-sandbox --dataset-id dataset-large --model-id segmentation-model-large_20190924T180419Z
+python3 infer_segmentation.py --gcp-bucket gs://necstlab-sandbox --model-id segmentation-model-large_20190924T180419Z --stack-id THIN_REF_S2_P1_L3_2496_1563_2159
+python3 infer_segmentation.py --gcp-bucket gs://necstlab-sandbox --model-id segmentation-model-large_20190924T180419Z --stack-id 8bit_AS4_S2_P1_L6_2560_1750_2160
\ No newline at end of file
diff --git a/scripts/run_all_small.sh b/scripts/run_all_small.sh
index c1d28b9..8e44afd 100644
--- a/scripts/run_all_small.sh
+++ b/scripts/run_all_small.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 
 
-pipenv run python3 ingest_raw_data.py --gcp-bucket gs://necstlab-sandbox
-pipenv run python3 prepare_dataset.py --gcp-bucket gs://necstlab-sandbox --config-file configs/dataset-small.yaml
-pipenv run python3 train_segmentation_model.py --gcp-bucket gs://necstlab-sandbox --config-file configs/train-small.yaml
-pipenv run python3 test_segmentation_model.py --gcp-bucket gs://necstlab-sandbox --dataset-id dataset-small --model-id segmentation-model-small_20190924T191717Z
-pipenv run python3 infer_segmentation.py --gcp-bucket gs://necstlab-sandbox --model-id segmentation-model-small_20190924T191717Z --stack-id THIN_REF_S2_P1_L3_2496_1563_2159
-pipenv run python3 infer_segmentation.py --gcp-bucket gs://necstlab-sandbox --model-id segmentation-model-small_20190924T191717Z --stack-id 8bit_AS4_S2_P1_L6_2560_1750_2160
\ No newline at end of file
+python3 ingest_raw_data.py --gcp-bucket gs://necstlab-sandbox
+python3 prepare_dataset.py --gcp-bucket gs://necstlab-sandbox --config-file configs/dataset-small.yaml
+python3 train_segmentation_model.py --gcp-bucket gs://necstlab-sandbox --config-file configs/train-small.yaml
+python3 test_segmentation_model.py --gcp-bucket gs://necstlab-sandbox --dataset-id dataset-small --model-id segmentation-model-small_20190924T191717Z
+python3 infer_segmentation.py --gcp-bucket gs://necstlab-sandbox --model-id segmentation-model-small_20190924T191717Z --stack-id THIN_REF_S2_P1_L3_2496_1563_2159
+python3 infer_segmentation.py --gcp-bucket gs://necstlab-sandbox --model-id segmentation-model-small_20190924T191717Z --stack-id 8bit_AS4_S2_P1_L6_2560_1750_2160
\ No newline at end of file
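The crcmod uninstall/reinstall that this series keeps in resource-creation.sh exists so that gsutil ends up with the compiled C extension of crcmod rather than the slow pure-Python fallback. One way to verify the result, assuming gsutil is installed on the VM:

    # the output should include a line reading "compiled crcmod: True"
    gsutil version -l | grep crcmod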
From 4fe871a376eff0ec8d854086751f3aa53c73df80 Mon Sep 17 00:00:00 2001
From: Josh Joseph
Date: Thu, 13 Feb 2020 22:07:47 -0500
Subject: [PATCH 4/7] fix to Error: rpc error: code = ResourceExhausted desc =
 grpc: received message larger than max error by logging to file

---
 gcp.tf                       |  5 ++++-
 scripts/resource-creation.sh | 11 +++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/gcp.tf b/gcp.tf
index 0733092..e7d0400 100644
--- a/gcp.tf
+++ b/gcp.tf
@@ -71,7 +71,10 @@ resource "google_compute_instance" "vm" {
   }
 
   provisioner "remote-exec" {
-    script = "./scripts/resource-creation.sh"
+    inline = [
+      "echo 'Running resource creation script... (this may take 10+ minutes)'",
+      "bash ~/${var.repository_name}/scripts/resource-creation.sh > resource-creation.log"
+    ]
     connection {
       user = "${var.username}"
       type = "ssh"
diff --git a/scripts/resource-creation.sh b/scripts/resource-creation.sh
index 09b76d5..78627da 100644
--- a/scripts/resource-creation.sh
+++ b/scripts/resource-creation.sh
@@ -15,13 +15,13 @@ sudo apt-get -y install cuda
 export PATH=/usr/local/cuda-10.1/bin${PATH:+:${PATH}}
 
 # install cudnn
+echo "Installing cudnn"
 wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7_7.6.5.32-1+cuda10.1_amd64.deb
 sudo dpkg -i libcudnn7_7.6.5.32-1+cuda10.1_amd64.deb
 
 wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7-dev_7.6.5.32-1+cuda10.1_amd64.deb
 sudo dpkg -i libcudnn7-dev_7.6.5.32-1+cuda10.1_amd64.deb
-
 # install needed packages
 sudo apt-get install -y cmake \
@@ -34,6 +34,9 @@ sudo apt-get install -y cmake \
 	tree \
 	p7zip-full
 
-pip3 uninstall crcmod
-pip3 install --no-cache-dir -U crcmod
-pip3 install -r requirements.txt
+pip3 install --upgrade pip
+pip3 install --upgrade setuptools
+pip3 uninstall crcmod -y
+pip3 install --no-cache-dir crcmod
+echo "Installing python requirements from the requirements file"
+cd necstlab-damage-segmentation && pip3 install -r requirements.txt
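Because the provisioning output now goes to resource-creation.log on the VM rather than through Terraform's gRPC stream (the source of the ResourceExhausted error), progress can be followed from a second terminal. A sketch, assuming gcloud is authenticated for the project (the instance name uses the placeholders from the docs):

    gcloud compute ssh <project_name>-<username> --command="tail -f resource-creation.log"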
From 36d021a4fdabed1921bb38464c542e43a747149e Mon Sep 17 00:00:00 2001
From: josh
Date: Fri, 14 Feb 2020 15:50:24 -0500
Subject: [PATCH 5/7] added some text about virtual envs

---
 docs/assumed_knowledge.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/assumed_knowledge.md b/docs/assumed_knowledge.md
index ed25737..b6900d9 100644
--- a/docs/assumed_knowledge.md
+++ b/docs/assumed_knowledge.md
@@ -8,3 +8,4 @@ The workflows contained in this repository assume:
 * You are familiar with image annotations and how they are used in image segmentation. If you are unfamiliar with this, see [here]() for more information. TODO: add link/link content
 * You are familiar with how datasets are used in Machine Learning (for example, splitting your data into train, validation, and test). If you are unfamiliar with this, see [here]() for more information. TODO: add link/link content
 * You are familiar with how to use tmux on a remote machine and how we will use it to keep processes running even if the SSH window is closed or disconnected. If you are unfamiliar with this, see [here]() for more information. TODO: add link/link content
+* The codebase is meant to be run on a virtual machine so it installs the python packages user-wide. If you wish to run the code locally, we suggest using `virtualenv`. See [here]() for more information.

From d1866b8815f55c5f49ec5b99919eb89a4830a5e3 Mon Sep 17 00:00:00 2001
From: josh
Date: Fri, 14 Feb 2020 15:50:40 -0500
Subject: [PATCH 6/7] fix to this occurring:
 https://github.com/googleapis/google-cloud-python/issues/5234

---
 scripts/resource-creation.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/resource-creation.sh b/scripts/resource-creation.sh
index 78627da..4b23f04 100644
--- a/scripts/resource-creation.sh
+++ b/scripts/resource-creation.sh
@@ -15,7 +15,6 @@ sudo apt-get -y install cuda
 export PATH=/usr/local/cuda-10.1/bin${PATH:+:${PATH}}
 
 # install cudnn
-echo "Installing cudnn"
 wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7_7.6.5.32-1+cuda10.1_amd64.deb
 sudo dpkg -i libcudnn7_7.6.5.32-1+cuda10.1_amd64.deb
 
@@ -37,6 +36,6 @@ sudo apt-get install -y cmake \
 pip3 install --upgrade pip
 pip3 install --upgrade setuptools
 pip3 uninstall crcmod -y
 pip3 install --no-cache-dir crcmod
-echo "Installing python requirements from the requirements file"
+pip3 install --upgrade pyasn1
 cd necstlab-damage-segmentation && pip3 install -r requirements.txt
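Patch 6 works around googleapis/google-cloud-python#5234 by upgrading pyasn1 before the requirements install. A hypothetical post-provisioning sanity check, assuming python3 and pip3 are on the PATH:

    pip3 show pyasn1
    # importing the storage client is what a stale pyasn1 used to break
    python3 -c "from google.cloud import storage; print('import ok')"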
From d8f588ca5d5887db3e5a300a1d8e786e7ef33a94 Mon Sep 17 00:00:00 2001
From: josh
Date: Fri, 14 Feb 2020 16:54:28 -0500
Subject: [PATCH 7/7] added virtualenv instructions

---
 .gitignore                  |  3 ++-
 docs/assumed_knowledge.md   |  2 +-
 docs/virtual_environment.md | 10 ++++++++++
 3 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 docs/virtual_environment.md

diff --git a/.gitignore b/.gitignore
index bc379e2..e0c4f55 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,5 @@ terraform.tfstate
 terraform.tfstate.backup
 .terraform.tfstate.lock.info
 .terraform
-.DS_Store
\ No newline at end of file
+.DS_Store
+.venv
diff --git a/docs/assumed_knowledge.md b/docs/assumed_knowledge.md
index b6900d9..dd160a8 100644
--- a/docs/assumed_knowledge.md
+++ b/docs/assumed_knowledge.md
@@ -8,4 +8,4 @@ The workflows contained in this repository assume:
 * You are familiar with image annotations and how they are used in image segmentation. If you are unfamiliar with this, see [here]() for more information. TODO: add link/link content
 * You are familiar with how datasets are used in Machine Learning (for example, splitting your data into train, validation, and test). If you are unfamiliar with this, see [here]() for more information. TODO: add link/link content
 * You are familiar with how to use tmux on a remote machine and how we will use it to keep processes running even if the SSH window is closed or disconnected. If you are unfamiliar with this, see [here]() for more information. TODO: add link/link content
-* The codebase is meant to be run on a virtual machine so it installs the python packages user-wide. If you wish to run the code locally, we suggest using `virtualenv`. See [here]() for more information.
+* The codebase is meant to be run on a virtual machine so it installs the python packages user-wide. If you wish to run the code locally, we suggest using `virtualenv` (see [here](virtual_environment.md) for instructions).
diff --git a/docs/virtual_environment.md b/docs/virtual_environment.md
new file mode 100644
index 0000000..bad7072
--- /dev/null
+++ b/docs/virtual_environment.md
@@ -0,0 +1,10 @@
+To set up a virtual environment:
+- Install it: `pip install virtualenv`
+- Create the virtual environment: `virtualenv --always-copy --system-site-packages --python=python3 .venv`
+- Install the needed packages: `.venv/bin/pip install -q -r requirements.txt`
+
+To use the virtual environment, enter it: `source .venv/bin/activate`
+
+To exit the virtual environment use: `deactivate`
+
+To delete the virtual environment just delete the `.venv` folder: `rm -r .venv`
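Taken together with the new doc, a minimal end-to-end sketch of a local run inside the virtual environment (the bucket and config file names are placeholders, not values from this series):

    pip install virtualenv
    virtualenv --always-copy --system-site-packages --python=python3 .venv
    .venv/bin/pip install -q -r requirements.txt
    source .venv/bin/activate
    python3 prepare_dataset.py --gcp-bucket gs://<gcp_bucket_name> --config-file configs/<config_file>.yaml
    deactivate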