From 787acf494ffd07602179304cb5c883336ede93e6 Mon Sep 17 00:00:00 2001 From: xutingfeng Date: Sun, 11 Sep 2022 21:27:14 +0800 Subject: [PATCH 1/8] add load local pdb_files to ProteinGraphDataset --- .../ml/datasets/torch_geometric_dataset.py | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py index 3665918f3..5834b2b48 100644 --- a/graphein/ml/datasets/torch_geometric_dataset.py +++ b/graphein/ml/datasets/torch_geometric_dataset.py @@ -299,6 +299,7 @@ class ProteinGraphDataset(Dataset): def __init__( self, root, + pdb_paths:Optional[Union[List[str], str]] =None, pdb_codes: Optional[List[str]] = None, uniprot_ids: Optional[List[str]] = None, # graph_label_map: Optional[Dict[str, int]] = None, @@ -388,14 +389,22 @@ def __init__( if uniprot_ids is not None else None ) + self.pdb_paths = pdb_paths + if self.pdb_paths is None: + if self.pdb_codes and self.uniprot_ids: + self.structures = self.pdb_codes + self.uniprot_ids + elif self.pdb_codes: + self.structures = pdb_codes + elif self.uniprot_ids: + self.structures = uniprot_ids + # Use local saved pdb_files instead of download or move them to self.root/raw dir + else: + if isinstance(self.pdb_paths, list): + self.structures = [os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths] + self.pdb_path, _ = os.path.split(self.pdb_paths[0]) + print(self.structures) + print(self.pdb_path) - if self.pdb_codes and self.uniprot_ids: - self.structures = self.pdb_codes + self.uniprot_ids - elif self.pdb_codes: - self.structures = pdb_codes - elif self.uniprot_ids: - self.structures = uniprot_ids - self.af_version = af_version # Labels & Chains @@ -449,6 +458,12 @@ def processed_file_names(self) -> List[str]: ] else: return [f"{pdb}.pt" for pdb in self.structures] + @property + def raw_dir(self) -> str: + if self.pdb_paths is not None: + return self.pdb_path # replace raw dir 
with user local pdb_path + else: + return os.path.join(self.root, 'raw') def validate_input(self): if self.graph_label_map is not None: @@ -554,6 +569,7 @@ def divide_chunks(l: List[str], n: int = 2) -> Generator: # Create graph objects file_names = [f"{self.raw_dir}/{pdb}.pdb" for pdb in pdbs] + graphs = construct_graphs_mp( pdb_path_it=file_names, config=self.config, From f7e92c3e70afbf4766f0c31009a2f00d3d2f932b Mon Sep 17 00:00:00 2001 From: Nicktf <49584439+1511878618@users.noreply.github.com> Date: Wed, 14 Sep 2022 07:10:12 +0000 Subject: [PATCH 2/8] load local pdb_files from a list --- .../ml/datasets/torch_geometric_dataset.py | 30 ++- notebooks/dataloader_tutorial.ipynb | 207 +++++++++++++++++- 2 files changed, 226 insertions(+), 11 deletions(-) diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py index 5834b2b48..988410f14 100644 --- a/graphein/ml/datasets/torch_geometric_dataset.py +++ b/graphein/ml/datasets/torch_geometric_dataset.py @@ -41,6 +41,7 @@ def __init__( self, root: str, name: str, + pdb_paths:Optional[List[str]] =None, pdb_codes: Optional[List[str]] = None, uniprot_ids: Optional[List[str]] = None, graph_label_map: Optional[Dict[str, torch.Tensor]] = None, @@ -72,6 +73,8 @@ def __init__( :type root: str :param name: Name of the dataset. Will be saved to ``data_$name.pt``. :type name: str + :param pdb_paths:List of full path of pdb files to load. Defaults to None + :type pdb_paths:Optional[List[str]], optional :param pdb_codes: List of PDB codes to download and parse from the PDB. Defaults to None. 
:type pdb_codes: Optional[List[str]], optional @@ -135,6 +138,20 @@ def __init__( else None ) + self.pdb_paths = pdb_paths + if self.pdb_paths is None: + if self.pdb_codes and self.uniprot_ids: + self.structures = self.pdb_codes + self.uniprot_ids + elif self.pdb_codes: + self.structures = pdb_codes + elif self.uniprot_ids: + self.structures = uniprot_ids + # Use local saved pdb_files instead of download or move them to self.root/raw dir + else: + if isinstance(self.pdb_paths, list): + self.structures = [os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths] + self.pdb_path, _ = os.path.split(self.pdb_paths[0]) + if self.pdb_codes and self.uniprot_ids: self.structures = self.pdb_codes + self.uniprot_ids elif self.pdb_codes: @@ -175,6 +192,12 @@ def raw_file_names(self) -> List[str]: def processed_file_names(self) -> List[str]: """Name of the processed file.""" return [f"data_{self.name}.pt"] + @property + def raw_dir(self) -> str: + if self.pdb_paths is not None: + return self.pdb_path # replace raw dir with user local pdb_path + else: + return os.path.join(self.root, 'raw') def download(self): """Download the PDB files from RCSB or Alphafold.""" @@ -299,7 +322,7 @@ class ProteinGraphDataset(Dataset): def __init__( self, root, - pdb_paths:Optional[Union[List[str], str]] =None, + pdb_paths:Optional[List[str]] =None, pdb_codes: Optional[List[str]] = None, uniprot_ids: Optional[List[str]] = None, # graph_label_map: Optional[Dict[str, int]] = None, @@ -328,6 +351,8 @@ def __init__( :param root: Root directory where the dataset should be saved. :type root: str + :param pdb_paths:List of full path of pdb files to load. Defaults to None + :type pdb_paths:Optional[List[str]], optional :param pdb_codes: List of PDB codes to download and parse from the PDB. Defaults to ``None``. 
:type pdb_codes: Optional[List[str]], optional @@ -402,9 +427,6 @@ def __init__( if isinstance(self.pdb_paths, list): self.structures = [os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths] self.pdb_path, _ = os.path.split(self.pdb_paths[0]) - print(self.structures) - print(self.pdb_path) - # Labels & Chains diff --git a/notebooks/dataloader_tutorial.ipynb b/notebooks/dataloader_tutorial.ipynb index 57d68cd46..3b5dcfd8a 100644 --- a/notebooks/dataloader_tutorial.ipynb +++ b/notebooks/dataloader_tutorial.ipynb @@ -54,6 +54,8 @@ " # Root directory where the dataset should be saved.\n", " name: str, \n", " # Name of the dataset. Will be saved to ``data_$name.pt``.\n", + " pdb_paths:Optional[List[str]] =None,\n", + " # List of full path of pdb files to load.\n", " pdb_codes: Optional[List[str]] = None, \n", " # List of PDB codes to download and parse from the PDB.\n", " uniprot_ids: Optional[List[str]] = None, \n", @@ -90,7 +92,7 @@ "#### Directory Structure\n", "Creating a ``ProteinGraphDataset`` will create two directories under ``root``:\n", "\n", - "* ``root/raw`` - Contains raw PDB files\n", + "* ``root/raw`` - Contains raw PDB files which are downloaded\n", "* ``root/processed`` - Contains processed graphs (in ``pytorch_geometric.data.Data`` format) saved as ``$PDB.pt / $UNIPROT_ID.pt``" ] }, @@ -156,6 +158,75 @@ " break" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Load from local path\n", + "\n", + "\n", + "Creating a ``ProteinGraphDataset`` from a list of full path of pdb files:\n", + "\n", + "* ``root/raw`` - Will be empty since no pdb files are downloaded\n", + "* ``root/processed`` - Contains processed graphs (in ``pytorch_geometric.data.Data`` format) saved as ``$PDB.pt / $UNIPROT_ID.pt``" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['../tests/protein/test_data/1lds.pdb', 
'../tests/protein/test_data/4hhb.pdb', '../tests/protein/test_data/alphafold_structure.pdb']\n" + ] + } + ], + "source": [ + "# import sys\n", + "# sys.path.append('../') # add system path for python\n", + "\n", + "import os \n", + "from graphein.protein.config import ProteinGraphConfig\n", + "from graphein.ml import ProteinGraphDataset, ProteinGraphListDataset\n", + "import torch \n", + "\n", + "local_dir = \"../tests/protein/test_data/\"\n", + "pdb_paths = [os.path.join(local_dir, pdb_path) for pdb_path in os.listdir(local_dir) if pdb_path.endswith(\".pdb\")]\n", + "print(pdb_paths)\n", + "\n", + "# let's load local dataset from local_dir!\n", + "ds = ProteinGraphDataset(\n", + " root = \"../graphein/ml/datasets/test\",\n", + " pdb_paths = pdb_paths,\n", + " graphein_config=ProteinGraphConfig(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DataBatch(edge_index=[2, 666], node_id=[2], coords=[2], name=[2], dist_mat=[2], num_nodes=671, batch=[671], ptr=[3])\n" + ] + } + ], + "source": [ + "# Create a dataloader from dataset and inspect a batch\n", + "from torch_geometric.loader import DataLoader\n", + "dl = DataLoader(ds, batch_size=2, shuffle=True, drop_last=True)\n", + "for i in dl:\n", + " print(i)\n", + " break" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -171,6 +242,8 @@ " # Root directory where the dataset should be saved.\n", " name: str, \n", " # Name of the dataset. 
Will be saved to ``data_$name.pt``.\n", + " pdb_paths:Optional[List[str]] =None,\n", + " # List of full path of pdb files to load.\n", " pdb_codes: Optional[List[str]] = None, \n", " # List of PDB codes to download and parse from the PDB.\n", " uniprot_ids: Optional[List[str]] = None, \n", @@ -292,6 +365,124 @@ " break" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Load from local path\n", + "\n", + "\n", + "Creating an ``InMemoryProteinGraphDataset`` from a list of full path of pdb files:\n", + "\n", + "* ``root/raw`` - Will be empty since no pdb files are downloaded\n", + "* ``root/processed`` - Contains processed datasets saved as ``data_{name}.pt``\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['../tests/protein/test_data/1lds.pdb', '../tests/protein/test_data/4hhb.pdb', '../tests/protein/test_data/alphafold_structure.pdb']\n", + "Constructing Graphs...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing...\n" + ] + }, + { + "data": { + "application/json": { + "ascii": false, + "bar_format": null, + "colour": null, + "elapsed": 0.2526402473449707, + "initial": 0, + "n": 0, + "ncols": null, + "nrows": null, + "postfix": null, + "prefix": "", + "rate": null, + "total": 3, + "unit": "it", + "unit_divisor": 1000, + "unit_scale": false + }, + "application/vnd.jupyter.widget-view+json": { + "model_id": "d5ed353098664f6f803fa502264df986", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/3 [00:00 Date: Wed, 14 Sep 2022 07:40:35 +0000 Subject: [PATCH 3/8] test and black and isort and add CHANGELOG.md --- CHANGELOG.md | 19 ++++++++++++++++++ .../ml/datasets/torch_geometric_dataset.py | 20 +++++++++++++------ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80ccadf3a..e8e55b429 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -1,3 +1,22 @@ +### local_dataset + +* changes: support for loading local pdb files by ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset``. + + +* `python -m py.test tests/`, a part result of testing is as below: +![](https://tva1.sinaimg.cn/large/e6c9d24egy1h6664wn7zsj21qs0d444h.jpg) +* Also, some documentation are added into `dataloader_tutorial` + +#### ML + +* [Feature] add support for loading local pdb files to both ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset`` + +>by adding a params:`pdb_paths` and set the `self.raw_dir` to the root path(`self.pdb_path`) of pdb_paths list (the root path should be only one, pdb files should be under the same folder). +> +>it will works from loading pdb files from the `self.pdb_path` instead of loading from self.raw. +> If desire to download from af2 or pdb, just set `pdb_paths` to `None` and it goes back to the former version. + +If this change would be accepted, i'll try to make the downloading and loadoing from local could work together. 
### 1.5.1 #### Protein diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py index 988410f14..de886e24e 100644 --- a/graphein/ml/datasets/torch_geometric_dataset.py +++ b/graphein/ml/datasets/torch_geometric_dataset.py @@ -41,7 +41,7 @@ def __init__( self, root: str, name: str, - pdb_paths:Optional[List[str]] =None, + pdb_paths: Optional[List[str]] = None, pdb_codes: Optional[List[str]] = None, uniprot_ids: Optional[List[str]] = None, graph_label_map: Optional[Dict[str, torch.Tensor]] = None, @@ -149,7 +149,10 @@ def __init__( # Use local saved pdb_files instead of download or move them to self.root/raw dir else: if isinstance(self.pdb_paths, list): - self.structures = [os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths] + self.structures = [ + os.path.splitext(os.path.split(pdb_path)[-1])[0] + for pdb_path in self.pdb_paths + ] self.pdb_path, _ = os.path.split(self.pdb_paths[0]) if self.pdb_codes and self.uniprot_ids: @@ -192,12 +195,13 @@ def raw_file_names(self) -> List[str]: def processed_file_names(self) -> List[str]: """Name of the processed file.""" return [f"data_{self.name}.pt"] + @property def raw_dir(self) -> str: if self.pdb_paths is not None: return self.pdb_path # replace raw dir with user local pdb_path else: - return os.path.join(self.root, 'raw') + return os.path.join(self.root, "raw") def download(self): """Download the PDB files from RCSB or Alphafold.""" @@ -322,7 +326,7 @@ class ProteinGraphDataset(Dataset): def __init__( self, root, - pdb_paths:Optional[List[str]] =None, + pdb_paths: Optional[List[str]] = None, pdb_codes: Optional[List[str]] = None, uniprot_ids: Optional[List[str]] = None, # graph_label_map: Optional[Dict[str, int]] = None, @@ -425,7 +429,10 @@ def __init__( # Use local saved pdb_files instead of download or move them to self.root/raw dir else: if isinstance(self.pdb_paths, list): - self.structures = 
[os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths] + self.structures = [ + os.path.splitext(os.path.split(pdb_path)[-1])[0] + for pdb_path in self.pdb_paths + ] self.pdb_path, _ = os.path.split(self.pdb_paths[0]) # Labels & Chains @@ -480,12 +487,13 @@ def processed_file_names(self) -> List[str]: ] else: return [f"{pdb}.pt" for pdb in self.structures] + @property def raw_dir(self) -> str: if self.pdb_paths is not None: return self.pdb_path # replace raw dir with user local pdb_path else: - return os.path.join(self.root, 'raw') + return os.path.join(self.root, "raw") def validate_input(self): if self.graph_label_map is not None: From 60a31bbdad3656d53b866cf3bb4864288a1c2fd8 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Wed, 14 Sep 2022 14:31:05 +0200 Subject: [PATCH 4/8] docstring formatting --- graphein/ml/datasets/torch_geometric_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py index de886e24e..96133271c 100644 --- a/graphein/ml/datasets/torch_geometric_dataset.py +++ b/graphein/ml/datasets/torch_geometric_dataset.py @@ -73,8 +73,8 @@ def __init__( :type root: str :param name: Name of the dataset. Will be saved to ``data_$name.pt``. :type name: str - :param pdb_paths:List of full path of pdb files to load. Defaults to None - :type pdb_paths:Optional[List[str]], optional + :param pdb_paths: List of full path of pdb files to load. Defaults to ``None``. + :type pdb_paths: Optional[List[str]], optional :param pdb_codes: List of PDB codes to download and parse from the PDB. Defaults to None. 
:type pdb_codes: Optional[List[str]], optional @@ -325,7 +325,7 @@ def process(self): class ProteinGraphDataset(Dataset): def __init__( self, - root, + root: str, pdb_paths: Optional[List[str]] = None, pdb_codes: Optional[List[str]] = None, uniprot_ids: Optional[List[str]] = None, @@ -355,8 +355,8 @@ def __init__( :param root: Root directory where the dataset should be saved. :type root: str - :param pdb_paths:List of full path of pdb files to load. Defaults to None - :type pdb_paths:Optional[List[str]], optional + :param pdb_paths: List of full path of pdb files to load. Defaults to ``None``. + :type pdb_paths: Optional[List[str]], optional :param pdb_codes: List of PDB codes to download and parse from the PDB. Defaults to ``None``. :type pdb_codes: Optional[List[str]], optional From 7758a913fb1a637eec33ae3e2005f24f1f3d9a87 Mon Sep 17 00:00:00 2001 From: Nicktf <49584439+1511878618@users.noreply.github.com> Date: Wed, 14 Sep 2022 21:05:20 +0800 Subject: [PATCH 5/8] comment jupyter; just run and ssh or vscode to interact with container --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c86d49b18..579127df1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,7 +44,7 @@ RUN conda install -c dglteam dgl RUN conda install -c salilab dssp RUN conda install -c conda-forge ipywidgets -RUN jupyter nbextension enable --py widgetsnbextension +# RUN jupyter nbextension enable --py widgetsnbextension RUN export CUDA=$(python -c "import torch; print('cu'+torch.version.cuda.replace('.',''))") \ && export TORCH=$(python -c "import torch; print(torch.__version__)") \ From 9f16e40d6f4c9f5238324426611a984f9516926f Mon Sep 17 00:00:00 2001 From: Nicktf <49584439+1511878618@users.noreply.github.com> Date: Wed, 14 Sep 2022 21:22:37 +0800 Subject: [PATCH 6/8] install dependency of jupyter nbextension --- Dockerfile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 
579127df1..e8a9760bd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,9 +42,12 @@ RUN conda install -c fvcore -c iopath -c conda-forge fvcore iopath RUN conda install -c pytorch3d pytorch3d RUN conda install -c dglteam dgl RUN conda install -c salilab dssp - RUN conda install -c conda-forge ipywidgets -# RUN jupyter nbextension enable --py widgetsnbextension + +# or conda install; may be it will work; or just comment `jupyter nbextension enable --py widgetsnbextension` +RUN pip install jupyter_contrib_nbextensions + +RUN jupyter nbextension enable --py widgetsnbextension RUN export CUDA=$(python -c "import torch; print('cu'+torch.version.cuda.replace('.',''))") \ && export TORCH=$(python -c "import torch; print(torch.__version__)") \ From 2d17b5b0934c9e0d3defeeeae759908e04b8fcd8 Mon Sep 17 00:00:00 2001 From: ryan Date: Thu, 15 Sep 2022 11:50:56 +0100 Subject: [PATCH 7/8] Fixed Dockerfile and unit tests --- Dockerfile | 7 ++----- graphein/grn/parse_regnetwork.py | 18 ++++++++++++++---- .../ml/datasets/torch_geometric_dataset.py | 2 ++ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index e8a9760bd..ec0d69048 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,11 +44,6 @@ RUN conda install -c dglteam dgl RUN conda install -c salilab dssp RUN conda install -c conda-forge ipywidgets -# or conda install; may be it will work; or just comment `jupyter nbextension enable --py widgetsnbextension` -RUN pip install jupyter_contrib_nbextensions - -RUN jupyter nbextension enable --py widgetsnbextension - RUN export CUDA=$(python -c "import torch; print('cu'+torch.version.cuda.replace('.',''))") \ && export TORCH=$(python -c "import torch; print(torch.__version__)") \ && pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html --no-cache-dir \ @@ -57,6 +52,8 @@ RUN export CUDA=$(python -c "import torch; print('cu'+torch.version.cuda.replace && pip install torch-spline-conv -f 
https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html --no-cache-dir \ && pip install torch-geometric --no-cache-dir +RUN pip install jupyter_contrib_nbextensions +RUN jupyter nbextension enable --py widgetsnbextension # Testing # docker-compose -f docker-compose.cpu.yml up -d --build diff --git a/graphein/grn/parse_regnetwork.py b/graphein/grn/parse_regnetwork.py index 8b0a22829..8677866b0 100644 --- a/graphein/grn/parse_regnetwork.py +++ b/graphein/grn/parse_regnetwork.py @@ -14,6 +14,7 @@ import pandas as pd import wget +import ssl from graphein.utils.utils import filter_dataframe, ping @@ -41,10 +42,10 @@ def _download_RegNetwork( "RegNetwork is not available. Please check your internet connection or verify at: http://www.regnetworkweb.org" ) - mouse_url = "http://regnetworkweb.org/download/mouse.zip" + mouse_url = "https://regnetworkweb.org/download/mouse.zip" if network_type == "human": - human_url = "http://www.regnetworkweb.org/download/human.zip" + human_url = "https://regnetworkweb.org/download/human.zip" url = human_url elif network_type == "mouse": url = mouse_url @@ -66,8 +67,12 @@ def _download_RegNetwork( # Download data and unzip if not os.path.exists(file): log.info("Downloading RegNetwork ...") + # switch ssl context for unverified download + default_https_context = ssl._create_default_https_context + ssl._create_default_https_context = ssl._create_unverified_context wget.download(url, compressed_file) - + # switch ssl context back to default + ssl._create_default_https_context = default_https_context with zipfile.ZipFile(compressed_file, "r") as zip_ref: zip_ref.extractall(out_dir) @@ -80,7 +85,7 @@ def _download_RegNetwork_regtypes(root_dir: Optional[Path] = None) -> str: :param root_dir: Path object specifying the location to download RegNetwork to """ - url = "http://www.regnetworkweb.org/download/RegulatoryDirections.zip" + url = "https://regnetworkweb.org/download/RegulatoryDirections.zip" if root_dir is None: root_dir = 
Path(__file__).parent.parent.parent / "datasets" @@ -94,7 +99,12 @@ def _download_RegNetwork_regtypes(root_dir: Optional[Path] = None) -> str: # Download data and unzip if not os.path.exists(file): log.info("Downloading RegNetwork reg types ...") + # switch ssl context for unverified download + default_https_context = ssl._create_default_https_context + ssl._create_default_https_context = ssl._create_unverified_context wget.download(url, compressed_file) + # switch ssl context back to default + ssl._create_default_https_context = default_https_context with zipfile.ZipFile(compressed_file, "r") as zip_ref: zip_ref.extractall(out_dir) diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py index 96133271c..5de6f9eb6 100644 --- a/graphein/ml/datasets/torch_geometric_dataset.py +++ b/graphein/ml/datasets/torch_geometric_dataset.py @@ -177,6 +177,7 @@ def __init__( self.graph_transformation_funcs = graph_transformation_funcs self.pdb_transform = pdb_transform self.num_cores = num_cores + self.af_version = af_version super().__init__( root, transform=transform, @@ -462,6 +463,7 @@ def __init__( self.num_cores = num_cores self.pdb_transform = pdb_transform self.graph_transformation_funcs = graph_transformation_funcs + self.af_version = af_version super().__init__( root, transform=transform, From f209284f8d2148971b7902235d18e55562076122 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Fri, 16 Sep 2022 13:38:30 +0200 Subject: [PATCH 8/8] Update chage log --- CHANGELOG.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8e55b429..57f7d4b55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,20 @@ -### local_dataset +### 1.5.2 -* changes: support for loading local pdb files by ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset``. 
+#### GRN +* [Bugfix] - [#208](https://github.com/a-r-j/graphein/pull/208) - Resolves SSL issues with RegNetwork. -* `python -m py.test tests/`, a part result of testing is as below: -![](https://tva1.sinaimg.cn/large/e6c9d24egy1h6664wn7zsj21qs0d444h.jpg) -* Also, some documentation are added into `dataloader_tutorial` - #### ML - -* [Feature] add support for loading local pdb files to both ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset`` - +* [Feature] - [#208](https://github.com/a-r-j/graphein/pull/208) - Adds support for loading local PDB files in ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset``. >by adding a params:`pdb_paths` and set the `self.raw_dir` to the root path(`self.pdb_path`) of pdb_paths list (the root path should be only one, pdb files should be under the same folder). > >it will works from loading pdb files from the `self.pdb_path` instead of loading from self.raw. > If desire to download from af2 or pdb, just set `pdb_paths` to `None` and it goes back to the former version. -If this change would be accepted, i'll try to make the downloading and loadoing from local could work together. +#### CI +* [Bugfix] - [#208](https://github.com/a-r-j/graphein/pull/208) - Explicitly installs `jupyter_contrib_nbextensions` in Docker. + + ### 1.5.1 #### Protein