From 787acf494ffd07602179304cb5c883336ede93e6 Mon Sep 17 00:00:00 2001
From: xutingfeng <xutingfeng@big.ac.cn>
Date: Sun, 11 Sep 2022 21:27:14 +0800
Subject: [PATCH 1/8] add load local pdb_files to ProteinGraphDataset

---
 .../ml/datasets/torch_geometric_dataset.py    | 30 ++++++++++++++-----
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py
index 3665918f3..5834b2b48 100644
--- a/graphein/ml/datasets/torch_geometric_dataset.py
+++ b/graphein/ml/datasets/torch_geometric_dataset.py
@@ -299,6 +299,7 @@ class ProteinGraphDataset(Dataset):
     def __init__(
         self,
         root,
+        pdb_paths:Optional[Union[List[str], str]] =None,
         pdb_codes: Optional[List[str]] = None,
         uniprot_ids: Optional[List[str]] = None,
         # graph_label_map: Optional[Dict[str, int]] = None,
@@ -388,14 +389,22 @@ def __init__(
             if uniprot_ids is not None
             else None
         )
+        self.pdb_paths = pdb_paths
+        if self.pdb_paths is None:
+            if self.pdb_codes and self.uniprot_ids:
+                self.structures = self.pdb_codes + self.uniprot_ids
+            elif self.pdb_codes:
+                self.structures = pdb_codes
+            elif self.uniprot_ids:
+                self.structures = uniprot_ids
+        # Use local saved pdb_files instead of download or move them to self.root/raw dir
+        else:
+            if isinstance(self.pdb_paths, list):
+                self.structures = [os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths]
+                self.pdb_path, _ = os.path.split(self.pdb_paths[0])
+                print(self.structures)
+                print(self.pdb_path)
 
-        if self.pdb_codes and self.uniprot_ids:
-            self.structures = self.pdb_codes + self.uniprot_ids
-        elif self.pdb_codes:
-            self.structures = pdb_codes
-        elif self.uniprot_ids:
-            self.structures = uniprot_ids
-        self.af_version = af_version
 
         # Labels & Chains
 
@@ -449,6 +458,12 @@ def processed_file_names(self) -> List[str]:
             ]
         else:
             return [f"{pdb}.pt" for pdb in self.structures]
+    @property
+    def raw_dir(self) -> str:
+        if self.pdb_paths is not None:
+            return self.pdb_path  # replace raw dir with user local pdb_path
+        else:
+            return os.path.join(self.root, 'raw')
 
     def validate_input(self):
         if self.graph_label_map is not None:
@@ -554,6 +569,7 @@ def divide_chunks(l: List[str], n: int = 2) -> Generator:
 
             # Create graph objects
             file_names = [f"{self.raw_dir}/{pdb}.pdb" for pdb in pdbs]
+
             graphs = construct_graphs_mp(
                 pdb_path_it=file_names,
                 config=self.config,

From f7e92c3e70afbf4766f0c31009a2f00d3d2f932b Mon Sep 17 00:00:00 2001
From: Nicktf <49584439+1511878618@users.noreply.github.com>
Date: Wed, 14 Sep 2022 07:10:12 +0000
Subject: [PATCH 2/8] load local pdb_files from a list

---
 .../ml/datasets/torch_geometric_dataset.py    |  30 ++-
 notebooks/dataloader_tutorial.ipynb           | 207 +++++++++++++++++-
 2 files changed, 226 insertions(+), 11 deletions(-)

diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py
index 5834b2b48..988410f14 100644
--- a/graphein/ml/datasets/torch_geometric_dataset.py
+++ b/graphein/ml/datasets/torch_geometric_dataset.py
@@ -41,6 +41,7 @@ def __init__(
         self,
         root: str,
         name: str,
+        pdb_paths:Optional[List[str]] =None,
         pdb_codes: Optional[List[str]] = None,
         uniprot_ids: Optional[List[str]] = None,
         graph_label_map: Optional[Dict[str, torch.Tensor]] = None,
@@ -72,6 +73,8 @@ def __init__(
         :type root: str
         :param name: Name of the dataset. Will be saved to ``data_$name.pt``.
         :type name: str
+        :param pdb_paths:List of full path of pdb files to load. Defaults to None
+        :type pdb_paths:Optional[List[str]], optional
         :param pdb_codes: List of PDB codes to download and parse from the PDB.
             Defaults to None.
         :type pdb_codes: Optional[List[str]], optional
@@ -135,6 +138,20 @@ def __init__(
             else None
         )
 
+        self.pdb_paths = pdb_paths
+        if self.pdb_paths is None:
+            if self.pdb_codes and self.uniprot_ids:
+                self.structures = self.pdb_codes + self.uniprot_ids
+            elif self.pdb_codes:
+                self.structures = pdb_codes
+            elif self.uniprot_ids:
+                self.structures = uniprot_ids
+        # Use local saved pdb_files instead of download or move them to self.root/raw dir
+        else:
+            if isinstance(self.pdb_paths, list):
+                self.structures = [os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths]
+                self.pdb_path, _ = os.path.split(self.pdb_paths[0])
+
         if self.pdb_codes and self.uniprot_ids:
             self.structures = self.pdb_codes + self.uniprot_ids
         elif self.pdb_codes:
@@ -175,6 +192,12 @@ def raw_file_names(self) -> List[str]:
     def processed_file_names(self) -> List[str]:
         """Name of the processed file."""
         return [f"data_{self.name}.pt"]
+    @property
+    def raw_dir(self) -> str:
+        if self.pdb_paths is not None:
+            return self.pdb_path  # replace raw dir with user local pdb_path
+        else:
+            return os.path.join(self.root, 'raw')
 
     def download(self):
         """Download the PDB files from RCSB or Alphafold."""
@@ -299,7 +322,7 @@ class ProteinGraphDataset(Dataset):
     def __init__(
         self,
         root,
-        pdb_paths:Optional[Union[List[str], str]] =None,
+        pdb_paths:Optional[List[str]] =None,
         pdb_codes: Optional[List[str]] = None,
         uniprot_ids: Optional[List[str]] = None,
         # graph_label_map: Optional[Dict[str, int]] = None,
@@ -328,6 +351,8 @@ def __init__(
 
         :param root: Root directory where the dataset should be saved.
         :type root: str
+        :param pdb_paths:List of full path of pdb files to load. Defaults to None
+        :type pdb_paths:Optional[List[str]], optional
         :param pdb_codes: List of PDB codes to download and parse from the PDB.
             Defaults to ``None``.
         :type pdb_codes: Optional[List[str]], optional
@@ -402,9 +427,6 @@ def __init__(
             if isinstance(self.pdb_paths, list):
                 self.structures = [os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths]
                 self.pdb_path, _ = os.path.split(self.pdb_paths[0])
-                print(self.structures)
-                print(self.pdb_path)
-
 
         # Labels & Chains
 
diff --git a/notebooks/dataloader_tutorial.ipynb b/notebooks/dataloader_tutorial.ipynb
index 57d68cd46..3b5dcfd8a 100644
--- a/notebooks/dataloader_tutorial.ipynb
+++ b/notebooks/dataloader_tutorial.ipynb
@@ -54,6 +54,8 @@
     "        # Root directory where the dataset should be saved.\n",
     "        name: str,                                                             \n",
     "        # Name of the dataset. Will be saved to ``data_$name.pt``.\n",
+    "        pdb_paths:Optional[List[str]] =None,\n",
+    "        # List of full path of pdb files to load.\n",
     "        pdb_codes: Optional[List[str]] = None,                                 \n",
     "        #  List of PDB codes to download and parse from the PDB.\n",
     "        uniprot_ids: Optional[List[str]] = None,                               \n",
@@ -90,7 +92,7 @@
     "#### Directory Structure\n",
     "Creating a ``ProteinGraphDataset`` will create two directories under ``root``:\n",
     "\n",
-    "* ``root/raw`` - Contains raw PDB files\n",
+    "* ``root/raw`` - Contains raw PDB files which are downloaded\n",
     "* ``root/processed`` - Contains processed graphs (in ``pytorch_geometric.data.Data`` format) saved as ``$PDB.pt / $UNIPROT_ID.pt``"
    ]
   },
@@ -156,6 +158,75 @@
     "    break"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load from local path\n",
+    "\n",
+    "\n",
+    "Creating a ``ProteinGraphDataset`` from a list of full paths to PDB files:\n",
+    "\n",
+    "* ``root/raw`` - Will be empty since no pdb files are downloaded\n",
+    "* ``root/processed`` - Contains processed graphs (in ``pytorch_geometric.data.Data`` format) saved as ``$PDB.pt / $UNIPROT_ID.pt``"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['../tests/protein/test_data/1lds.pdb', '../tests/protein/test_data/4hhb.pdb', '../tests/protein/test_data/alphafold_structure.pdb']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# import sys\n",
+    "# sys.path.append('../')  # add system path for python\n",
+    "\n",
+    "import os \n",
+    "from graphein.protein.config import ProteinGraphConfig\n",
+    "from graphein.ml import ProteinGraphDataset, ProteinGraphListDataset\n",
+    "import torch \n",
+    "\n",
+    "local_dir = \"../tests/protein/test_data/\"\n",
+    "pdb_paths = [os.path.join(local_dir, pdb_path) for pdb_path in os.listdir(local_dir) if pdb_path.endswith(\".pdb\")]\n",
+    "print(pdb_paths)\n",
+    "\n",
+    "# let's load local dataset from local_dir!\n",
+    "ds = ProteinGraphDataset(\n",
+    "    root = \"../graphein/ml/datasets/test\",\n",
+    "    pdb_paths = pdb_paths,\n",
+    "    graphein_config=ProteinGraphConfig(),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DataBatch(edge_index=[2, 666], node_id=[2], coords=[2], name=[2], dist_mat=[2], num_nodes=671, batch=[671], ptr=[3])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create a dataloader from dataset and inspect a batch\n",
+    "from torch_geometric.loader import DataLoader\n",
+    "dl = DataLoader(ds, batch_size=2, shuffle=True, drop_last=True)\n",
+    "for i in dl:\n",
+    "    print(i)\n",
+    "    break"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -171,6 +242,8 @@
     "        # Root directory where the dataset should be saved.\n",
     "        name: str,                                                             \n",
     "        # Name of the dataset. Will be saved to ``data_$name.pt``.\n",
+    "        pdb_paths:Optional[List[str]] =None,\n",
+    "        # List of full path of pdb files to load.\n",
     "        pdb_codes: Optional[List[str]] = None,                                 \n",
     "        #  List of PDB codes to download and parse from the PDB.\n",
     "        uniprot_ids: Optional[List[str]] = None,                               \n",
@@ -292,6 +365,124 @@
     "    break"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load from local path\n",
+    "\n",
+    "\n",
+    "Creating an ``InMemoryProteinGraphDataset`` from a list of full paths to PDB files:\n",
+    "\n",
+    "* ``root/raw`` - Will be empty since no pdb files are downloaded\n",
+    "* ``root/processed`` - Contains processed datasets saved as ``data_{name}.pt``\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['../tests/protein/test_data/1lds.pdb', '../tests/protein/test_data/4hhb.pdb', '../tests/protein/test_data/alphafold_structure.pdb']\n",
+      "Constructing Graphs...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Processing...\n"
+     ]
+    },
+    {
+     "data": {
+      "application/json": {
+       "ascii": false,
+       "bar_format": null,
+       "colour": null,
+       "elapsed": 0.2526402473449707,
+       "initial": 0,
+       "n": 0,
+       "ncols": null,
+       "nrows": null,
+       "postfix": null,
+       "prefix": "",
+       "rate": null,
+       "total": 3,
+       "unit": "it",
+       "unit_divisor": 1000,
+       "unit_scale": false
+      },
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d5ed353098664f6f803fa502264df986",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/3 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Converting Graphs...\n",
+      "Saving Data...\n",
+      "Done!\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Done!\n"
+     ]
+    }
+   ],
+   "source": [
+    "from graphein.ml.datasets.torch_geometric_dataset import InMemoryProteinGraphDataset\n",
+    "\n",
+    "\n",
+    "local_dir = \"../tests/protein/test_data/\"\n",
+    "pdb_paths = [os.path.join(local_dir, pdb_path) for pdb_path in os.listdir(local_dir) if pdb_path.endswith(\".pdb\")]\n",
+    "print(pdb_paths)\n",
+    "\n",
+    "# let's load local dataset from local_dir!\n",
+    "ds = InMemoryProteinGraphDataset(\n",
+    "    root = \"../graphein/ml/datasets/test\",\n",
+    "    name = \"test\",\n",
+    "    pdb_paths = pdb_paths,\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DataBatch(edge_index=[2, 951], node_id=[2], coords=[2], name=[2], dist_mat=[2], num_nodes=956, batch=[956], ptr=[3])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create a dataloader from dataset and inspect a batch\n",
+    "dl = DataLoader(ds, batch_size=2, shuffle=True, drop_last=True)\n",
+    "for i in dl:\n",
+    "    print(i)\n",
+    "    break"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -649,11 +840,8 @@
   }
  ],
  "metadata": {
-  "interpreter": {
-   "hash": "2084dd4fc0c9f9186ef9bb5d9f5c6652432726a285d6ac2dcf2b1a616ab39cbb"
-  },
   "kernelspec": {
-   "display_name": "Python 3.8.12 ('graphein-wip')",
+   "display_name": "Python 3.7.13 ('base')",
    "language": "python",
    "name": "python3"
   },
@@ -667,9 +855,14 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.12"
+   "version": "3.7.11"
   },
-  "orig_nbformat": 4
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe"
+   }
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 2

From dfefcd99252e6d7b712fbd0058de6e5ed6dc13f2 Mon Sep 17 00:00:00 2001
From: Nicktf <49584439+1511878618@users.noreply.github.com>
Date: Wed, 14 Sep 2022 07:40:35 +0000
Subject: [PATCH 3/8] test and black and isort and add CHANGELOG.md

---
 CHANGELOG.md                                  | 19 ++++++++++++++++++
 .../ml/datasets/torch_geometric_dataset.py    | 20 +++++++++++++------
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 80ccadf3a..e8e55b429 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,22 @@
+### local_dataset
+
+* changes: support for loading local pdb files by ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset``.
+
+
+* `python -m py.test tests/`, a part result of testing is as below:
+![](https://tva1.sinaimg.cn/large/e6c9d24egy1h6664wn7zsj21qs0d444h.jpg)
+* Also, some documentation are added into `dataloader_tutorial`
+
+#### ML
+
+* [Feature] add support for loading local pdb files to both ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset``
+
+>by adding a params:`pdb_paths` and set the `self.raw_dir` to the root path(`self.pdb_path`) of pdb_paths list (the root path should be only one, pdb files should be under the same folder).
+>
+>it will works from loading pdb files from the `self.pdb_path` instead of loading from self.raw. 
+> If desire to download from af2 or pdb, just set `pdb_paths` to `None` and it goes back to the former version.
+
+If this change would be accepted, i'll try to make the downloading and loadoing from local could work together.
 ### 1.5.1
 
 #### Protein
diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py
index 988410f14..de886e24e 100644
--- a/graphein/ml/datasets/torch_geometric_dataset.py
+++ b/graphein/ml/datasets/torch_geometric_dataset.py
@@ -41,7 +41,7 @@ def __init__(
         self,
         root: str,
         name: str,
-        pdb_paths:Optional[List[str]] =None,
+        pdb_paths: Optional[List[str]] = None,
         pdb_codes: Optional[List[str]] = None,
         uniprot_ids: Optional[List[str]] = None,
         graph_label_map: Optional[Dict[str, torch.Tensor]] = None,
@@ -149,7 +149,10 @@ def __init__(
         # Use local saved pdb_files instead of download or move them to self.root/raw dir
         else:
             if isinstance(self.pdb_paths, list):
-                self.structures = [os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths]
+                self.structures = [
+                    os.path.splitext(os.path.split(pdb_path)[-1])[0]
+                    for pdb_path in self.pdb_paths
+                ]
                 self.pdb_path, _ = os.path.split(self.pdb_paths[0])
 
         if self.pdb_codes and self.uniprot_ids:
@@ -192,12 +195,13 @@ def raw_file_names(self) -> List[str]:
     def processed_file_names(self) -> List[str]:
         """Name of the processed file."""
         return [f"data_{self.name}.pt"]
+
     @property
     def raw_dir(self) -> str:
         if self.pdb_paths is not None:
             return self.pdb_path  # replace raw dir with user local pdb_path
         else:
-            return os.path.join(self.root, 'raw')
+            return os.path.join(self.root, "raw")
 
     def download(self):
         """Download the PDB files from RCSB or Alphafold."""
@@ -322,7 +326,7 @@ class ProteinGraphDataset(Dataset):
     def __init__(
         self,
         root,
-        pdb_paths:Optional[List[str]] =None,
+        pdb_paths: Optional[List[str]] = None,
         pdb_codes: Optional[List[str]] = None,
         uniprot_ids: Optional[List[str]] = None,
         # graph_label_map: Optional[Dict[str, int]] = None,
@@ -425,7 +429,10 @@ def __init__(
         # Use local saved pdb_files instead of download or move them to self.root/raw dir
         else:
             if isinstance(self.pdb_paths, list):
-                self.structures = [os.path.splitext(os.path.split(pdb_path)[-1])[0] for pdb_path in self.pdb_paths]
+                self.structures = [
+                    os.path.splitext(os.path.split(pdb_path)[-1])[0]
+                    for pdb_path in self.pdb_paths
+                ]
                 self.pdb_path, _ = os.path.split(self.pdb_paths[0])
 
         # Labels & Chains
@@ -480,12 +487,13 @@ def processed_file_names(self) -> List[str]:
             ]
         else:
             return [f"{pdb}.pt" for pdb in self.structures]
+
     @property
     def raw_dir(self) -> str:
         if self.pdb_paths is not None:
             return self.pdb_path  # replace raw dir with user local pdb_path
         else:
-            return os.path.join(self.root, 'raw')
+            return os.path.join(self.root, "raw")
 
     def validate_input(self):
         if self.graph_label_map is not None:

From 60a31bbdad3656d53b866cf3bb4864288a1c2fd8 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Wed, 14 Sep 2022 14:31:05 +0200
Subject: [PATCH 4/8] docstring formatting

---
 graphein/ml/datasets/torch_geometric_dataset.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py
index de886e24e..96133271c 100644
--- a/graphein/ml/datasets/torch_geometric_dataset.py
+++ b/graphein/ml/datasets/torch_geometric_dataset.py
@@ -73,8 +73,8 @@ def __init__(
         :type root: str
         :param name: Name of the dataset. Will be saved to ``data_$name.pt``.
         :type name: str
-        :param pdb_paths:List of full path of pdb files to load. Defaults to None
-        :type pdb_paths:Optional[List[str]], optional
+        :param pdb_paths: List of full paths to PDB files to load. Defaults to ``None``.
+        :type pdb_paths: Optional[List[str]], optional
         :param pdb_codes: List of PDB codes to download and parse from the PDB.
             Defaults to None.
         :type pdb_codes: Optional[List[str]], optional
@@ -325,7 +325,7 @@ def process(self):
 class ProteinGraphDataset(Dataset):
     def __init__(
         self,
-        root,
+        root: str,
         pdb_paths: Optional[List[str]] = None,
         pdb_codes: Optional[List[str]] = None,
         uniprot_ids: Optional[List[str]] = None,
@@ -355,8 +355,8 @@ def __init__(
 
         :param root: Root directory where the dataset should be saved.
         :type root: str
-        :param pdb_paths:List of full path of pdb files to load. Defaults to None
-        :type pdb_paths:Optional[List[str]], optional
+        :param pdb_paths: List of full paths to PDB files to load. Defaults to ``None``.
+        :type pdb_paths: Optional[List[str]], optional
         :param pdb_codes: List of PDB codes to download and parse from the PDB.
             Defaults to ``None``.
         :type pdb_codes: Optional[List[str]], optional

From 7758a913fb1a637eec33ae3e2005f24f1f3d9a87 Mon Sep 17 00:00:00 2001
From: Nicktf <49584439+1511878618@users.noreply.github.com>
Date: Wed, 14 Sep 2022 21:05:20 +0800
Subject: [PATCH 5/8] comment jupyter; just run  and ssh or vscode to interact
 with container

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index c86d49b18..579127df1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -44,7 +44,7 @@ RUN conda install -c dglteam dgl
 RUN conda install -c salilab dssp
 
 RUN conda install -c conda-forge ipywidgets
-RUN jupyter nbextension enable --py widgetsnbextension
+# RUN jupyter nbextension enable --py widgetsnbextension
 
 RUN export CUDA=$(python -c "import torch; print('cu'+torch.version.cuda.replace('.',''))") \
     && export TORCH=$(python -c "import torch; print(torch.__version__)") \

From 9f16e40d6f4c9f5238324426611a984f9516926f Mon Sep 17 00:00:00 2001
From: Nicktf <49584439+1511878618@users.noreply.github.com>
Date: Wed, 14 Sep 2022 21:22:37 +0800
Subject: [PATCH 6/8] install dependency of jupyter nbextension

---
 Dockerfile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 579127df1..e8a9760bd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,9 +42,12 @@ RUN conda install -c fvcore -c iopath -c conda-forge fvcore iopath
 RUN conda install -c pytorch3d pytorch3d
 RUN conda install -c dglteam dgl
 RUN conda install -c salilab dssp
-
 RUN conda install -c conda-forge ipywidgets
-# RUN jupyter nbextension enable --py widgetsnbextension
+
+# or conda install; may be it will work; or just comment `jupyter nbextension enable --py widgetsnbextension`
+RUN pip install jupyter_contrib_nbextensions 
+
+RUN jupyter nbextension enable --py widgetsnbextension
 
 RUN export CUDA=$(python -c "import torch; print('cu'+torch.version.cuda.replace('.',''))") \
     && export TORCH=$(python -c "import torch; print(torch.__version__)") \

From 2d17b5b0934c9e0d3defeeeae759908e04b8fcd8 Mon Sep 17 00:00:00 2001
From: ryan <ryan.greenhalgh@hotmail.co.uk>
Date: Thu, 15 Sep 2022 11:50:56 +0100
Subject: [PATCH 7/8] Fixed Dockerfile and unit tests

---
 Dockerfile                                     |  7 ++-----
 graphein/grn/parse_regnetwork.py               | 18 ++++++++++++++----
 .../ml/datasets/torch_geometric_dataset.py     |  2 ++
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index e8a9760bd..ec0d69048 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -44,11 +44,6 @@ RUN conda install -c dglteam dgl
 RUN conda install -c salilab dssp
 RUN conda install -c conda-forge ipywidgets
 
-# or conda install; may be it will work; or just comment `jupyter nbextension enable --py widgetsnbextension`
-RUN pip install jupyter_contrib_nbextensions 
-
-RUN jupyter nbextension enable --py widgetsnbextension
-
 RUN export CUDA=$(python -c "import torch; print('cu'+torch.version.cuda.replace('.',''))") \
     && export TORCH=$(python -c "import torch; print(torch.__version__)") \
     && pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html --no-cache-dir \
@@ -57,6 +52,8 @@ RUN export CUDA=$(python -c "import torch; print('cu'+torch.version.cuda.replace
     && pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html --no-cache-dir \
     && pip install torch-geometric --no-cache-dir
 
+RUN pip install jupyter_contrib_nbextensions 
+RUN jupyter nbextension enable --py widgetsnbextension
 
 # Testing
 # docker-compose -f docker-compose.cpu.yml up -d --build
diff --git a/graphein/grn/parse_regnetwork.py b/graphein/grn/parse_regnetwork.py
index 8b0a22829..8677866b0 100644
--- a/graphein/grn/parse_regnetwork.py
+++ b/graphein/grn/parse_regnetwork.py
@@ -14,6 +14,7 @@
 
 import pandas as pd
 import wget
+import ssl
 
 from graphein.utils.utils import filter_dataframe, ping
 
@@ -41,10 +42,10 @@ def _download_RegNetwork(
             "RegNetwork is not available. Please check your internet connection or verify at: http://www.regnetworkweb.org"
         )
 
-    mouse_url = "http://regnetworkweb.org/download/mouse.zip"
+    mouse_url = "https://regnetworkweb.org/download/mouse.zip"
 
     if network_type == "human":
-        human_url = "http://www.regnetworkweb.org/download/human.zip"
+        human_url = "https://regnetworkweb.org/download/human.zip"
         url = human_url
     elif network_type == "mouse":
         url = mouse_url
@@ -66,8 +67,12 @@ def _download_RegNetwork(
     # Download data and unzip
     if not os.path.exists(file):
         log.info("Downloading RegNetwork ...")
+        # switch ssl context for unverified download
+        default_https_context = ssl._create_default_https_context
+        ssl._create_default_https_context = ssl._create_unverified_context
         wget.download(url, compressed_file)
-
+        # switch ssl context back to default
+        ssl._create_default_https_context = default_https_context
         with zipfile.ZipFile(compressed_file, "r") as zip_ref:
             zip_ref.extractall(out_dir)
 
@@ -80,7 +85,7 @@ def _download_RegNetwork_regtypes(root_dir: Optional[Path] = None) -> str:
 
     :param root_dir: Path object specifying the location to download RegNetwork to
     """
-    url = "http://www.regnetworkweb.org/download/RegulatoryDirections.zip"
+    url = "https://regnetworkweb.org/download/RegulatoryDirections.zip"
 
     if root_dir is None:
         root_dir = Path(__file__).parent.parent.parent / "datasets"
@@ -94,7 +99,12 @@ def _download_RegNetwork_regtypes(root_dir: Optional[Path] = None) -> str:
     # Download data and unzip
     if not os.path.exists(file):
         log.info("Downloading RegNetwork reg types ...")
+        # switch ssl context for unverified download
+        default_https_context = ssl._create_default_https_context
+        ssl._create_default_https_context = ssl._create_unverified_context
         wget.download(url, compressed_file)
+        # switch ssl context back to default
+        ssl._create_default_https_context = default_https_context
 
         with zipfile.ZipFile(compressed_file, "r") as zip_ref:
             zip_ref.extractall(out_dir)
diff --git a/graphein/ml/datasets/torch_geometric_dataset.py b/graphein/ml/datasets/torch_geometric_dataset.py
index 96133271c..5de6f9eb6 100644
--- a/graphein/ml/datasets/torch_geometric_dataset.py
+++ b/graphein/ml/datasets/torch_geometric_dataset.py
@@ -177,6 +177,7 @@ def __init__(
         self.graph_transformation_funcs = graph_transformation_funcs
         self.pdb_transform = pdb_transform
         self.num_cores = num_cores
+        self.af_version = af_version
         super().__init__(
             root,
             transform=transform,
@@ -462,6 +463,7 @@ def __init__(
         self.num_cores = num_cores
         self.pdb_transform = pdb_transform
         self.graph_transformation_funcs = graph_transformation_funcs
+        self.af_version = af_version
         super().__init__(
             root,
             transform=transform,

From f209284f8d2148971b7902235d18e55562076122 Mon Sep 17 00:00:00 2001
From: Arian Jamasb <arjamasb@gmail.com>
Date: Fri, 16 Sep 2022 13:38:30 +0200
Subject: [PATCH 8/8] Update change log

---
 CHANGELOG.md | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e8e55b429..57f7d4b55 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,22 +1,20 @@
-### local_dataset
+### 1.5.2
 
-* changes: support for loading local pdb files by ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset``.
+#### GRN
+* [Bugfix] - [#208](https://github.com/a-r-j/graphein/pull/208) - Resolves SSL issues with RegNetwork.
 
 
-* `python -m py.test tests/`, a part result of testing is as below:
-![](https://tva1.sinaimg.cn/large/e6c9d24egy1h6664wn7zsj21qs0d444h.jpg)
-* Also, some documentation are added into `dataloader_tutorial`
-
 #### ML
-
-* [Feature] add support for loading local pdb files to both ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset``
-
+* [Feature] - [#208](https://github.com/a-r-j/graphein/pull/208) - Adds support for loading local PDB files in ``ProteinGraphDataset`` and ``InMemoryProteinGraphDataset``.
 >by adding a params:`pdb_paths` and set the `self.raw_dir` to the root path(`self.pdb_path`) of pdb_paths list (the root path should be only one, pdb files should be under the same folder).
 >
 >it will works from loading pdb files from the `self.pdb_path` instead of loading from self.raw. 
 > If desire to download from af2 or pdb, just set `pdb_paths` to `None` and it goes back to the former version.
 
-If this change would be accepted, i'll try to make the downloading and loadoing from local could work together.
+#### CI
+* [Bugfix] - [#208](https://github.com/a-r-j/graphein/pull/208) - Explicitly installs `jupyter_contrib_nbextensions` in Docker.
+
+
 ### 1.5.1
 
 #### Protein