BioPandas · a-r-j · Jul 31, 2023 · May 30, 2023 · May 30, 2023 · May 30, 2023
diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
@@ -10,6 +10,7 @@
 import gzip
 import sys
 import warnings
+import textwrap
 from copy import deepcopy
 from io import StringIO
 from typing import List, Optional
@@ -225,7 +226,7 @@ def impute_element(self, records=("ATOM", "HETATM"), inplace=False):
             Coordinate sections for which the element symbols should be
             imputed.
 
-        inplace : bool, (default: False
+        inplace : bool, default: False
             Performs the operation in-place if True and returns a copy of the
             PDB DataFrame otherwise.
 
@@ -246,6 +247,85 @@ def impute_element(self, records=("ATOM", "HETATM"), inplace=False):
                 lambda x: x[0][1] if len(x[1]) == 3 else x[0][0], axis=1
             )
         return t
+
+    def add_remark(self, code, text='', indent=0):
+        """Add custom REMARK entry.
+
+        The remark is inserted so that the order of REMARK codes is preserved: a
+        remark with code `n` is placed directly after the last remark whose code is
+        less than or equal to `n`. If no remarks are stored, it is placed right
+        before the first ATOM, HETATM or ANISOU record, or after the last stored
+        line if there are no such records either.
+
+        Parameters
+        ----------
+        code : int
+            REMARK code according to PDB standards.
+
+        text : str, default: ''
+            The text of the remark. Text that does not fit into a single line is
+            wrapped into multiple REMARK lines; new line characters also start a
+            new REMARK line.
+
+        indent : int, default: 0
+            Number of white spaces inserted before the text of the remark.
+
+        Returns
+        -------
+        None
+
+        """
+        # Prepare info from self
+        if 'OTHERS' in self.df:
+            df_others = self.df['OTHERS']
+        else:
+            df_others = pd.DataFrame(columns=['record_name', 'entry', 'line_idx'])
+        record_types = [r for r in ('ATOM', 'HETATM', 'ANISOU') if r in self.df]
+        remarks = df_others[df_others['record_name'] == 'REMARK']['entry']
+
+        # Find index and line_idx where to insert the remark to preserve remark code order
+        if len(remarks):
+            remark_codes = remarks.apply(lambda x: x.split(maxsplit=1)[0]).astype(int)
+            insertion_pos = remark_codes.searchsorted(code, side='right')
+            if insertion_pos < len(remark_codes):  # Remark in the middle
+                insertion_idx = remark_codes.index[insertion_pos]
+                insertion_line_idx = df_others.loc[insertion_idx, 'line_idx']
+            else:  # Last remark: right after the final REMARK, not after trailing records
+                last_idx = remark_codes.index[-1]
+                insertion_idx = last_idx + 1
+                insertion_line_idx = df_others.loc[last_idx, 'line_idx'] + 1
+        else:  # First remark
+            insertion_idx = 0
+            # Skip empty frames: their min() is NaN, which min() cannot order reliably
+            firsts = [self.df[r]['line_idx'].min() for r in record_types if len(self.df[r])]
+            # Without any coordinate records, append after the last stored line
+            tail = df_others['line_idx'].max() + 1 if len(df_others) else 0
+            insertion_line_idx = min(firsts) if firsts else tail
+
+        # Wrap remark to fit into 80 characters per line and add indentation
+        wrapper = textwrap.TextWrapper(width=80 - (11 + indent))
+        pieces = [p for part in text.split('\n') for p in (wrapper.wrap(part.strip()) or [' '])]
+        lines = [f'{code:4} ' + indent * ' ' + piece for piece in pieces]
+
+        # Shift line and row indices in OTHERS to create space for the remark
+        line_idx = df_others['line_idx'].copy()
+        line_idx[line_idx >= insertion_line_idx] += len(lines)
+        df_others['line_idx'] = line_idx
+        index = pd.Series(df_others.index.copy())
+        index[index >= insertion_idx] += len(lines)
+        df_others.index = index
+        # Shift only those coordinate rows that follow the inserted remark
+        for records in record_types:
+            df_records = self.df[records]
+            df_records.loc[df_records['line_idx'] >= insertion_line_idx, 'line_idx'] += len(lines)
+
+        # Put remark into 'OTHERS' data frame
+        df_remark = {
+            insertion_idx + offset: ['REMARK', line, insertion_line_idx + offset]
+            for offset, line in enumerate(lines)
+        }
+        df_remark = pd.DataFrame.from_dict(df_remark, orient='index', columns=df_others.columns)
+        self.df['OTHERS'] = pd.concat([df_others, df_remark]).sort_index()
 
     @staticmethod
     def rmsd(df1, df2, s=None, invert=False, decimals=4):

diff --git a/biopandas/pdb/tests/test_write_pdb.py b/biopandas/pdb/tests/test_write_pdb.py
@@ -14,6 +14,9 @@
 TESTDATA_FILENAME2 = os.path.join(
     os.path.dirname(__file__), "data", "4eiy_anisouchunk.pdb"
 )
+TESTDATA_FILENAME3 = os.path.join(
+    os.path.dirname(__file__), "data", "5mtn_multichain.pdb"
+)
 OUTFILE = os.path.join(os.path.dirname(__file__), "data", "tmp.pdb")
 OUTFILE_GZ = os.path.join(os.path.dirname(__file__), "data", "tmp.pdb.gz")
 
@@ -73,6 +76,71 @@ def test_anisou():
     assert f1 == four_eiy
 
 
+def test_add_remark():
+    """Check that new REMARK lines land right after existing ones with the same code."""
+    remark_code = 3
+    texts_and_indents = (
+        ('THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG.', 0),
+        ('', 0),
+        ('THIS IS A NEXT MULTI-LINE INDENTED REMARK\n FOLLOWING THE BLANK REMARK.', 5),
+    )
+    pdb = PandasPdb()
+    pdb.read_pdb(TESTDATA_FILENAME)
+    atom_count = len(pdb.df['ATOM'])
+    for remark_text, remark_indent in texts_and_indents:
+        pdb.add_remark(remark_code, remark_text, remark_indent)
+    pdb.to_pdb(path=OUTFILE)
+
+    # The written file must hold the new remarks between the old REMARK 3 and REMARK 4
+    with open(OUTFILE, "r") as handle:
+        written = handle.read()
+    expected_lines = [
+        "REMARK   3  OTHER REFINEMENT REMARKS: HYDROGENS HAVE BEEN ADDED IN THE RIDING   \n",
+        "REMARK   3  POSITIONS                                                           \n",
+        "REMARK   3 THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80      \n",
+        "REMARK   3 CHARACTERS LONG.                                                     \n",
+        "REMARK   3                                                                      \n",
+        "REMARK   3      THIS IS A NEXT MULTI-LINE INDENTED REMARK                       \n",
+        "REMARK   3      FOLLOWING THE BLANK REMARK.                                     \n",
+        "REMARK   4                                                                      \n",
+    ]
+    assert "".join(expected_lines) in written
+
+    # Reading the written file back must yield the same number of atoms
+    reread = PandasPdb()
+    reread.read_pdb(OUTFILE)
+    os.remove(OUTFILE)
+    assert len(reread.df['ATOM']) == atom_count
+
+
+def test_introduce_remark():
+    """Check that a REMARK can be added to a structure that has no remarks yet."""
+    # Add a single indented remark to the remark-free structure
+    remark_code = 3
+    remark_indent = 1
+    remark_text = 'THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80 CHARACTERS LONG.'
+    pdb = PandasPdb()
+    pdb.read_pdb(TESTDATA_FILENAME3)
+    atom_count = len(pdb.df['ATOM'])
+    pdb.add_remark(remark_code, remark_text, remark_indent)
+    pdb.to_pdb(path=OUTFILE)
+
+    # The new remark must be the very first content of the written file
+    with open(OUTFILE, "r") as handle:
+        written = handle.read()
+    expected_head = "".join([
+        "REMARK   3  THIS IS A HIGHLY IMPORTANT FREE-TEXT REMARK WHICH IS EXACTLY 80     \n",
+        "REMARK   3  CHARACTERS LONG.                                                    \n",
+    ])
+    assert written.startswith(expected_head)
+
+    # Reading the written file back must yield the same number of atoms
+    reread = PandasPdb()
+    reread.read_pdb(OUTFILE)
+    os.remove(OUTFILE)
+    assert len(reread.df['ATOM']) == atom_count
+
+
 def test_b_factor_shift():
     """Test b_factor shifting one white space when saving the fetched pdb."""
     ppdb = PandasPdb()
@@ -82,3 +150,4 @@ def test_b_factor_shift():
     os.remove(OUTFILE)
     assert tmp_df[tmp_df["element_symbol"].isnull() | (tmp_df["element_symbol"] == '')].empty
     assert not tmp_df[tmp_df["blank_4"].isnull() | (tmp_df["blank_4"] == '')].empty
+
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -4,8 +4,8 @@ The CHANGELOG for the current development version is available at
 [https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md](https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md).
 
 
-### 0.5.0dev1 (24/5/2023)
-
+### 0.5.0dev1 (31/7/2023)
+- Implement add_remark for PandasPdb. (Via [Anton Bushuiev](https://github.com/anton-bushuiev), PR #[129](https://github.com/BioPandas/biopandas/pull/129))
 - B_factor shifting one white space issue fix. (Via [Zehra Sarica](https://github.com/zehraacarsarica), PR #[134](https://github.com/BioPandas/biopandas/pull/134))
 - Adds support for pathlib. (Via [Anton Bushuiev](https://github.com/anton-bushuiev), PR #[128](https://github.com/BioPandas/biopandas/pull/128))
 - Adds support for reading Gzipped MMTF files. (Via [Arian Jamasb](https://github.com/a-r-j), PR #[123](https://github.com/rasbt/biopandas/pull/123/files))