open2c · sergpolly · Apr 1, 2022 · Jan 13, 2022 · Jan 13, 2022 · Jan 20, 2022
diff --git a/cooltools/api/snipping.py b/cooltools/api/snipping.py
@@ -10,7 +10,7 @@
     is_cooler_balanced,
     is_valid_expected,
 )
-from ..lib.common import assign_regions, make_cooler_view
+from ..lib.common import assign_regions, make_cooler_view, assign_view_paired
 
 from ..lib.numutils import LazyToeplitz
 import warnings
@@ -82,12 +82,7 @@ def expand_align_features(features_df, flank, resolution, format="bed"):
 
 
 def make_bin_aligned_windows(
-    binsize,
-    chroms,
-    centers_bp,
-    flank_bp=0,
-    region_start_bp=0,
-    ignore_index=False,
+    binsize, chroms, centers_bp, flank_bp=0, region_start_bp=0, ignore_index=False,
 ):
     """
     Convert genomic loci into bin spans on a fixed bin-segmentation of a
@@ -305,10 +300,7 @@ def __init__(self, clr, cooler_opts=None, view_df=None, min_diag=2):
             # Make sure view_df is a proper viewframe
             try:
                 _ = is_compatible_viewframe(
-                    view_df,
-                    clr,
-                    check_sorting=True,
-                    raise_errors=True,
+                    view_df, clr, check_sorting=True, raise_errors=True,
                 )
             except Exception as e:
                 raise ValueError(
@@ -436,10 +428,7 @@ def __init__(
             # Make sure view_df is a proper viewframe
             try:
                 _ = is_compatible_viewframe(
-                    view_df,
-                    clr,
-                    check_sorting=True,
-                    raise_errors=True,
+                    view_df, clr, check_sorting=True, raise_errors=True,
                 )
             except Exception as e:
                 raise ValueError(
@@ -452,9 +441,7 @@ def __init__(
                 "cis",
                 view_df,
                 verify_cooler=clr,
-                expected_value_cols=[
-                    self.expected_value_col,
-                ],
+                expected_value_cols=[self.expected_value_col,],
                 raise_errors=True,
             )
         except Exception as e:
@@ -582,10 +569,7 @@ def __init__(
             # Make sure view_df is a proper viewframe
             try:
                 _ = is_compatible_viewframe(
-                    view_df,
-                    clr,
-                    check_sorting=True,
-                    raise_errors=True,
+                    view_df, clr, check_sorting=True, raise_errors=True,
                 )
             except Exception as e:
                 raise ValueError(
@@ -598,9 +582,7 @@ def __init__(
                 "cis",
                 view_df,
                 verify_cooler=clr,
-                expected_value_cols=[
-                    self.expected_value_col,
-                ],
+                expected_value_cols=[self.expected_value_col,],
                 raise_errors=True,
             )
         except Exception as e:
@@ -660,6 +642,7 @@ def pileup(
     clr,
     features_df,
     view_df=None,
+    view_name_col="name",
     expected_df=None,
     expected_value_col="balanced.avg",
     flank=100_000,
@@ -713,8 +696,22 @@ def pileup(
     else:
         raise ValueError("Unknown feature_df format")
 
-    features_df = assign_regions(features_df, view_df)
-    # TODO: switch to bioframe.assign_view upon update
+    if feature_type == "bed":
+        features_df = bioframe.assign_view(
+            features_df, view_df, df_view_col="region", view_name_col=view_name_col,
+        )
+    else:
+        features_df = assign_view_paired(
+            features_df, view_df, view_name_col=view_name_col
+        )
+        # Now we consolidate the region annotations into one column `region`
+        # Features that cross between regions get region `None`
+        # The rest get the `region1` value assigned to a new column `region`
+        features_df["region"] = np.where(
+            features_df["region1"] == features_df["region1"],
+            features_df["region1"],
+            None,
+        )
 
     if flank is not None:
         features_df = expand_align_features(
@@ -736,10 +733,7 @@ def pileup(
     else:
         try:
             _ = is_compatible_viewframe(
-                view_df,
-                clr,
-                check_sorting=True,
-                raise_errors=True,
+                view_df, clr, check_sorting=True, raise_errors=True,
             )
         except Exception as e:
             raise ValueError("view_df is not a valid viewframe or incompatible") from e
@@ -770,27 +764,18 @@ def pileup(
     if feature_type == "bed":
         features_df[["lo", "hi"]] = (
             features_df[["lo", "hi"]]
-            .subtract(
-                features_df["region_offset"].fillna(0),
-                axis=0,
-            )
+            .subtract(features_df["region_offset"].fillna(0), axis=0,)
             .astype(int)
         )
     else:
         features_df[["lo1", "hi1"]] = (
             features_df[["lo1", "hi1"]]
-            .subtract(
-                features_df["region_offset"].fillna(0),
-                axis=0,
-            )
+            .subtract(features_df["region_offset"].fillna(0), axis=0,)
             .astype(int)
         )
         features_df[["lo2", "hi2"]] = (
             features_df[["lo2", "hi2"]]
-            .subtract(
-                features_df["region_offset"].fillna(0),
-                axis=0,
-            )
+            .subtract(features_df["region_offset"].fillna(0), axis=0,)
             .astype(int)
         )
 

diff --git a/cooltools/lib/common.py b/cooltools/lib/common.py
@@ -9,6 +9,76 @@
 import bioframe
 
 
+def assign_view_paired(
+    features,
+    view_df,
+    cols_paired=["chrom1", "start1", "end1", "chrom2", "start2", "end2"],
+    cols_view=["chrom", "start", "end"],
+    features_view_cols=["region1", "region2"],
+    view_name_col="name",
+    drop_unassigned=False,
+):
+    """Assign region names from the view to each feature
+
+    Assigns a regular 1D view independently to each side of a bedpe-style dataframe.
+    Will add two columns with region names (`features_view_cols`)
+
+    Parameters
+    ----------
+    features : pd.DataFrame
+        bedpe-style dataframe
+    view_df : pandas.DataFrame
+        ViewFrame specifying region start and ends for assignment. Attempts to
+        convert dictionary and pd.Series formats to viewFrames.
+    cols_paired : list of str
+        he names of columns containing the chromosome, start and end of the
+        genomic intervals. The default values are 'chrom', 'start', 'end'.
+    cols_view : list of str
+        The names of columns containing the chromosome, start and end of the
+        genomic intervals in the view. The default values are 'chrom', 'start', 'end'.
+    features_view_cols : list of str
+        Names of the columns where to save the assigned region names
+    view_name_col : str
+        Column of ``view_df`` with region names. Default 'name'.
+    drop_unassigned : bool
+        If True, drop intervals in df that do not overlap a region in the view.
+        Default False.
+    """
+    features = features.copy()
+    features.reset_index(inplace=True, drop=True)
+
+    cols_left = cols_paired[:3]
+    cols_right = cols_paired[3:]
+
+    bioframe.core.checks.is_bedframe(features, raise_errors=True, cols=cols_left)
+    bioframe.core.checks.is_bedframe(features, raise_errors=True, cols=cols_right)
+    view_df = bioframe.core.construction.make_viewframe(
+        view_df, view_name_col=view_name_col, cols=cols_view
+    )
+    features = bioframe.assign_view(
+        features,
+        view_df,
+        drop_unassigned=drop_unassigned,
+        df_view_col=features_view_cols[0],
+        view_name_col=view_name_col,
+        cols=cols_left,
+        cols_view=cols_view,
+    )
+    features[cols_right[1:]] = features[cols_right[1:]].astype(
+        int
+    )  # gets cast to float above...
+    features = bioframe.assign_view(
+        features,
+        view_df,
+        drop_unassigned=drop_unassigned,
+        df_view_col=features_view_cols[1],
+        view_name_col=view_name_col,
+        cols=cols_right,
+        cols_view=cols_view,
+    )
+    return features
+
+
 def assign_regions(features, supports):
     """
     DEPRECATED. Will be removed in the future versions and replaced with bioframe.overlap()
@@ -278,7 +348,10 @@ def align_track_with_cooler(
         .copy()
         .merge(
             track.rename(columns={c: "chrom", s: "start", e: "end", v: "value"}),
-    how="left", on=["chrom", "start"], suffixes=("", "_"))
+            how="left",
+            on=["chrom", "start"],
+            suffixes=("", "_"),
+        )
     )
 
     if clr_weight_name:
@@ -310,5 +383,4 @@ def align_track_with_cooler(
     if mask_bad_bins:
         clr_track.loc[~valid_bins, "value"] = np.nan
 
-
     return clr_track[["chrom", "start", "end", "value"]]
diff --git a/tests/test_snipping.py b/tests/test_snipping.py
@@ -104,12 +104,16 @@ def test_pileup(request):
             "end": [107_000_000, 113_000_000],
         }
     )
-    stack = cooltools.api.snipping.pileup(clr, windows, view_df, exp, flank=None)
+
+    stack = cooltools.api.snipping.pileup(
+        clr, windows, view_df=view_df, expected_df=exp, flank=None
+    )
+
     # Check that the size of snips is OK and there are two of them:
     assert stack.shape == (5, 5, 2)
 
     # II.
-    # Example off-diagonal features, two regions from annotated genomic regions:
+    # Example off-diagonal features, two features from annotated genomic regions:
     windows = pd.DataFrame(
         {
             "chrom1": ["chr1", "chr1"],
@@ -120,7 +124,9 @@ def test_pileup(request):
             "end2": [112_000_000, 118_000_000],
         }
     )
-    stack = cooltools.api.snipping.pileup(clr, windows, view_df, exp, flank=None)
+    stack = cooltools.api.snipping.pileup(
+        clr, windows, view_df=view_df, expected_df=exp, flank=None
+    )
     # Check that the size of snips is OK and there are two of them:
     assert stack.shape == (5, 5, 2)
 
@@ -136,7 +142,9 @@ def test_pileup(request):
             "end2": [110_000_000, 115_000_000],
         }
     )
-    stack = cooltools.api.snipping.pileup(clr, windows, view_df, exp, flank=None)
+    stack = cooltools.api.snipping.pileup(
+        clr, windows, view_df=view_df, expected_df=exp, flank=None
+    )
     # Check that the size of snips is OK and there are two of them:
     assert stack.shape == (5, 5, 2)
 
@@ -172,7 +180,9 @@ def test_pileup(request):
         }
     )
     with pytest.raises(ValueError):
-        stack = cooltools.api.snipping.pileup(clr, windows, view_df, exp, flank=None)
+        stack = cooltools.api.snipping.pileup(
+            clr, windows, view_df=view_df, expected_df=exp, flank=None
+        )
 
     # DRAFT # Should work with force=True:
     # stack = cooltools.api.snipping.pileup(clr, windows, view_df, exp, flank=0, force=True)