Merge pull request #108 from cytomining/docedits

documentation edits
cytomining · Nov 15, 2020 · ac3ef68 · ac3ef68
2 parents c1aa34b + 2628816
commit ac3ef68
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 35 deletions.
diff --git a/pycytominer/aggregate.py b/pycytominer/aggregate.py
@@ -1,5 +1,6 @@
 """
-Aggregate single cell data based on given grouping variables
+Aggregate single cell data based on given grouping variables.
+Aggregation assumes input data is arrayed such that each well has a single perturbation.
 """
 
 import numpy as np
@@ -15,7 +16,8 @@
 
 class AggregateProfiles:
     """
-    Class to aggregate single cell morphological profiles
+    Class to aggregate single cell morphological profiles into per-well measurements.
+    You can currently choose to do this by either median or mean.
     """
 
     def __init__(
@@ -42,6 +44,7 @@ def __init__(
         output_file - [default: "none"] string if specified, write to location
         compartments - list of compartments to process
         merge_cols - column indicating which columns to merge images and compartments
+        load_image_data - [default: True] whether or not to load the image table.
         subsample_frac - [default: 1] float (0 < subsample <= 1) indicating percentage of
                          single cells to select
         subsample_n - [default: "all"] int indicating how many samples to include
@@ -119,11 +122,12 @@ def load_image(self):
 
     def count_cells(self, compartment="cells", count_subset=False):
         """
-        Determine how many cells are measured per well.
+        Determine how many objects are present in each well (or subset of each well).
 
         Arguments:
-        compartment - string indicating the compartment to subset
-        count_subset - [default: False] count the number of cells in subset partition
+        compartment - [default: "cells"] string indicating the compartment name to subset
+        count_subset - [default: False] count the number of objects in the current subset partition.
+                       If set to True you must have set up a subset with get_subsample beforehand.
         """
         check_compartments(compartment)
 
@@ -247,8 +251,8 @@ def aggregate_profiles(
         output_file - [default: "none"] if provided, will write annotated profiles to file
                   if not specified, will return the annotated profiles. We recommend
                   that this output file be suffixed with "_augmented.csv".
-        compression - the mechanism to compress [default: None]
-        float_format - decimal precision to use in writing output file [default: None]
+        compression - [default: None] the mechanism to compress. See cyto_utils/output.py for options.
+        float_format - [default: None] decimal precision to use in writing output file
                            For example, use "%.3g" for 3 decimal precision.
 
         Return:

diff --git a/pycytominer/annotate.py b/pycytominer/annotate.py
@@ -39,7 +39,7 @@ def annotate(
     output_file - [default: "none"] if provided, will write annotated profiles to file
                   if not specified, will return the annotated profiles. We recommend
                   that this output file be suffixed with "_augmented.csv".
-    add_metadata_id_to_platemap - boolean if the platemap variables should be recoded
+    add_metadata_id_to_platemap - [default: True] boolean if the platemap variables possibly need "Metadata" prepended
     format_broad_cmap - [default: False] boolean if we need to add columns to make
                         compatible with Broad CMAP naming conventions.
     perturbation_mode - [default: "none"] - either "chemical", "genetic" or "none" and only
@@ -48,7 +48,7 @@ def annotate(
                         metadata information
     external_join_left - [default: "none"] the merge column in the profile metadata
     external_join_right - [default: "none"] the merge column in the external metadata
-    compression - the mechanism to compress [default: None]
+    compression - [default: None] the mechanism to compress. See cyto_utils/output.py for options.
     float_format - decimal precision to use in writing output file [default: None]
                        For example, use "%.3g" for 3 decimal precision.
 

diff --git a/pycytominer/feature_select.py b/pycytominer/feature_select.py
@@ -40,28 +40,30 @@ def feature_select(
 
     Arguments:
     profiles - either pandas DataFrame or a file that stores profile data
-    features - list of cell painting features [default: "infer"]
+    features - [default: "infer"] list of cell painting features
                if "infer", then assume cell painting features are those that start with
                "Cells", "Nuclei", or "Cytoplasm"
-    samples - if provided, a list of samples to provide operation on
-              [default: "all"] - if "all", use all samples to calculate
-    operation - str or list of given operations to perform on input profiles
+    samples - [default: "all"] if provided, a list of samples to perform operation on
+              if "all", use all samples to calculate
+    operation - [default: "variance_threshold"] str or list of given operations to perform on input profiles.
+                See all_ops for available operations.
     output_file - [default: "none"] if provided, will write annotated profiles to file
                   if not specified, will return the annotated profiles. We recommend
                   that this output file be suffixed with
                   "_normalized_variable_selected.csv".
-    na_cutoff - proportion of missing values in a column to tolerate before removing
-    corr_threshold - float between (0, 1) to exclude features above [default: 0.9]
-    freq_cut - float of ratio (2nd most common feature val / most common) [default: 0.1]
-    unique_cut - float of ratio (num unique features / num samples) [default: 0.1]
-    compression - the mechanism to compress [default: None]
-    float_format - decimal precision to use in writing output file [default: None]
+    na_cutoff - [default: 0.05] proportion of missing values in a column to tolerate before removing
+    corr_threshold - [default: 0.9] float between (0, 1) used to exclude features if any
+                     two features are correlated above this threshold.
+    freq_cut - [default: 0.1] float of ratio (2nd most common feature val / most common)
+    unique_cut - [default: 0.1] float of ratio (num unique features / num samples)
+    compression - [default: None] the mechanism to compress. See cyto_utils/output.py for options.
+    float_format - [default: None] decimal precision to use in writing output file
                    For example, use "%.3g" for 3 decimal precision.
-    blocklist_file - file location of dataframe with features to exclude [default: None]
+    blocklist_file - [default: None] file location of dataframe with features to exclude
                      Note that if "blocklist" in operation then will remove standard
                      blocklist
-    outlier_cutoff - the threshold at which the maximum or minimum value of a feature
-                     across a full experiment is excluded [default: 15]. Note that this
+    outlier_cutoff - [default: 15] the threshold at which the maximum or minimum value of a feature
+                     across a full experiment is excluded. Note that this
                      procedure is typically applied (and therefore the default is
                      suitable) for after normalization.
     """

diff --git a/pycytominer/normalize.py b/pycytominer/normalize.py
@@ -30,27 +30,27 @@ def normalize(
 
     Arguments:
     profiles - either pandas DataFrame or a file that stores profile data
-    features - list of cell painting features [default: "infer"]
+    features - [default: "infer"] list of cell painting features
                if "infer", then assume cell painting features are those that
                start with "Cells", "Nuclei", or "Cytoplasm"
-    meta_features - if specified, then output these with specified features
-                    [default: "infer"]
-    samples - string indicating which metadata column and values to use to subset
-              the control samples are often used here [default: 'all']
+    meta_features - [default: "infer"] if specified, then output these with
+                    specified features
+    samples - [default: 'all'] string indicating which metadata column and
+              values to use to subset. The control samples are often used here.
               the format of this variable will be used in a pd.query() function. An
               example is "Metadata_treatment == 'control'" (include all quotes)
-    method - string indicating how the dataframe will be normalized
-             [default: 'standardize']
+    method - [default: 'standardize'] string indicating how the dataframe will
+             be normalized. Check avail_methods for available normalization methods.
     output_file - [default: "none"] if provided, will write annotated profiles to file
                   if not specified, will return the annotated profiles. We recommend
                   that this output file be suffixed with "_normalized.csv".
-    compression - the mechanism to compress [default: None]
-    float_format - decimal precision to use in writing output file [default: None]
+    compression - [default: None] the mechanism to compress. See cyto_utils/output.py for options.
+    float_format - [default: None] decimal precision to use in writing output file
                    For example, use "%.3g" for 3 decimal precision.
-    spherize_center - if data should be centered before sphering (aka whitening)
-                      transform (only used if method = "spherize") [default: True]
-    spherize_method - the type of sphering (aka whitening) normalization used (only
-                      used if method = "spherize") [default: 'ZCA-cor']
+    spherize_center - [default: True] if data should be centered before sphering
+                      (aka whitening) transform (only used if method = "spherize")
+    spherize_method - [default: 'ZCA-cor'] the type of sphering (aka whitening)
+                      normalization used (only used if method = "spherize")
 
     Return:
     A normalized DataFrame