nf-core · apeltzer · Mar 25, 2019 · Mar 25, 2019 · Mar 25, 2019 · Mar 25, 2019
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,7 @@
 #### Bug fixes
 * Fixing HISAT2 Index Building for large reference genomes [#153](https://github.com/nf-core/rnaseq/issues/153)
 * Fixing HISAT2 BAM sorting using more memory than available on the system
+* Fixing MarkDuplicates memory consumption issues following [#179](https://github.com/nf-core/rnaseq/pull/179)
 
 
 #### Dependency Updates

diff --git a/conf/base.config b/conf/base.config
@@ -23,6 +23,12 @@ process {
   withName: trim_galore {
     time = { check_max( 8.h * task.attempt, 'time' ) }
   }
+  withName:markDuplicates {
+    // Resources requested for the task. The JVM -Xmx value should be kept lower
+    // than the memory requested here; it is set through markdup_java_options.
+    cpus = { check_max( 8, 'cpus' ) }
+    memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+  }
   withName: makeHISATindex {
     cpus = { check_max( 10, 'cpus' ) }
     memory = { check_max( 200.GB * task.attempt, 'memory' ) }

diff --git a/main.nf b/main.nf
@@ -852,7 +852,6 @@ process preseq {
  * STEP 6 Mark duplicates
  */
 process markDuplicates {
-    label 'low_memory'
     tag "${bam.baseName - '.sorted'}"
     publishDir "${params.outdir}/markDuplicates", mode: 'copy',
         saveAs: {filename -> filename.indexOf("_metrics.txt") > 0 ? "metrics/$filename" : "$filename"}
@@ -869,14 +868,10 @@ process markDuplicates {
     file "${bam.baseName}.markDups.bam.bai"
 
     script:
-    if( !task.memory ){
-        log.info "[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this."
-        avail_mem = 3
-    } else {
-        avail_mem = task.memory.toGiga()
-    }
+    // Above 8 GB use the configured JVM flags; otherwise derive whole-GB heap sizes from task.memory (the JVM rejects fractional sizes such as -Xms3.5g).
+    markdup_java_options = (task.memory.toGiga() > 8) ? params.markdup_java_options : "\"-Xms" + task.memory.toGiga().intdiv(2) + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
     """
-    picard -Xmx${avail_mem}g MarkDuplicates \\
+    picard ${markdup_java_options} MarkDuplicates \\
         INPUT=$bam \\
         OUTPUT=${bam.baseName}.markDups.bam \\
         METRICS_FILE=${bam.baseName}.markDups_metrics.txt \\

diff --git a/nextflow.config b/nextflow.config
@@ -18,6 +18,7 @@ params {
   fcExtraAttributes = 'gene_name'
   fcGroupFeatures = 'gene_id'
   fcGroupFeaturesType = 'gene_biotype'
+  markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for MarkDuplicates memory consumption; see PR #689 in Sarek for details
   splicesites = false
   saveReference = false
   saveTrimmed = false