diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b779dab3..e614a2446 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@
 #### Bug fixes
 * Fixing HISAT2 Index Building for large reference genomes [#153](https://github.com/nf-core/rnaseq/issues/153)
 * Fixing HISAT2 BAM sorting using more memory than available on the system
+* Fixing MarkDuplicates memory consumption issues following [#179](https://github.com/nf-core/rnaseq/pull/179)
 
 #### Dependency Updates
 
diff --git a/conf/base.config b/conf/base.config
index 4ae0de834..3b087a99d 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -23,6 +23,12 @@ process {
   withName: trim_galore {
     time = { check_max( 8.h * task.attempt, 'time' ) }
   }
+  withName:markDuplicates {
+    // The JVM heap limit (-Xmx) should be kept lower than this allocation;
+    // it is set through markdup_java_options
+    cpus = { check_max( 8, 'cpus' ) }
+    memory = { check_max( 8.GB * task.attempt, 'memory' ) }
+  }
   withName: makeHISATindex {
     cpus = { check_max( 10, 'cpus' ) }
     memory = { check_max( 200.GB * task.attempt, 'memory' ) }
diff --git a/main.nf b/main.nf
index 7e82b02d3..48a09d33b 100644
--- a/main.nf
+++ b/main.nf
@@ -852,7 +852,6 @@ process preseq {
  * STEP 6 Mark duplicates
  */
 process markDuplicates {
-    label 'low_memory'
     tag "${bam.baseName - '.sorted'}"
     publishDir "${params.outdir}/markDuplicates", mode: 'copy',
         saveAs: {filename -> filename.indexOf("_metrics.txt") > 0 ? "metrics/$filename" : "$filename"}
@@ -869,14 +868,12 @@ process markDuplicates {
     file "${bam.baseName}.markDups.bam.bai"
 
     script:
-    if( !task.memory ){
-        log.info "[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this."
-        avail_mem = 3
-    } else {
-        avail_mem = task.memory.toGiga()
-    }
+    // Above 8 GB use the preset params.markdup_java_options; otherwise size the heap from the task
+    // allocation: -Xms = half of it (intdiv avoids fractional values such as 3.5g), -Xmx = 1 GB less.
+    markdup_java_options = (task.memory.toGiga() > 8) ? params.markdup_java_options : "\"-Xms" + task.memory.toGiga().intdiv(2) + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
+
     """
-    picard -Xmx${avail_mem}g MarkDuplicates \\
+    picard ${markdup_java_options} MarkDuplicates \\
         INPUT=$bam \\
         OUTPUT=${bam.baseName}.markDups.bam \\
         METRICS_FILE=${bam.baseName}.markDups_metrics.txt \\
diff --git a/nextflow.config b/nextflow.config
index d633a46a6..ea4b94ed6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -18,6 +18,7 @@ params {
   fcExtraAttributes = 'gene_name'
   fcGroupFeatures = 'gene_id'
   fcGroupFeaturesType = 'gene_biotype'
+  markdup_java_options = '"-Xms4000m -Xmx7g"' //Established values for markDuplicate memory consumption, see issue PR #689 (in Sarek) for details
   splicesites = false
   saveReference = false
   saveTrimmed = false