diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a6ecb5cd..9923b7602 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes +- [[#746](https://github.com/nf-core/rnaseq/issues/746)] - Add `tin.py` output to MultiQC report +- [[#841](https://github.com/nf-core/rnaseq/issues/841)] - Turn `--deseq2_vst` on by default +- [[#857](https://github.com/nf-core/rnaseq/issues/857)] - Missing parameter required by StringTie if using STAR as aligner +- [[#862](https://github.com/nf-core/rnaseq/issues/862)] - Filter samples that have no reads after trimming - Expose additional arguments to UMI-tools as pipeline params: `--umitools_bc_pattern2` is required if the UMI is located on read 2. `--umitools_umi_separator` will often be needed in conjunction with `--skip_umi_extract` as most other tools such as Illumina's `BCL Convert` use a colon instead of an underscore to separate the UMIs. The `--umitools_grouping_method` allows to fine-tune handling of similar but non-identical UMIs. - Updated pipeline template to [nf-core/tools 2.5.1](https://github.com/nf-core/tools/releases/tag/2.5.1) @@ -18,6 +22,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | `--umitools_umi_separator` | | | `--umitools_grouping_method` | +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `multiqc` | 1.11 | 1.13 | +| `picard` | 2.26.10 | 2.27.4 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> **NB:** Dependency has been **added** if just the new version information is present. +> **NB:** Dependency has been **removed** if new version information isn't present. + ## [[3.8.1](https://github.com/nf-core/rnaseq/releases/tag/3.8.1)] - 2022-05-27 - [[#834](https://github.com/nf-core/rnaseq/issues/834)] - `nf-core download` fails with version 3.8 of the pipeline diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 31e1b1c06..68bc9750c 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -104,7 +104,7 @@ sp: rseqc/infer_experiment: fn: "*.infer_experiment.txt" rseqc/tin: - fn: "*.tin.txt" + fn: "*.summary.txt" picard/markdups: fn: "*.MarkDuplicates.metrics.txt" diff --git a/conf/modules.config b/conf/modules.config index 3166f4b52..5cb46caba 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -294,6 +294,7 @@ if (params.remove_ribo_rna) { if (!params.skip_alignment) { process { withName: 'NFCORE_RNASEQ:RNASEQ:.*:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}/samtools_stats" }, mode: params.publish_dir_mode, @@ -333,7 +334,7 @@ if (!params.skip_alignment) { if (!params.skip_markduplicates) { process { withName: '.*:MARK_DUPLICATES_PICARD:PICARD_MARKDUPLICATES' { - ext.args = 'ASSUME_SORTED=true REMOVE_DUPLICATES=false VALIDATION_STRINGENCY=LENIENT TMP_DIR=tmp' + ext.args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' ext.prefix = { "${meta.id}.markdup.sorted" } publishDir = [ [ @@ -360,6 +361,7 @@ if (!params.skip_alignment) { } withName: '.*:MARK_DUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { 
"${meta.id}.markdup.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}/samtools_stats" }, mode: params.publish_dir_mode, @@ -413,6 +415,7 @@ if (!params.skip_alignment) { } withName: '.*:DEDUP_UMI_UMITOOLS_GENOME:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.umi_dedup.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}/samtools_stats" }, mode: params.publish_dir_mode, @@ -513,6 +516,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { '--alignSJDBoverhangMin 1', '--outSAMattributes NH HI AS NM MD', '--quantTranscriptomeBan Singleend', + '--outSAMstrandField intronMotif', params.save_unaligned ? '--outReadsUnmapped Fastx' : '' ].join(' ').trim() publishDir = [ @@ -605,6 +609,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { } withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.transcriptome.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}" }, enabled: false @@ -633,6 +638,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { } withName: '.*:DEDUP_UMI_UMITOOLS_TRANSCRIPTOME:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}/samtools_stats" }, mode: params.publish_dir_mode, diff --git a/docs/output.md b/docs/output.md index c85c649b5..380ffb890 100644 --- a/docs/output.md +++ b/docs/output.md @@ -617,7 +617,7 @@ The [Preseq](http://smithlabresearch.org/software/preseq/) package is aimed at p The script included in the pipeline uses DESeq2 to normalise read counts across all of the provided samples in order to create a PCA plot and a clustered heatmap showing pairwise Euclidean distances between the samples in the experiment. These help to show the similarity between groups of samples and can reveal batch effects and other potential issues with the experiment. 
-For larger experiments, it may be recommended to use the `vst` transformation instead of the default `rlog` option. You can do this by providing the `--deseq2_vst` parameter to the pipeline. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation. +By default, the pipeline uses the `vst` transformation, which is better suited to larger experiments. Set the parameter `--deseq2_vst false` if you would rather use DESeq2's `rlog` transformation. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation. The PCA plots are generated based alternately on the top five hundred most variable genes, or all genes. The former is the conventional approach that is more likely to pick up strong effects (ie the biological signal) and the latter, when different, is picking up a weaker but consistent effect that is synchronised across many transcripts. We project both of these onto the first two PCs (shown in the top row of the figure below), which is the best two dimensional representation of the variation between samples. 
diff --git a/modules.json b/modules.json index fd36af099..787f246f7 100644 --- a/modules.json +++ b/modules.json @@ -7,15 +7,15 @@ "modules": { "bbmap/bbsplit": { "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" }, "cat/fastq": { "branch": "master", - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" + "git_sha": "e9bc33485e420b51cfbed3cf63ac7463cd5739d3" }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "82501fe6d0d12614db67751d30af98d16e63dc59" }, "custom/getchromsizes": { "branch": "master", @@ -31,31 +31,31 @@ }, "gunzip": { "branch": "master", - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" + "git_sha": "90aef30f432332bdf0ce9f4b9004aa5d5c4960bb" }, "hisat2/align": { "branch": "master", - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" }, "hisat2/build": { "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" }, "hisat2/extractsplicesites": { "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" }, "picard/markduplicates": { "branch": "master", - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "preseq/lcextrap": { "branch": "master", - "git_sha": "7111e571cc5b6069de4673cd6165af680f17b4d7" + "git_sha": "90aef30f432332bdf0ce9f4b9004aa5d5c4960bb" }, "qualimap/rnaseq": { "branch": "master", - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" + "git_sha": "82501fe6d0d12614db67751d30af98d16e63dc59" }, "rsem/calculateexpression": { "branch": "master", @@ -107,11 +107,11 @@ }, "samtools/flagstat": { "branch": "master", - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "git_sha": 
"bbb99cb8d679555cc01c98766de7869f83283545" }, "samtools/idxstats": { "branch": "master", - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "git_sha": "f0a86eaf5bf6ca73c5571193edd00c25bfa308f5" }, "samtools/index": { "branch": "master", @@ -119,11 +119,11 @@ }, "samtools/sort": { "branch": "master", - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "git_sha": "d5d785b3d8b422cda9c6d84a23f629a8e9ff8cd8" }, "samtools/stats": { "branch": "master", - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "git_sha": "f4eab7945952dc4934224309701a49913ea05ae6" }, "sortmerna": { "branch": "master", @@ -131,15 +131,15 @@ }, "star/align": { "branch": "master", - "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba" + "git_sha": "cd22a76c78b09f7837a5a75642774a4bf48c92ba" }, "star/genomegenerate": { "branch": "master", - "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba" + "git_sha": "cd22a76c78b09f7837a5a75642774a4bf48c92ba" }, "stringtie/stringtie": { "branch": "master", - "git_sha": "6d88f2da8cc5d586456e801b535cc4213e0fa2f7" + "git_sha": "47cc7a77bfa10c0aa6e6c393d09e895a85d1583e" }, "subread/featurecounts": { "branch": "master", @@ -147,19 +147,19 @@ }, "trimgalore": { "branch": "master", - "git_sha": "85ec13ff1fc2196c5a507ea497de468101baabed" + "git_sha": "bfbdc4f79bb26bc698d30b74abd081db8af77f4d" }, "ucsc/bedclip": { "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" }, "ucsc/bedgraphtobigwig": { "branch": "master", - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" + "git_sha": "90aef30f432332bdf0ce9f4b9004aa5d5c4960bb" }, "umitools/dedup": { "branch": "master", - "git_sha": "84f02a08a5d6d881cb18c31fb15faca4e735a1f2" + "git_sha": "9d489648a7425fa18d8bed18935442c104d8733c" }, "umitools/extract": { "branch": "master", @@ -167,7 +167,7 @@ }, "untar": { "branch": "master", - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" + "git_sha": 
"b63b9f752dc8e43fc70b0491aad5e0a270ab0e10" } } } diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index f31f440ad..b725b9011 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path multiqc_config diff --git a/modules/nf-core/modules/bbmap/bbsplit/meta.yml b/modules/nf-core/modules/bbmap/bbsplit/meta.yml index 9d9f10daf..2b9ab8cbb 100644 --- a/modules/nf-core/modules/bbmap/bbsplit/meta.yml +++ b/modules/nf-core/modules/bbmap/bbsplit/meta.yml @@ -1,8 +1,9 @@ name: bbmap_bbsplit -description: write your description here +description: Split sequencing reads by mapping them to multiple references simultaneously keywords: - align - map + - fastq - genome - reference tools: diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/modules/cat/fastq/main.nf index b68548959..4fa365d33 100644 --- a/modules/nf-core/modules/cat/fastq/main.nf +++ b/modules/nf-core/modules/cat/fastq/main.nf @@ -1,6 +1,6 @@ process CAT_FASTQ { tag "$meta.id" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -20,9 +20,9 @@ process CAT_FASTQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads.collect{ it.toString() } + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()]
     if (meta.single_end) {
-        if (readList.size > 1) {
+        if (readList.size >= 1) {
             """
             cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
 
@@ -33,7 +33,7 @@ process CAT_FASTQ {
             """
         }
     } else {
-        if (readList.size > 2) {
+        if (readList.size >= 2) {
             def read1 = []
             def read2 = []
             readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
@@ -48,4 +48,34 @@ process CAT_FASTQ {
             """
         }
     }
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
+    // Thresholds mirror the script block (>= 1 for SE, >= 2 for PE) so a lone file or a single pair still produces stub outputs
+    if (meta.single_end) {
+        if (readList.size >= 1) {
+            """
+            touch ${prefix}.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    } else {
+        if (readList.size >= 2) {
+            """
+            touch ${prefix}_1.merged.fastq.gz
+            touch ${prefix}_2.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    }
+
 }
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
index 327d51005..cebb6e058 100644
--- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
@@ -1,11 +1,11 @@
 process CUSTOM_DUMPSOFTWAREVERSIONS {
-    label 'process_low'
+    label 'process_single'
 
     // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-    conda (params.enable_conda ? "bioconda::multiqc=1.11" : null)
+    conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index 787bdb7b1..7c2abfa49 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,10 +1,9 @@ #!/usr/bin/env python +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): html = [ diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/modules/gunzip/main.nf index 61bf1afa5..fa6ba26a5 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/modules/gunzip/main.nf @@ -1,6 +1,6 @@ process GUNZIP { tag "$archive" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -31,4 +31,14 @@ process GUNZIP { gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') END_VERSIONS """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/hisat2/align/main.nf b/modules/nf-core/modules/hisat2/align/main.nf index 0a45ce724..8b6eb3313 100644 --- a/modules/nf-core/modules/hisat2/align/main.nf +++ b/modules/nf-core/modules/hisat2/align/main.nf @@ -1,9 +1,8 @@ -def VERSION = '2.2.0' // Version information not provided by tool on CLI - process HISAT2_ALIGN { tag "$meta.id" label 'process_high' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda (params.enable_conda ? "bioconda::hisat2=2.2.0 bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:0e773bb207600fcb4d38202226eb20a33c7909b6-0' : @@ -26,6 +25,7 @@ process HISAT2_ALIGN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
def strandedness = ''
     if (meta.strandedness == 'forward') {
diff --git a/modules/nf-core/modules/hisat2/build/main.nf b/modules/nf-core/modules/hisat2/build/main.nf
index 50f65e3a1..1b36c6b8e 100644
--- a/modules/nf-core/modules/hisat2/build/main.nf
+++ b/modules/nf-core/modules/hisat2/build/main.nf
@@ -1,10 +1,9 @@
-def VERSION = '2.2.0' // Version information not provided by tool on CLI
-
 process HISAT2_BUILD {
     tag "$fasta"
     label 'process_high'
     label 'process_high_memory'
 
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
     conda (params.enable_conda ? 'bioconda::hisat2=2.2.1' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' :
@@ -45,6 +44,7 @@ process HISAT2_BUILD {
         log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index."
         log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check."
     }
+    def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions; it must match the hisat2 version pinned in the conda/container directives above.
     """
     mkdir hisat2
     $extract_exons
diff --git a/modules/nf-core/modules/hisat2/extractsplicesites/main.nf b/modules/nf-core/modules/hisat2/extractsplicesites/main.nf
index 941359779..d644bcc5f 100644
--- a/modules/nf-core/modules/hisat2/extractsplicesites/main.nf
+++ b/modules/nf-core/modules/hisat2/extractsplicesites/main.nf
@@ -1,9 +1,8 @@
-def VERSION = '2.2.0' // Version information not provided by tool on CLI
-
 process HISAT2_EXTRACTSPLICESITES {
     tag "$gtf"
     label 'process_medium'
 
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
     conda (params.enable_conda ? 
'bioconda::hisat2=2.2.1' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' :
@@ -21,6 +20,7 @@ process HISAT2_EXTRACTSPLICESITES {
 
     script:
     def args = task.ext.args ?: ''
+    def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions; it must match the hisat2 version pinned in the conda/container directives above.
     """
     hisat2_extract_splice_sites.py $gtf > ${gtf.baseName}.splice_sites.txt
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/nf-core/modules/picard/markduplicates/main.nf b/modules/nf-core/modules/picard/markduplicates/main.nf
index e754a587e..4e559fea0 100644
--- a/modules/nf-core/modules/picard/markduplicates/main.nf
+++ b/modules/nf-core/modules/picard/markduplicates/main.nf
@@ -2,10 +2,10 @@ process PICARD_MARKDUPLICATES {
     tag "$meta.id"
     label 'process_medium'
 
-    conda (params.enable_conda ? "bioconda::picard=2.26.10" : null)
+    conda (params.enable_conda ? "bioconda::picard=2.27.4" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.26.10--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.26.10--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" input: tuple val(meta), path(bam) @@ -33,9 +33,9 @@ process PICARD_MARKDUPLICATES { -Xmx${avail_mem}g \\ MarkDuplicates \\ $args \\ - I=$bam \\ - O=${prefix}.bam \\ - M=${prefix}.MarkDuplicates.metrics.txt + --INPUT $bam \\ + --OUTPUT ${prefix}.bam \\ + --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/preseq/lcextrap/main.nf b/modules/nf-core/modules/preseq/lcextrap/main.nf index 97261557e..a98a922c0 100644 --- a/modules/nf-core/modules/preseq/lcextrap/main.nf +++ b/modules/nf-core/modules/preseq/lcextrap/main.nf @@ -1,6 +1,6 @@ process PRESEQ_LCEXTRAP { tag "$meta.id" - label 'process_medium' + label 'process_single' label 'error_ignore' conda (params.enable_conda ? 
"bioconda::preseq=3.1.2" : null) diff --git a/modules/nf-core/modules/preseq/lcextrap/meta.yml b/modules/nf-core/modules/preseq/lcextrap/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/modules/qualimap/rnaseq/main.nf b/modules/nf-core/modules/qualimap/rnaseq/main.nf index 3b2f88ade..38293d27f 100644 --- a/modules/nf-core/modules/qualimap/rnaseq/main.nf +++ b/modules/nf-core/modules/qualimap/rnaseq/main.nf @@ -49,4 +49,15 @@ process QUALIMAP_RNASEQ { qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf index b87b21081..c3152acae 100644 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ b/modules/nf-core/modules/samtools/flagstat/main.nf @@ -1,6 +1,6 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -19,12 +19,13 @@ process SAMTOOLS_FLAGSTAT { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ flagstat \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ $bam \\ - > ${bam}.flagstat + > ${prefix}.flagstat cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/modules/samtools/idxstats/main.nf index a49ff35f9..87618e5f3 100644 --- a/modules/nf-core/modules/samtools/idxstats/main.nf +++ b/modules/nf-core/modules/samtools/idxstats/main.nf @@ -19,11 +19,14 @@ process SAMTOOLS_IDXSTATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ samtools \\ idxstats \\ + --threads ${task.cpus-1} \\ $bam \\ - > ${bam}.idxstats + > ${prefix}.idxstats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/modules/samtools/sort/main.nf index b4fc1cbe9..ab7f1ccaf 100644 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ b/modules/nf-core/modules/samtools/sort/main.nf @@ -12,6 +12,7 @@ process SAMTOOLS_SORT { output: tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/modules/samtools/sort/meta.yml index a820c55a3..092897512 100644 --- a/modules/nf-core/modules/samtools/sort/meta.yml +++ b/modules/nf-core/modules/samtools/sort/meta.yml @@ -39,6 +39,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/modules/samtools/stats/main.nf index bbdc3240d..9b0c38678 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ 
b/modules/nf-core/modules/samtools/stats/main.nf @@ -1,6 +1,6 @@ process SAMTOOLS_STATS { tag "$meta.id" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -20,14 +20,15 @@ process SAMTOOLS_STATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" """ samtools \\ stats \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ ${reference} \\ ${input} \\ - > ${input}.stats + > ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +39,7 @@ process SAMTOOLS_STATS { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${input}.stats + touch ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/star/align/main.nf b/modules/nf-core/modules/star/align/main.nf index e5aa36f56..47c58eb0b 100644 --- a/modules/nf-core/modules/star/align/main.nf +++ b/modules/nf-core/modules/star/align/main.nf @@ -70,4 +70,28 @@ process STAR_ALIGN { gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}Xd.out.bam + touch ${prefix}.Log.final.out + touch ${prefix}.Log.out + touch ${prefix}.Log.progress.out + touch ${prefix}.sortedByCoord.out.bam + touch ${prefix}.toTranscriptome.out.bam + touch ${prefix}.Aligned.unsort.out.bam + touch ${prefix}.unmapped_1.fastq.gz + touch ${prefix}.unmapped_2.fastq.gz + touch ${prefix}.tab + touch ${prefix}.Chimeric.out.junction + touch ${prefix}.out.sam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU 
Awk //; s/, .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/star/genomegenerate/main.nf b/modules/nf-core/modules/star/genomegenerate/main.nf index 3c298016f..6ec634a17 100644 --- a/modules/nf-core/modules/star/genomegenerate/main.nf +++ b/modules/nf-core/modules/star/genomegenerate/main.nf @@ -65,4 +65,32 @@ process STAR_GENOMEGENERATE { END_VERSIONS """ } + + stub: + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/stringtie/stringtie/main.nf b/modules/nf-core/modules/stringtie/stringtie/main.nf index c70c9819a..f53176512 100644 --- a/modules/nf-core/modules/stringtie/stringtie/main.nf +++ b/modules/nf-core/modules/stringtie/stringtie/main.nf @@ -48,4 +48,18 @@ process STRINGTIE_STRINGTIE { stringtie: \$(stringtie --version 2>&1) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.transcripts.gtf + touch ${prefix}.gene.abundance.txt + touch ${prefix}.coverage.gtf + touch ${prefix}.ballgown + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/trimgalore/main.nf b/modules/nf-core/modules/trimgalore/main.nf index 3a3fca904..a69e3de62 100644 --- 
a/modules/nf-core/modules/trimgalore/main.nf +++ b/modules/nf-core/modules/trimgalore/main.nf @@ -32,7 +32,7 @@ process TRIMGALORE { cores = (task.cpus as int) - 4 if (meta.single_end) cores = (task.cpus as int) - 3 if (cores < 1) cores = 1 - if (cores > 4) cores = 4 + if (cores > 8) cores = 8 } // Clipping presets have to be evaluated in the context of SE/PE diff --git a/modules/nf-core/modules/ucsc/bedclip/main.nf b/modules/nf-core/modules/ucsc/bedclip/main.nf old mode 100644 new mode 100755 index 969a8f73e..42dd08bb1 --- a/modules/nf-core/modules/ucsc/bedclip/main.nf +++ b/modules/nf-core/modules/ucsc/bedclip/main.nf @@ -1,9 +1,8 @@ -def VERSION = '377' // Version information not provided by tool on CLI - process UCSC_BEDCLIP { tag "$meta.id" label 'process_medium' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda (params.enable_conda ? "bioconda::ucsc-bedclip=377" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ucsc-bedclip:377--h0b8a92a_2' : @@ -23,6 +22,7 @@ process UCSC_BEDCLIP { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" bedClip \\ $bedgraph \\ diff --git a/modules/nf-core/modules/ucsc/bedclip/meta.yml b/modules/nf-core/modules/ucsc/bedclip/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf index ef0ca088f..a2979e044 100644 --- a/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf +++ b/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf @@ -1,9 +1,8 @@ -def VERSION = '377' // Version information not provided by tool on CLI - process UCSC_BEDGRAPHTOBIGWIG { tag "$meta.id" - label 'process_medium' + label 'process_single' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda (params.enable_conda ? "bioconda::ucsc-bedgraphtobigwig=377" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:377--h446ed27_1' : @@ -23,6 +22,7 @@ process UCSC_BEDGRAPHTOBIGWIG { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ bedGraphToBigWig \\ $bedgraph \\ diff --git a/modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml b/modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml old mode 100644 new mode 100755 diff --git a/modules/nf-core/modules/umitools/dedup/main.nf b/modules/nf-core/modules/umitools/dedup/main.nf index 07e6061db..48559d818 100644 --- a/modules/nf-core/modules/umitools/dedup/main.nf +++ b/modules/nf-core/modules/umitools/dedup/main.nf @@ -26,8 +26,10 @@ process UMITOOLS_DEDUP { def prefix = task.ext.prefix ?: "${meta.id}" def paired = meta.single_end ? "" : "--paired" def stats = get_output_stats ? 
"--output-stats $prefix" : "" + + if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"} """ - umi_tools \\ + PYTHONHASHSEED=0 umi_tools \\ dedup \\ -I $bam \\ -S ${prefix}.bam \\ diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf index 058d17644..71eea7b2c 100644 --- a/modules/nf-core/modules/untar/main.nf +++ b/modules/nf-core/modules/untar/main.nf @@ -1,6 +1,6 @@ process UNTAR { tag "$archive" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -21,12 +21,29 @@ process UNTAR { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' untar = archive.toString() - '.tar.gz' + """ - tar \\ - -xzvf \\ - $args \\ - $archive \\ - $args2 \\ + mkdir output + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in output + if [[ \$(tar -tzf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C output --strip-components 1 \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C output \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 + fi + + mv output ${untar} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/modules/untar/meta.yml index d426919bd..ea7a3f382 100644 --- a/modules/nf-core/modules/untar/meta.yml +++ b/modules/nf-core/modules/untar/meta.yml @@ -26,9 +26,9 @@ output: Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - untar: - type: file - description: - pattern: "*.*" + type: directory + description: Directory containing contents of archive + pattern: "*/" - versions: type: file description: File containing software versions @@ -36,3 +36,5 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 0d436fa7a..1ccbb4daa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -84,7 +84,7 @@ params { skip_biotype_qc = false skip_deseq2_qc = false skip_multiqc = false - deseq2_vst = false + deseq2_vst = true rseqc_modules = 'bam_stat,inner_distance,infer_experiment,junction_annotation,junction_saturation,read_distribution,read_duplication' // Boilerplate options diff --git a/nextflow_schema.json b/nextflow_schema.json index 69d34ec1e..cdc5304b7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -411,7 +411,7 @@ }, "min_mapped_reads": { "type": "number", - "default": 5.0, + "default": 5, "fa_icon": "fas fa-percentage", "description": "Minimum percentage of uniquely mapped reads below which samples are removed from further processing.", "help_text": "Some downstream steps in the pipeline will fail if this threshold is too low." 
@@ -466,7 +466,8 @@ "type": "boolean", "description": "Use vst transformation instead of rlog with DESeq2.", "help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).", - "fa_icon": "fas fa-dolly" + "fa_icon": "fas fa-dolly", + "default": true }, "skip_bigwig": { "type": "boolean", diff --git a/subworkflows/nf-core/fastqc_umitools_trimgalore.nf b/subworkflows/nf-core/fastqc_umitools_trimgalore.nf index 2f07c2ff6..a05dc7258 100644 --- a/subworkflows/nf-core/fastqc_umitools_trimgalore.nf +++ b/subworkflows/nf-core/fastqc_umitools_trimgalore.nf @@ -6,6 +6,23 @@ include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' include { UMITOOLS_EXTRACT } from '../../modules/nf-core/modules/umitools/extract/main' include { TRIMGALORE } from '../../modules/nf-core/modules/trimgalore/main' +// +// Function that parses TrimGalore log output file to get total number of reads after trimming +// +def getTrimGaloreReadsAfterFiltering(log_file) { + def total_reads = 0 + def filtered_reads = 0 + log_file.eachLine { line -> + def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/ + def se_filtered_reads_matcher = line =~ /shorter than the length cutoff of\s[\d\.]+\sbp:\s([\d\.]+)/ + def pe_filtered_reads_matcher = line =~ /shorter than the length cutoff\s\([\d\.]+\sbp\):\s([\d\.]+)/ + if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat() + if (se_filtered_reads_matcher) filtered_reads = se_filtered_reads_matcher[0][1].toFloat() + if (pe_filtered_reads_matcher) filtered_reads = pe_filtered_reads_matcher[0][1].toFloat() + } + return total_reads - filtered_reads +} + workflow FASTQC_UMITOOLS_TRIMGALORE { take: reads // channel: [ val(meta), [ reads ] ] @@ -62,6 +79,22 @@ workflow FASTQC_UMITOOLS_TRIMGALORE { trim_zip = TRIMGALORE.out.zip trim_log = TRIMGALORE.out.log ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) + + // + // 
Filter empty FastQ files after adapter trimming + // + trim_reads + .join(trim_log) + .map { + meta, reads, trim_log -> + if (!meta.single_end) { + trim_log = trim_log[-1] + } + if (getTrimGaloreReadsAfterFiltering(trim_log) > 0) { + [ meta, reads ] + } + } + .set { trim_reads } } emit: