diff --git a/.github/workflows/local_modules.yml b/.github/workflows/local_modules.yml index ba5a322283..838351536d 100644 --- a/.github/workflows/local_modules.yml +++ b/.github/workflows/local_modules.yml @@ -23,7 +23,7 @@ jobs: strategy: fail-fast: false matrix: - nxf_version: ['20.11.0-edge'] + nxf_version: ['21.04.0'] tags: ['${{ fromJson(needs.changes.outputs.modules) }}'] profile: ['docker', 'singularity'] ## 'conda' env: diff --git a/.gitignore b/.gitignore index 7620ce70a0..09ece0f765 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,6 @@ work/ data/ results/ .DS_Store -tests/ testing/ testing* *.pyc diff --git a/.nf-core-lint.yml b/.nf-core-lint.yml index 7e6a595c7e..f0a130b3ba 100644 --- a/.nf-core-lint.yml +++ b/.nf-core-lint.yml @@ -2,5 +2,7 @@ files_unchanged: - .github/ISSUE_TEMPLATE/bug_report.md - .github/ISSUE_TEMPLATE/feature_request.md - .github/PULL_REQUEST_TEMPLATE.md + - .gitignore - assets/nf-core-sarek_logo.png - docs/images/nf-core-sarek_logo.png + - lib/NfcoreSchema.groovy diff --git a/assets/dummy_file.txt b/assets/dummy_file.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bin/concatenateVCFs.sh b/bin/concatenateVCFs.sh index 2c4d376a44..d1ea48cc8d 100755 --- a/bin/concatenateVCFs.sh +++ b/bin/concatenateVCFs.sh @@ -8,27 +8,27 @@ usage() { echo "Usage: $0 [-i genome_index_file] [-o output.file.no.gz.extension while [[ $# -gt 0 ]] do - key=$1 - case $key in + key=$1 + case $key in -i) genomeIndex=$2 shift # past argument - shift # past value + shift # past value ;; -c) cpus=$2 shift # past argument - shift # past value + shift # past value ;; -o) outputFile=$2 shift # past argument - shift # past value + shift # past value ;; -t) targetBED=$2 shift # past argument - shift # past value + shift # past value ;; -n) noInt=1 @@ -46,7 +46,7 @@ if [ -z ${cpus} ]; then echo "No CPUs defined: setting to 1"; cpus=1; fi if [ -z ${outputFile} ]; then echo "Missing output file name"; usage; fi -if [ -z ${noInt+x} ] +if [ -z ${noInt+x} ] then # First make a header from one of the VCF # Remove interval information from the GATK command-line, but leave the rest @@ -62,36 +62,36 @@ then # Concatenate VCFs in the correct order ( - cat header + cat header - for chr in "${CONTIGS[@]}"; do - # Skip if globbing would not match any file to avoid errors such as - # "ls: cannot access chr3_*.vcf: No such file or directory" when chr3 - # was not processed. - pattern="${chr}_*.vcf" - if ! compgen -G "${pattern}" > /dev/null; then continue; fi + for chr in "${CONTIGS[@]}"; do + # Skip if globbing would not match any file to avoid errors such as + # "ls: cannot access chr3_*.vcf: No such file or directory" when chr3 + # was not processed. + pattern="${chr}_*.vcf" + if ! compgen -G "${pattern}" > /dev/null; then continue; fi - # ls -v sorts by numeric value ("version"), which means that chr1_100_ - # is sorted *after* chr1_99_. - for vcf in $(ls -v ${pattern}); do - # Determine length of header. - # The 'q' command makes sed exit when it sees the first non-header - # line, which avoids reading in the entire file. - L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l) - - # Then print all non-header lines. Since tail is very fast (nearly as - # fast as cat), this is way more efficient than using a single sed, - # awk or grep command. - tail -n +$((L+1)) ${vcf} - done - done + # ls -v sorts by numeric value ("version"), which means that chr1_100_ + # is sorted *after* chr1_99_. + for vcf in $(ls -v ${pattern}); do + # Determine length of header. 
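+        # (In a VCF, header lines are the ones starting with '#'; everything after them is data.)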
+ # The 'q' command makes sed exit when it sees the first non-header + # line, which avoids reading in the entire file. + L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l) + + # Then print all non-header lines. Since tail is very fast (nearly as + # fast as cat), this is way more efficient than using a single sed, + # awk or grep command. + tail -n +$((L+1)) ${vcf} + done + done ) | bgzip -@${cpus} > rawcalls.vcf.gz tabix rawcalls.vcf.gz else - VCF=$(ls no_intervals*.vcf) - cp $VCF rawcalls.vcf - bgzip -@${cpus} rawcalls.vcf - tabix rawcalls.vcf.gz + VCF=$(ls no_intervals*.vcf) + cp $VCF rawcalls.vcf + bgzip -@${cpus} rawcalls.vcf + tabix rawcalls.vcf.gz fi set +u diff --git a/conf/genomes.config b/conf/genomes.config index edbbfd868c..9e74db8dde 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -79,5 +79,15 @@ params { 'custom' { fasta = null } + 'small_hg38' { + dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" + fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta" + fasta_fai = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta.fai" + germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" + known_indels = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" + snpeff_db = 'GRCh38.86' + species = 'homo_sapiens' + vep_cache_version = '99' + } } } diff --git a/conf/modules.config b/conf/modules.config index 800c7e1976..1e43a0b83b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -8,202 +8,212 @@ params { modules { // BUILD_INDICES 'build_intervals' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } 'bwa_index' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } 'bwamem2_index' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } 'create_intervals_bed' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } 'dict' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } 'index_target_bed' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } - 'msisensor_scan' { - publish_dir = "reference" + 'msisensorpro_scan' { + publish_dir = 'reference' publish_files = false } 'samtools_faidx' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } 'tabix_dbsnp' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } 'tabix_germline_resource' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } 'tabix_known_indels' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } 'tabix_pon' { - publish_dir = "reference" + publish_dir = 'reference' publish_files = false } // MAPPING 'bwa_mem1_mem' { - args = "-K 100000000 -M" - args2 = "sort" + args = '-K 100000000 -M' + args2 = 'sort' publish_files = false } 'bwa_mem1_mem_tumor' { - args = "-K 100000000 -M -B 3" - args2 = "sort" + args = '-K 100000000 -M -B 3' + args2 = 'sort' publish_files = false } 'bwa_mem2_mem' { - args = "-K 100000000 -M" - args2 = "sort" + args = '-K 100000000 -M' + args2 = 'sort' publish_files = false } 'bwa_mem2_mem_tumor' { - args = "-K 100000000 -M -B 3" - args2 = "sort" + args = '-K 100000000 -M -B 3' + args2 = 'sort' publish_files = false } 'merge_bam_mapping' { - publish_by_id = "true" + publish_by_meta = true publish_files = ['bam':'mapped'] - publish_dir = "preprocessing" 
+ publish_dir = 'preprocessing' } 'qualimap_bamqc_mapping' { - publish_by_id = "true" - publish_dir = "reports/qualimap" + args = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML' + publish_by_meta = true + publish_dir = 'reports/qualimap' } 'samtools_index_mapping' { - publish_by_id = "true" + publish_by_meta = true publish_files = ['bai':'mapped'] - publish_dir = "preprocessing" + publish_dir = 'preprocessing' } 'samtools_stats_mapping' { - publish_by_id = "true" - publish_dir = "reports/samtools_stats" + publish_by_meta = true + publish_dir = 'reports/samtools_stats' } // MARKDUPLICATES 'markduplicates' { - args = "ASSUME_SORTED=true REMOVE_DUPLICATES=false VALIDATION_STRINGENCY=LENIENT TMP_DIR=tmp" - suffix = ".md" - publish_by_id = "true" - publish_dir = "preprocessing" - publish_files = ['md.bam': 'markduplicates', 'md.bam.bai': 'markduplicates'] + args = 'REMOVE_DUPLICATES=false VALIDATION_STRINGENCY=LENIENT' + suffix = '.md' + publish_by_meta = true + publish_dir = 'preprocessing' + publish_files = ['bam': 'markduplicates', 'bai': 'markduplicates'] + } + 'markduplicatesspark' { + args = '--remove-sequencing-duplicates false -VS LENIENT' + suffix = '.md' + publish_by_meta = true + publish_dir = 'preprocessing' + publish_files = ['bam': 'markduplicates', 'bai': 'markduplicates'] } // PREPARE_RECALIBRATION 'baserecalibrator' { - publish_by_id = "true" - publish_dir = "preprocessing" - publish_files = ['recal.table':'markduplicates'] + publish_by_meta = true + publish_dir = 'preprocessing' + publish_files = ['recal.table': 'recal_table'] } 'gatherbqsrreports' { - publish_by_id = "true" - publish_dir = "preprocessing" - publish_files = ['recal.table':'markduplicates'] + publish_by_meta = true + publish_dir = 'preprocessing' + publish_files = ['recal.table': 'recal_table'] } // RECALIBRATE 'applybqsr' { - suffix = "recal" - publish_by_id = "true" - publish_dir = "preprocessing" + suffix = '.recal' + publish_files = false } 'merge_bam_recalibrate' { - suffix = ".recal" - publish_by_id = "true" + suffix = '.recal' + publish_by_meta = true publish_files = ['bam':'recalibrated'] - publish_dir = "preprocessing" + publish_dir = 'preprocessing' } 'qualimap_bamqc_recalibrate' { - publish_by_id = "true" - publish_dir = "reports/qualimap" + args = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML' + publish_by_meta = true + publish_dir = 'reports/qualimap' } 'samtools_index_recalibrate' { - suffix = "recal" - publish_by_id = "true" - publish_dir = "preprocessing" + suffix = 'recal' + publish_by_meta = true + publish_dir = 'preprocessing' publish_files = ['recal.bam':'recalibrated', 'recal.bam.bai':'recalibrated'] } 'samtools_stats_recalibrate' { - publish_by_id = "true" - publish_dir = "reports/samtools_stats" + publish_by_meta = true + publish_dir = 'reports/samtools_stats' } // GERMLINE_VARIANT_CALLING 'haplotypecaller' { - publish_by_id = "true" - publish_dir = "variant_calling" + args = '-ERC GVCF' + suffix = '.g' + publish_by_meta = true + publish_dir = 'variant_calling' publish_files = false } 'genotypegvcf' { - publish_by_id = "true" - publish_dir = "variant_calling" + publish_by_meta = true + publish_dir = 'variant_calling' publish_files = false } 'concat_haplotypecaller' { - suffix = "haplotypecaller" - publish_by_id = "true" - publish_dir = "variant_calling" + suffix = 'haplotypecaller' + publish_by_meta = true + publish_dir = 'variant_calling' publish_files = 
['vcf.gz':'haplotypecaller', 'vcf.gz.tbi':'haplotypecaller'] } 'concat_gvcf' { - suffix = "haplotypecaller_gvcf" - publish_by_id = "true" - publish_dir = "variant_calling" + suffix = 'haplotypecaller_gvcf' + publish_by_meta = true + publish_dir = 'variant_calling' publish_files = ['vcf.gz':'haplotypecaller_gvcf', 'vcf.gz.tbi':'haplotypecaller_gvcf'] } 'strelka_germline' { - publish_by_id = "true" - publish_dir = "variant_calling" + publish_by_meta = true + publish_dir = 'variant_calling' publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] } // TUMOR_VARIANT_CALLING // PAIR_VARIANT_CALLING 'manta_somatic' { - publish_by_id = "true" - publish_dir = "variant_calling" + publish_by_meta = true + publish_dir = 'variant_calling' publish_files = ['vcf.gz':'manta', 'vcf.gz.tbi':'manta'] } - 'msisensor_msi' { - publish_by_id = "true" - publish_dir = "variant_calling" - publish_files = ['list':'msisensor'] + 'msisensorpro_msi' { + publish_by_meta = true + publish_dir = 'variant_calling' + publish_files = ['list':'msisensorpro'] } 'strelka_somatic' { - publish_by_id = "true" - publish_dir = "variant_calling" + publish_by_meta = true + publish_dir = 'variant_calling' publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] } 'strelka_somatic_bp' { - publish_by_id = "true" - publish_dir = "variant_calling" + publish_by_meta = true + publish_dir = 'variant_calling' publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] } // QC_TRIM 'fastqc' { - args = "--quiet" - publish_by_id = "true" - publish_dir = "reports/fastqc" + args = '--quiet' + publish_by_meta = true + publish_dir = 'reports/fastqc' } 'trimgalore' { - args = "--fastqc" - publish_by_id = "true" - publish_dir = "trimgalore" + args = '--fastqc' + publish_by_meta = true + publish_dir = 'trimgalore' } // OTHERS 'get_software_versions' { - publish_dir = "pipeline_info" + publish_dir = 'pipeline_info' } } } diff --git a/conf/test.config b/conf/test.config index 7ad28d55ec..d4f8912045 100644 --- a/conf/test.config +++ b/conf/test.config @@ -4,7 +4,7 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/sarek -profile test,<docker/singularity> + * nextflow run nf-core/sarek -profile test,<extra_test_profiles>,<docker/singularity> */ params { @@ -17,7 +17,7 @@ params { max_time = 48.h // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/tsv/tiny-manta-normal-https.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-manta-normal-https.csv' // Small reference genome igenomes_ignore = true @@ -31,6 +31,61 @@ params { schema_ignore_params = 'genomes,input' } +profiles { + annotation { + params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/vcf/Strelka_1234N_variants.vcf.gz' + params.genome = 'WBcel235' + params.igenomes_ignore = false + } + pair { + params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-manta-https.csv' + } + save_bam_mapped { + params.save_bam_mapped = true + } + split_fastq { + params.split_fastq = 500 + } + targeted { + params.target_bed = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/target.bed' + params.tools = 'manta,strelka' + } + tool { + params.step = 'variant_calling' + params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-recal-normal-https.csv' + } + tool_pair { + params.step = 'variant_calling' + params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-recal-pair-https.csv' + } + trimming { + params.trim_fastq = true + params.clip_r1 = 1 + params.clip_r2 = 1 + params.three_prime_clip_r1 = 1 + params.three_prime_clip_r2 = 1 + } + use_gatk_spark { + params.use_gatk_spark = true + } + umi_qiaseq { + params.genome = 'smallGRCh38' + params.genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/reference/chr20_hg38' + params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-umi-qiaseq-https.csv' + params.umi = true + params.read_structure1 = "12M11S+T" + params.read_structure2 = "12M11S+T" + } + umi_tso { + params.genome = 'smallGRCh38' + params.genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/reference/chr20_hg38' + params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-umi-tso-https.csv' + params.umi = true + params.read_structure1 = "7M1S+T" + params.read_structure2 = "7M1S+T" + } +} + /* * TODO: uncomment when ready process {
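The per-scenario test configs removed below are folded into the profiles above. Nextflow applies `-profile` entries left to right, so the bundled scenarios can now be stacked on one command line, e.g. `nextflow run nf-core/sarek -profile test,targeted,docker`. A minimal sketch of the override behaviour (profile names here are illustrative, not from this changeset):

    // Later profiles win: `-profile base,extra` ends up with params.tools = 'manta,strelka'
    profiles {
        base  { params.tools = 'strelka' }
        extra { params.tools = 'manta,strelka' }
    }
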
diff --git a/conf/test_annotation.config b/conf/test_annotation.config deleted file mode 100644 index bbcf38cb88..0000000000 --- a/conf/test_annotation.config +++ /dev/null @@ -1,16 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/sarek -profile test_annotation - */ - -includeConfig 'test.config' - -params { - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/vcf/Strelka_1234N_variants.vcf.gz' - genome = 'WBcel235' - igenomes_ignore = false -} \ No newline at end of file diff --git a/conf/test_full.config b/conf/test_full.config index 74b118ef36..2ef0836093 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -14,11 +14,7 @@ params { // Input data for full size test // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed - single_end = false - input_paths = [ - ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']], - ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']] - ] + // Ignore `--input` as otherwise the parameter validation will throw an error - schema_ignore_params = 'genomes,input_paths,input' + schema_ignore_params = 'genomes,input' } diff --git a/conf/test_germline_variantcalling.config b/conf/test_germline_variantcalling.config deleted file mode 100644 index 7cec01feca..0000000000 --- a/conf/test_germline_variantcalling.config +++ /dev/null @@ -1,15 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/sarek -profile test_tool - */ - -includeConfig 'test.config' - -params { - // Input data - tools = 'haplotypecaller,strelka' -} \ No newline at end of file diff --git a/conf/test_pair.config b/conf/test_pair.config deleted file mode 100644 index 493b152c2b..0000000000 --- a/conf/test_pair.config +++ /dev/null @@ -1,14 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/sarek -profile test_pair,<docker/singularity> - */ - -includeConfig 'test.config' - -params { - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/tsv/tiny-manta-https.tsv' -} \ No newline at end of file diff --git a/conf/test_split_fastq.config b/conf/test_split_fastq.config deleted file mode 100644 index d2e6595caf..0000000000 --- a/conf/test_split_fastq.config +++ /dev/null @@ -1,14 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/sarek -profile test_split_fastq - */ - -includeConfig 'test.config' - -params { - split_fastq = 500 -} diff --git a/conf/test_targeted.config b/conf/test_targeted.config deleted file mode 100644 index 9d3b38307e..0000000000 --- a/conf/test_targeted.config +++ /dev/null @@ -1,15 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test.
Use as follows: - * nextflow run nf-core/sarek -profile test_targeted - */ - -includeConfig 'test.config' - -params { - target_bed = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/target.bed' - tools = 'manta,strelka' -} diff --git a/conf/test_tool.config b/conf/test_tool.config deleted file mode 100644 index 3d9a934533..0000000000 --- a/conf/test_tool.config +++ /dev/null @@ -1,16 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/sarek -profile test_tool - */ - -includeConfig 'test.config' - -params { - // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/tsv/tiny-recal-pair-https.tsv' - step = 'variantcalling' -} \ No newline at end of file diff --git a/conf/test_trimming.config b/conf/test_trimming.config deleted file mode 100644 index f9a4af2da2..0000000000 --- a/conf/test_trimming.config +++ /dev/null @@ -1,18 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/sarek -profile test_trimming - */ - -includeConfig 'test.config' - -params { - trim_fastq = true - clip_r1 = 1 - clip_r2 = 1 - three_prime_clip_r1 = 1 - three_prime_clip_r2 = 1 -} \ No newline at end of file diff --git a/conf/test_umi_qiaseq.config b/conf/test_umi_qiaseq.config deleted file mode 100644 index fd4d047a36..0000000000 --- a/conf/test_umi_qiaseq.config +++ /dev/null @@ -1,19 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/sarek -profile test_umi_qiaseq - */ - -includeConfig 'test.config' - -params { - genome = 'smallGRCh38' - genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/reference/chr20_hg38' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/tsv/tiny-umi-qiaseq-https.tsv' - umi = true - read_structure1 = "12M11S+T" - read_structure2 = "12M11S+T" -} \ No newline at end of file diff --git a/conf/test_umi_tso.config b/conf/test_umi_tso.config deleted file mode 100644 index d87e5e47cc..0000000000 --- a/conf/test_umi_tso.config +++ /dev/null @@ -1,19 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/sarek -profile test_umi_tso - */ - -includeConfig 'test.config' - -params { - genome = 'smallGRCh38' - genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/reference/chr20_hg38' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/tsv/tiny-umi-tso-https.tsv' - umi = true - read_structure1 = "7M1S+T" - read_structure2 = "7M1S+T" -} \ No newline at end of file diff --git a/conf/test_use_gatk_spark.config b/conf/test_use_gatk_spark.config deleted file mode 100644 index 7668782c33..0000000000 --- a/conf/test_use_gatk_spark.config +++ /dev/null @@ -1,14 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/sarek -profile test_use_gatk_spark - */ - -includeConfig 'test.config' - -params { - use_gatk_spark = true -} \ No newline at end of file diff --git a/lib/Checks.groovy b/lib/Checks.groovy deleted file mode 100644 index 63c9cf5cfc..0000000000 --- a/lib/Checks.groovy +++ /dev/null @@ -1,102 +0,0 @@ -import org.yaml.snakeyaml.Yaml - -/* - * This file holds several functions used to perform standard checks for the nf-core pipeline template. - */ - -class Checks { - - static void check_conda_channels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } - - // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) - - if (conda_check_failed) { - log.warn "=============================================================================\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + - "===================================================================================" - } - } - - static void aws_batch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - assert !params.tracedir.startsWith('s3:') : "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." 
- } - } - - static void hostname(workflow, params, log) { - Map colors = Headers.log_colours(params.monochrome_logs) - if (params.hostnames) { - def hostname = "hostname".execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.info "=${colors.yellow}====================================================${colors.reset}=\n" + - "${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" + - " ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`\n" + - "=${colors.yellow}====================================================${colors.reset}=" - } - } - } - } - } - - // Citation string - private static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.12688/f1000research.16665.2\n" + - " https://doi.org/10.5281/zenodo.4468605\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // Exit pipeline if incorrect --genome key provided - static void genome_exists(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "=============================================================================\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "=============================================================================" - System.exit(0) - } - } - - // Get attribute from genome config file e.g. 
fasta - static String get_genome_attribute(params, attribute) { - def val = '' - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] - } - } - return val - } - -} diff --git a/lib/Completion.groovy b/lib/Completion.groovy deleted file mode 100644 index 5a933eb9a5..0000000000 --- a/lib/Completion.groovy +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Functions to be run on completion of pipeline - */ - -class Completion { - static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success && !params.skip_multiqc) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = 
params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = Headers.log_colours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - static void summary(workflow, params, log) { - Map colors = Headers.log_colours(params.monochrome_logs) - - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - Checks.hostname(workflow, params, log) - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } -} diff --git a/lib/Headers.groovy b/lib/Headers.groovy deleted file mode 100644 index 9b066b42fe..0000000000 --- a/lib/Headers.groovy +++ /dev/null @@ -1,50 +0,0 @@ -/* - * This file holds several functions used to render the nf-core ANSI header. - */ - -class Headers { - - private static Map log_colours(Boolean monochrome_logs) { - Map colorcodes = [:] - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['yellow_bold'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - colorcodes['red'] = monochrome_logs ? 
'' : "\033[1;91m" - return colorcodes - } - - static String dashed_line(monochrome_logs) { - Map colors = log_colours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - static String nf_core(workflow, monochrome_logs) { - Map colors = log_colours(monochrome_logs) - String.format( - """\n - ${dashed_line(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.white}____${colors.reset} - ${colors.white}.´ _ `.${colors.reset} - ${colors.white}/ ${colors.green}|\\${colors.reset}`-_ \\${colors.reset} ${colors.blue} __ __ ___ ${colors.reset} - ${colors.white}| ${colors.green}| \\${colors.reset} `-|${colors.reset} ${colors.blue}|__` /\\ |__) |__ |__/${colors.reset} - ${colors.white}\\ ${colors.green}| \\${colors.reset} /${colors.reset} ${colors.blue}.__| /¯¯\\ | \\ |___ | \\${colors.reset} - ${colors.white}`${colors.green}|${colors.reset}____${colors.green}\\${colors.reset}´${colors.reset} - - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} - ${dashed_line(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 52ee730432..16986434f7 100644 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -1,6 +1,6 @@ -/* - * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. - */ +// +// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
+// import org.everit.json.schema.Schema import org.everit.json.schema.loader.SchemaLoader @@ -13,16 +13,23 @@ import groovy.json.JsonBuilder class NfcoreSchema { - /* - * Function to loop over all parameters defined in schema and check - * whether the given paremeters adhere to the specificiations - */ + // + // Resolve Schema path relative to main workflow directory + // + public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { + return "${workflow.projectDir}/${schema_filename}" + } + + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - private static void validateParameters(params, jsonSchema, log) { + public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { def has_error = false //=====================================================================// // Check for nextflow core params and unexpected params - def json = new File(jsonSchema).text + def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') def nf_params = [ // Options for base `nextflow` command @@ -125,36 +132,36 @@ class NfcoreSchema { //=====================================================================// // Validate parameters against the schema - InputStream inputStream = new File(jsonSchema).newInputStream() - JSONObject rawSchema = new JSONObject(new JSONTokener(inputStream)) + InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() + JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) // Remove anything that's in params.schema_ignore_params - rawSchema = removeIgnoredParams(rawSchema, params) + raw_schema = removeIgnoredParams(raw_schema, params) - Schema schema = SchemaLoader.load(rawSchema) + Schema schema = SchemaLoader.load(raw_schema) // Clean the parameters def cleanedParams = cleanParameters(params) // Convert to JSONObject def jsonParams = new JsonBuilder(cleanedParams) - JSONObject paramsJSON = new JSONObject(jsonParams.toString()) + JSONObject params_json = new JSONObject(jsonParams.toString()) // Validate try { - schema.validate(paramsJSON) + schema.validate(params_json) } catch (ValidationException e) { println '' log.error 'ERROR: Validation of pipeline parameters failed!' 
JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, paramsJSON, log) + printExceptions(exceptionJSON, params_json, log) println '' has_error = true } // Check for unexpected parameters if (unexpectedParams.size() > 0) { - Map colors = log_colours(params.monochrome_logs) + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) println '' def warn_msg = 'Found unexpected parameters:' for (unexpectedParam in unexpectedParams) { @@ -170,266 +177,17 @@ class NfcoreSchema { } } - // Loop over nested exceptions and print the causingException - private static void printExceptions(exJSON, paramsJSON, log) { - def causingExceptions = exJSON['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = exJSON['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (exJSON['pointerToViolation'] == '#') { - log.error "* ${exJSON['message']}" - } - // Error with specific param - else { - def param = exJSON['pointerToViolation'] - ~/^#\// - def param_val = paramsJSON[param].toString() - log.error "* --${param}: ${exJSON['message']} (${param_val})" - } - } - for (ex in causingExceptions) { - printExceptions(ex, paramsJSON, log) - } - } - - // Remove an element from a JSONArray - private static JSONArray removeElement(jsonArray, element){ - def list = [] - int len = jsonArray.length() - for (int i=0;i<len;i++){ - list.add(jsonArray.get(i)) - } - list.remove(element) - JSONArray jsArray = new JSONArray(list) - return jsArray - } - - // Remove ignored parameters - private static JSONObject removeIgnoredParams(rawSchema, params) { - // Remove anything that's in params.schema_ignore_params - params.schema_ignore_params.split(',').each{ ignore_param -> - if(rawSchema.keySet().contains('definitions')){ - rawSchema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(rawSchema.keySet().contains('properties') && rawSchema.get('properties').keySet().contains(ignore_param)) { - rawSchema.get("properties").remove(ignore_param) - } - if(rawSchema.keySet().contains('required') && rawSchema.required.contains(ignore_param)) { - def cleaned_required = removeElement(rawSchema.required, ignore_param) - rawSchema.put("required", cleaned_required) - } - } - return rawSchema - } - - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - /* - * This method tries to read a JSON params file - */ - private static LinkedHashMap params_load(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = params_read(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. 
$e" - params_map = new LinkedHashMap() - } - return params_map - } - - private static Map log_colours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - static String dashed_line(monochrome_logs) { - Map colors = log_colours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - /* - Method to actually read in JSON file using Groovy. - Group (as Key), values are all parameters - - Parameter1 as Key, Description as Value - - Parameter2 as Key, Description as Value - .... 
- Group - - - */ - private static LinkedHashMap params_read(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - /* - * Get maximum number of characters across all parameter names - */ - private static Integer params_max_chars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } - - /* - * Beautify parameters for --help - */ - private static String params_help(workflow, params, json_schema, command) { - Map colors = log_colours(params.monochrome_logs) + // + // Beautify parameters for --help + // + public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) Integer num_hidden = 0 String output = '' output += 'Typical pipeline command:\n\n' output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = params_load(json_schema) - Integer max_chars = params_max_chars(params_map) + 1 + Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + Integer max_chars = paramsMaxChars(params_map) + 1 Integer desc_indent = max_chars + 14 Integer dec_linewidth = 160 - desc_indent for (group in params_map.keySet()) { @@ -469,18 +227,17 @@ class NfcoreSchema { output += group_output } } - output += dashed_line(params.monochrome_logs) if (num_hidden > 0){ - output += colors.dim + "\n Hiding $num_hidden params, use --show_hidden_params to show.\n" + colors.reset - output += dashed_line(params.monochrome_logs) + output += colors.dim + "!! 
Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset } + output += NfcoreTemplate.dashedLine(params.monochrome_logs) return output } - /* - * Groovy Map summarising parameters/workflow options used by the pipeline - */ - private static LinkedHashMap params_summary_map(workflow, params, json_schema) { + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // + public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { // Get a selection of core Nextflow workflow options def Map workflow_summary = [:] if (workflow.revision) { @@ -503,7 +260,7 @@ class NfcoreSchema { // Get pipeline parameters defined in JSON Schema def Map params_summary = [:] def blacklist = ['hostnames'] - def params_map = params_load(json_schema) + def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) for (group in params_map.keySet()) { def sub_params = new LinkedHashMap() def group_params = params_map.get(group) // This gets the parameters of that particular group @@ -546,14 +303,14 @@ class NfcoreSchema { return [ 'Core Nextflow options' : workflow_summary ] << params_summary } - /* - * Beautify parameters for summary and return as string - */ - private static String params_summary_log(workflow, params, json_schema) { - Map colors = log_colours(params.monochrome_logs) + // + // Beautify parameters for summary and return as string + // + public static String paramsSummaryLog(workflow, params) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) String output = '' - def params_map = params_summary_map(workflow, params, json_schema) - def max_chars = params_max_chars(params_map) + def params_map = paramsSummaryMap(workflow, params) + def max_chars = paramsMaxChars(params_map) for (group in params_map.keySet()) { def group_params = params_map.get(group) // This gets the parameters of that particular group if (group_params) { @@ -564,10 +321,196 @@ class NfcoreSchema { output += '\n' } } - output += dashed_line(params.monochrome_logs) - output += colors.dim + "\n Only displaying parameters that differ from defaults.\n" + colors.reset - output += dashed_line(params.monochrome_logs) + output += "!! 
Only displaying parameters that differ from the pipeline defaults !!\n" + output += NfcoreTemplate.dashedLine(params.monochrome_logs) return output } + // + // Loop over nested exceptions and print the causingException + // + private static void printExceptions(ex_json, params_json, log) { + def causingExceptions = ex_json['causingExceptions'] + if (causingExceptions.length() == 0) { + def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ + // Missing required param + if (m.matches()) { + log.error "* Missing required parameter: --${m[0][1]}" + } + // Other base-level error + else if (ex_json['pointerToViolation'] == '#') { + log.error "* ${ex_json['message']}" + } + // Error with specific param + else { + def param = ex_json['pointerToViolation'] - ~/^#\// + def param_val = params_json[param].toString() + log.error "* --${param}: ${ex_json['message']} (${param_val})" + } + } + for (ex in causingExceptions) { + printExceptions(ex, params_json, log) + } + } + + // + // Remove an element from a JSONArray + // + private static JSONArray removeElement(json_array, element) { + def list = [] + int len = json_array.length() + for (int i=0;i<len;i++){ + list.add(json_array.get(i)) + } + list.remove(element) + JSONArray jsArray = new JSONArray(list) + return jsArray + } + + // + // Remove ignored parameters + // + private static JSONObject removeIgnoredParams(raw_schema, params) { + // Remove anything that's in params.schema_ignore_params + params.schema_ignore_params.split(',').each{ ignore_param -> + if(raw_schema.keySet().contains('definitions')){ + raw_schema.definitions.each { definition -> + for (key in definition.keySet()){ + if (definition[key].get("properties").keySet().contains(ignore_param)){ + // Remove the param to ignore + definition[key].get("properties").remove(ignore_param) + // If the param was required, change this + if (definition[key].has("required")) { + def cleaned_required = removeElement(definition[key].required, ignore_param) + definition[key].put("required", cleaned_required) + } + } + } + } + } + if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { + raw_schema.get("properties").remove(ignore_param) + } + if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { + def cleaned_required = removeElement(raw_schema.required, ignore_param) + raw_schema.put("required", cleaned_required) + } + } + return raw_schema + } + + // + // Clean and check parameters relative to Nextflow native classes + // + private static Map cleanParameters(params) { + def new_params = params.getClass().newInstance(params) + for (p in params) { + // remove anything evaluating to false + if (!p['value']) { + new_params.remove(p.key) + } + // Cast MemoryUnit to String + if (p['value'].getClass() == nextflow.util.MemoryUnit) { + new_params.replace(p.key, p['value'].toString()) + } + // Cast Duration to String + if (p['value'].getClass() == nextflow.util.Duration) { + new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) + } + // Cast LinkedHashMap to String + if (p['value'].getClass() == LinkedHashMap) { + new_params.replace(p.key, p['value'].toString()) + } + } + return new_params + } + + // + // This function tries to read a JSON params file + // + private static LinkedHashMap paramsLoad(String json_schema) { + def params_map = new LinkedHashMap() + try { + params_map = paramsRead(json_schema) + } catch (Exception e) { + println "Could not read parameters settings from JSON. $e" + params_map = new LinkedHashMap() + } + return params_map + } + + // + // Method to actually read in JSON file using Groovy. + // Group (as Key), values are all parameters + // - Parameter1 as Key, Description as Value + // - Parameter2 as Key, Description as Value + // ....
+ // Group + // - + private static LinkedHashMap paramsRead(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') + def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + * properties <- parameters can also be ungrouped, outside of definitions + parameter 1 + type + description + */ + + // Grouped params + def params_map = new LinkedHashMap() + schema_definitions.each { key, val -> + def Map group = schema_definitions."$key".properties // Gets the property object of the group + def title = schema_definitions."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + + // Ungrouped params + def ungrouped_params = new LinkedHashMap() + schema_properties.each { innerkey, value -> + ungrouped_params.put(innerkey, value) + } + params_map.put("Other parameters", ungrouped_params) + + return params_map + } + + // + // Get maximum number of characters across all parameter names + // + private static Integer paramsMaxChars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars + } } diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy new file mode 100644 index 0000000000..e1e05e8520 --- /dev/null +++ b/lib/NfcoreTemplate.groovy @@ -0,0 +1,274 @@ +// +// This file holds several functions used within the nf-core pipeline template. +// + +import org.yaml.snakeyaml.Yaml + +class NfcoreTemplate { + + // + // Check AWS Batch related parameters have been specified correctly + // + public static void awsBatch(workflow, params) { + if (workflow.profile.contains('awsbatch')) { + // Check params.awsqueue and params.awsregion have been set if running on AWSBatch + assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" 
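+            // AWS Batch jobs have no shared local filesystem, so outputs must be written to S3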
+ } + } + + // + // Check params.hostnames + // + public static void hostName(workflow, params, log) { + Map colors = logColours(params.monochrome_logs) + if (params.hostnames) { + def hostname = "hostname".execute().text.trim() + params.hostnames.each { prof, hnames -> + hnames.each { hname -> + if (hostname.contains(hname) && !workflow.profile.contains(prof)) { + log.info "=${colors.yellow}====================================================${colors.reset}=\n" + + "${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" + + " but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" + + " ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`\n" + + "=${colors.yellow}====================================================${colors.reset}=" + } + } + } + } + } + + // + // Construct and send completion email + // + public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = workflow.manifest.version + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success && !params.skip_multiqc) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + + // Check if we are only sending emails on failure + def email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") + def txt_template = 
engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$projectDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(params.monochrome_logs) + if (email_address) { + try { + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + mail_cmd += [ '-A', mqc_report ] + } + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + def output_tf = new File(output_d, "pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + } + + // + // Print pipeline summary on completion + // + public static void summary(workflow, params, log) { + Map colors = logColours(params.monochrome_logs) + + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + hostName(workflow, params, log) + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + } + + // + // ANSII Colours used for terminal logging + // + public static Map logColours(Boolean monochrome_logs) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? 
'' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" + + return colorcodes + } + + // + // Does what is says on the tin + // + public static String dashedLine(monochrome_logs) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + // + // nf-core logo + // + public static String logo(workflow, monochrome_logs) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.white}____${colors.reset} + ${colors.white}.´ _ `.${colors.reset} + ${colors.white}/ ${colors.green}|\\${colors.reset}`-_ \\${colors.reset} ${colors.blue} __ __ ___ ${colors.reset} + ${colors.white}| ${colors.green}| \\${colors.reset} `-|${colors.reset} ${colors.blue}|__` /\\ |__) |__ |__/${colors.reset} + ${colors.white}\\ ${colors.green}| \\${colors.reset} /${colors.reset} ${colors.blue}.__| /¯¯\\ | \\ |___ | \\${colors.reset} + ${colors.white}`${colors.green}|${colors.reset}____${colors.green}\\${colors.reset}´${colors.reset} + + ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) + } +} diff --git a/lib/Schema.groovy b/lib/Schema.groovy deleted file mode 100644 index c2cad355a5..0000000000 --- a/lib/Schema.groovy +++ /dev/null @@ -1,228 +0,0 @@ -/* - * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. - */ - -import groovy.json.JsonSlurper - -class Schema { - /* - * This method tries to read a JSON params file - */ - private static LinkedHashMap params_load(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = params_read(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - /* - Method to actually read in JSON file using Groovy. - Group (as Key), values are all parameters - - Parameter1 as Key, Description as Value - - Parameter2 as Key, Description as Value - .... 
- Group - - - */ - private static LinkedHashMap params_read(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map json_params = (Map) new JsonSlurper().parseText(json).get('definitions') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - */ - def params_map = new LinkedHashMap() - json_params.each { key, val -> - def Map group = json_params."$key".properties // Gets the property object of the group - def title = json_params."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - return params_map - } - - /* - * Get maximum number of characters across all parameter names - */ - private static Integer params_max_chars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } - - /* - * Beautify parameters for --help - */ - private static String params_help(workflow, params, json_schema, command) { - String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" - output += "Typical pipeline command:\n\n" - output += " ${command}\n\n" - def params_map = params_load(json_schema) - def max_chars = params_max_chars(params_map) + 1 - for (group in params_map.keySet()) { - output += group + "\n" - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - def type = "[" + group_params.get(param).type + "]" - def description = group_params.get(param).description - output += " \u001B[1m--" + param.padRight(max_chars) + "\u001B[1m" + type.padRight(10) + description + "\n" - } - output += "\n" - } - output += Headers.dashed_line(params.monochrome_logs) - output += "\n\n" + Checks.citation(workflow) - output += "\n\n" + Headers.dashed_line(params.monochrome_logs) - return output - } - - /* - * Groovy Map summarising parameters/workflow options used by the pipeline - */ - private static LinkedHashMap params_summary_map(workflow, params, json_schema) { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = "$workflow.containerEngine" - } - if (workflow.container) { - workflow_summary['container'] = "$workflow.container" - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def blacklist = ['hostnames'] - def params_map = params_load(json_schema) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the 
parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param) && !blacklist.contains(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value == null) { - if (param_type == 'boolean') { - schema_value = false - } - if (param_type == 'string') { - schema_value = '' - } - if (param_type == 'integer') { - schema_value = 0 - } - } else { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir','') - sub_string = sub_string.replace('\${projectDir}','') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir','') - sub_string = sub_string.replace('\${params.outdir}','') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - if (params_value != schema_value) { - sub_params.put("$param", params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - /* - * Beautify parameters for summary and return as string - */ - private static String params_summary_log(workflow, params, json_schema) { - String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" - def params_map = params_summary_map(workflow, params, json_schema) - def max_chars = params_max_chars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += group + "\n" - for (param in group_params.keySet()) { - output += " \u001B[1m" + param.padRight(max_chars) + ": \u001B[1m" + group_params.get(param) + "\n" - } - output += "\n" - } - } - output += Headers.dashed_line(params.monochrome_logs) - output += "\n\n" + Checks.citation(workflow) - output += "\n\n" + Headers.dashed_line(params.monochrome_logs) - return output - } - - static String params_summary_multiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "
<p style=\"font-size:110%\"><b>$group</b></p>\n" - summary_section += "<dl class=\"dl-horizontal\">\n" - for (param in group_params.keySet()) { - summary_section += "<dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" - } - summary_section += "</dl>
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy new file mode 100644 index 0000000000..18173e9850 --- /dev/null +++ b/lib/Utils.groovy @@ -0,0 +1,47 @@ +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml + +class Utils { + + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // + public static void checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + def required_channels = ['conda-forge', 'bioconda', 'defaults'] + def conda_check_failed = !required_channels.every { ch -> ch in channels } + + // Check that they are in the right order + conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) + conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + + if (conda_check_failed) { + log.warn "=============================================================================\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + + " NB: The order of the channels matters!\n" + + "===================================================================================" + } + } + + // + // Join module args with appropriate spacing + // + public static String joinModuleArgs(args_list) { + return ' ' + args_list.join(' ') + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy new file mode 100644 index 0000000000..0de4abda18 --- /dev/null +++ b/lib/WorkflowMain.groovy @@ -0,0 +1,88 @@ +// +// This file holds several functions specific to the main.nf workflow in the nf-core/sarek pipeline +// + +class WorkflowMain { + + // + // Citation string for pipeline + // + public static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " https://doi.org/10.12688/f1000research.16665.2\n" + + " https://doi.org/10.5281/zenodo.4468605\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + } + + // + // Print help to screen if required + // + public static String help(workflow, params, log) { + def command = "nextflow run nf-core/sarek --input sample.tsv --genome GRCh38 -profile docker" + def help_string = '' + help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) + help_string += NfcoreSchema.paramsHelp(workflow, params, command) + help_string += '\n' + citation(workflow) + '\n' + help_string += 
NfcoreTemplate.dashedLine(params.monochrome_logs) + return help_string + } + + // + // Print parameter summary log to screen + // + public static String paramsSummaryLog(workflow, params, log) { + def summary_log = '' + summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) + summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) + summary_log += '\n' + citation(workflow) + '\n' + summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) + return summary_log + } + + // + // Validate parameters and print summary to screen + // + public static void initialise(workflow, params, log) { + // Print help to screen if required + if (params.help) { + log.info help(workflow, params, log) + System.exit(0) + } + + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + + // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) + + // Check that conda channels are set-up correctly + if (params.enable_conda) { + Utils.checkCondaChannels(log) + } + + // Check AWS batch settings + NfcoreTemplate.awsBatch(workflow, params) + + // Check the hostnames against configured profiles + NfcoreTemplate.hostName(workflow, params, log) + } + + // + // Get attribute from genome config file e.g. fasta + // + public static String getGenomeAttribute(params, attribute) { + def val = '' + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + val = params.genomes[ params.genome ][ attribute ] + } + } + return val + } +} diff --git a/lib/WorkflowSarek.groovy b/lib/WorkflowSarek.groovy new file mode 100644 index 0000000000..37632d5ba8 --- /dev/null +++ b/lib/WorkflowSarek.groovy @@ -0,0 +1,25 @@ +// +// This file holds several functions specific to the main.nf workflow in the nf-core/sarek pipeline +// + +class WorkflowSarek { + + public static void initialise(params, log) { + genomeExistsError(params, log) + + } + + // + // Exit pipeline if incorrect --genome key provided + // + private static void genomeExistsError(params, log) { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + log.error "=============================================================================\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "===================================================================================" + System.exit(1) + } + } +} \ No newline at end of file diff --git a/main.nf b/main.nf index 0c77129ce8..df57dd1b88 100644 --- a/main.nf +++ b/main.nf @@ -1,97 +1,87 @@ #!/usr/bin/env nextflow /* --------------------------------------------------------------------------------- - nf-core/sarek --------------------------------------------------------------------------------- -Started March 2016. -Ported to nf-core May 2019. -Ported to DSL 2 July 2020. 
--------------------------------------------------------------------------------- -nf-core/sarek: - An open-source analysis pipeline to detect germline or somatic variants - from whole genome or targeted sequencing --------------------------------------------------------------------------------- - @Homepage - https://nf-co.re/sarek --------------------------------------------------------------------------------- - @Documentation - https://nf-co.re/sarek/latest/usage --------------------------------------------------------------------------------- +======================================================================================== + nf-core/sarek +======================================================================================== + Started March 2016. + Ported to nf-core May 2019. + Ported to DSL 2 July 2020. +---------------------------------------------------------------------------------------- + nf-core/sarek: + An open-source analysis pipeline to detect germline or somatic variants + from whole genome or targeted sequencing +---------------------------------------------------------------------------------------- + @Homepage + https://nf-co.re/sarek +---------------------------------------------------------------------------------------- + @Documentation + https://nf-co.re/sarek/usage +---------------------------------------------------------------------------------------- + @Github + https://github.com/nf-core/sarek +---------------------------------------------------------------------------------------- + @Slack + https://nfcore.slack.com/channels/sarek +---------------------------------------------------------------------------------------- */ -nextflow.enable.dsl=2 +nextflow.enable.dsl = 2 -//////////////////////////////////////////////////// -/* -- PRINT HELP -- */ -//////////////////////////////////////////////////// - -def json_schema = "$projectDir/nextflow_schema.json" -if (params.help) { - def command = "nextflow run nf-core/sarek -profile docker --input sample.tsv --genome GRCh38" - log.info Schema.params_help(workflow, params, json_schema, command) - exit 0 -} - -//////////////////////////////////////////////////// -/* -- PARAMETER CHECKS -- */ -//////////////////////////////////////////////////// - -// Check that conda channels are set-up correctly -if (params.enable_conda) { - Checks.check_conda_channels(log) -} +/* +======================================================================================== + GENOME PARAMETER VALUES +======================================================================================== +*/ -// Check AWS batch settings -Checks.aws_batch(workflow, params) +params.ac_loci = WorkflowMain.getGenomeAttribute(params, 'ac_loci') +params.ac_loci_gc = WorkflowMain.getGenomeAttribute(params, 'ac_loci_gc') +params.bwa = WorkflowMain.getGenomeAttribute(params, 'bwa') +params.chr_dir = WorkflowMain.getGenomeAttribute(params, 'chr_dir') +params.chr_length = WorkflowMain.getGenomeAttribute(params, 'chr_length') +params.dbsnp = WorkflowMain.getGenomeAttribute(params, 'dbsnp') +params.dbsnp_index = WorkflowMain.getGenomeAttribute(params, 'dbsnp_index') +params.dict = WorkflowMain.getGenomeAttribute(params, 'dict') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fasta_fai = WorkflowMain.getGenomeAttribute(params, 'fasta_fai') +params.germline_resource = WorkflowMain.getGenomeAttribute(params, 'germline_resource') +params.germline_resource_index = WorkflowMain.getGenomeAttribute(params, 'germline_resource_index') 
+params.intervals = WorkflowMain.getGenomeAttribute(params, 'intervals') +params.known_indels = WorkflowMain.getGenomeAttribute(params, 'known_indels') +params.known_indels_index = WorkflowMain.getGenomeAttribute(params, 'known_indels_index') +params.mappability = WorkflowMain.getGenomeAttribute(params, 'mappability') +params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db') +params.species = WorkflowMain.getGenomeAttribute(params, 'species') +params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version') -// Check the hostnames against configured profiles -Checks.hostname(workflow, params, log) -// Check genome key exists if provided -Checks.genome_exists(params, log) +/* +======================================================================================== + VALIDATE & PRINT PARAMETER SUMMARY +======================================================================================== +*/ -//////////////////////////////////////////////////// -/* -- REFERENCES PARAMETER VALUES -- */ -//////////////////////////////////////////////////// -/* -- Initialize each params in params.genomes -- */ -/* -- catch the command line first if defined -- */ -//////////////////////////////////////////////////// +WorkflowMain.initialise(workflow, params, log) -params.ac_loci = Checks.get_genome_attribute(params, 'ac_loci') -params.ac_loci_gc = Checks.get_genome_attribute(params, 'ac_loci_gc') -params.bwa = Checks.get_genome_attribute(params, 'bwa') -params.chr_dir = Checks.get_genome_attribute(params, 'chr_dir') -params.chr_length = Checks.get_genome_attribute(params, 'chr_length') -params.dbsnp = Checks.get_genome_attribute(params, 'dbsnp') -params.dbsnp_index = Checks.get_genome_attribute(params, 'dbsnp_index') -params.dict = Checks.get_genome_attribute(params, 'dict') -params.fasta = Checks.get_genome_attribute(params, 'fasta') -params.fasta_fai = Checks.get_genome_attribute(params, 'fasta_fai') -params.germline_resource = Checks.get_genome_attribute(params, 'germline_resource') -params.germline_resource_index = Checks.get_genome_attribute(params, 'germline_resource_index') -params.intervals = Checks.get_genome_attribute(params, 'intervals') -params.known_indels = Checks.get_genome_attribute(params, 'known_indels') -params.known_indels_index = Checks.get_genome_attribute(params, 'known_indels_index') -params.mappability = Checks.get_genome_attribute(params, 'mappability') -params.snpeff_db = Checks.get_genome_attribute(params, 'snpeff_db') -params.species = Checks.get_genome_attribute(params, 'species') -params.vep_cache_version = Checks.get_genome_attribute(params, 'vep_cache_version') -//////////////////////////////////////////////////// -/* -- PRINT PARAMETER SUMMARY -- */ -//////////////////////////////////////////////////// +/* +======================================================================================== + NAMED WORKFLOW FOR PIPELINE +======================================================================================== +*/ -def summary_params = Schema.params_summary_map(workflow, params, json_schema) -log.info Schema.params_summary_log(workflow, params, json_schema) +workflow NFCORE_SAREK { + include { SAREK } from './workflows/sarek' + SAREK () +} -//////////////////////////////////////////////////// -/* -- RUN THE WORKFLOW -- */ -//////////////////////////////////////////////////// +/* +======================================================================================== + RUN ALL WORKFLOWS 
+======================================================================================== +*/ +// WORKFLOW: Execute a single named workflow for the pipeline workflow { - - include { SAREK } from './workflows/sarek' addParams( summary_params: summary_params ) - SAREK () - -} + NFCORE_SAREK () +} \ No newline at end of file diff --git a/modules/local/build_intervals/functions.nf b/modules/local/build_intervals/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/local/build_intervals/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/local/build_intervals.nf b/modules/local/build_intervals/main.nf similarity index 73% rename from modules/local/build_intervals.nf rename to modules/local/build_intervals/main.nf index 10cb148df7..786c04bab4 100644 --- a/modules/local/build_intervals.nf +++ b/modules/local/build_intervals/main.nf @@ -2,13 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process BUILD_INTERVALS { tag "$fai" label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -18,10 +19,10 @@ process BUILD_INTERVALS { } input: - path fai + path fai output: - path "${fai.baseName}.bed" + path "${fai.baseName}.bed" script: """ diff --git a/modules/local/concat_vcf.nf b/modules/local/concat_vcf.nf deleted file mode 100644 index be8f3684ab..0000000000 --- a/modules/local/concat_vcf.nf +++ /dev/null @@ -1,35 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -def options = initOptions(params.options) - -process CONCAT_VCF { - tag "$meta.id" - label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? "bioconda::htslib=1.12" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/htslib:1.12--hd3b49d5_0" - } else { - container "quay.io/biocontainers/htslib:1.12--hd3b49d5_0" - } - - input: - tuple val(meta), path(vcf) - path fai - path bed - - output: - tuple val(meta), path("*_*.vcf.gz"), path("*_*.vcf.gz.tbi"), emit: vcf - - script: - name = options.suffix ? "${options.suffix}_${meta.id}" : "${meta.id}" - target_options = params.target_bed ? "-t ${bed}" : "" - interval_options = params.no_intervals ? 
"-n" : "" - """ - concatenateVCFs.sh -i ${fai} -c ${task.cpus} -o ${name}.vcf ${target_options} ${interval_options} - """ -} \ No newline at end of file diff --git a/modules/local/concat_vcf/functions.nf b/modules/local/concat_vcf/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/local/concat_vcf/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/local/concat_vcf/main.nf b/modules/local/concat_vcf/main.nf new file mode 100644 index 0000000000..02a336dda3 --- /dev/null +++ b/modules/local/concat_vcf/main.nf @@ -0,0 +1,36 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process CONCAT_VCF { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::htslib=1.12" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/htslib:1.12--hd3b49d5_0" + } else { + container "quay.io/biocontainers/htslib:1.12--hd3b49d5_0" + } + + input: + tuple val(meta), path(vcf) + path fai + path bed + + output: + tuple val(meta), path("*_*.vcf.gz"), path("*_*.vcf.gz.tbi"), emit: vcf + + script: + def prefix = options.suffix ? "${options.suffix}_${meta.id}" : "${meta.id}" + def target_options = params.target_bed ? "-t ${bed}" : "" + def interval_options = params.no_intervals ? "-n" : "" + """ + concatenateVCFs.sh -i ${fai} -c ${task.cpus} -o ${prefix}.vcf ${target_options} ${interval_options} + """ +} \ No newline at end of file diff --git a/modules/local/create_intervals_bed/functions.nf b/modules/local/create_intervals_bed/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/local/create_intervals_bed/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/local/create_intervals_bed.nf b/modules/local/create_intervals_bed/main.nf similarity index 79% rename from modules/local/create_intervals_bed.nf rename to modules/local/create_intervals_bed/main.nf index e7554040ff..fb93f04ee4 100644 --- a/modules/local/create_intervals_bed.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -1,14 +1,15 @@ // Import generic module functions -include { initOptions; saveFiles; getSoftwareName; has_extension } from './functions' +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process CREATE_INTERVALS_BED { tag "$intervals" label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -18,15 +19,15 @@ process CREATE_INTERVALS_BED { } input: - path intervals + path intervals output: - path ('*.bed') + path ('*.bed') script: // If the interval file is BED format, the fifth column is interpreted to // contain runtime estimates, which is then used to combine short-running jobs - if (has_extension(intervals, "bed")) + if (intervals.toString().toLowerCase().endsWith("bed")) """ awk -vFS="\t" '{ t = \$5 # runtime estimate @@ -46,7 +47,7 @@ process CREATE_INTERVALS_BED { print \$0 > name }' ${intervals} """ - else if (has_extension(intervals, "interval_list")) + else if (intervals.toString().toLowerCase().endsWith("interval_list")) """ grep -v '^@' ${intervals} | awk -vFS="\t" '{ name = sprintf("%s_%d-%d", \$1, \$2, \$3); diff --git a/modules/local/functions.nf b/modules/local/functions.nf deleted file mode 100644 index c4a9d6baaa..0000000000 --- a/modules/local/functions.nf +++ /dev/null @@ -1,328 +0,0 @@ -/* - * This file holds several functions used to perform operation in Sarek - */ - -// Check if a row has the expected number of item -def check_number_of_item(row, number) { - if (row.size() != number) exit 1, "Malformed row in TSV file: ${row}, see --help for more information" - return true -} - -// Check parameter existence -def check_parameter_existence(it, list) { - if (!list.contains(it)) { - log.warn "Unknown parameter: ${it}" - return false - } - return true -} - -// Compare each parameter with a list of parameters -def check_parameter_list(list, realList) { - return list.every{ check_parameter_existence(it, realList) } -} - -// Define list of available tools to annotate -def define_anno_list() { - return [ - 'haplotypecaller', - 'manta', - 'mutect2', - 'strelka', - 'tiddit' - ] -} - -// Define list of skipable QC tools -def define_skip_qc_list() { - return [ - 'bamqc', - 
'baserecalibrator', - 'bcftools', - 'documentation', - 'fastqc', - 'markduplicates', - 'multiqc', - 'samtools', - 'sentieon', - 'vcftools', - 'versions' - ] -} - -// Define list of available step -def define_step_list() { - return [ - 'annotate', - 'controlfreec', - 'mapping', - 'preparerecalibration', - 'recalibrate', - 'variantcalling' - ] -} - -// Define list of available tools -def define_tool_list() { - return [ - 'ascat', - 'cnvkit', - 'controlfreec', - 'dnascope', - 'dnaseq', - 'freebayes', - 'haplotypecaller', - 'manta', - 'merge', - 'mpileup', - 'mutect2', - 'snpeff', - 'strelka', - 'tiddit', - 'tnscope', - 'vep', - 'msisensor' - ] -} - -// Channeling the TSV file containing BAM. -// Format is: "patient gender status sample bam bai" -def extract_bam(tsvFile) { - Channel.from(tsvFile) - .splitCsv(sep: '\t') - .map { row -> - check_number_of_item(row, 6) - def meta = [:] - - meta.patient = row[0] - meta.gender = row[1] - meta.status = return_status(row[2].toInteger()) - meta.sample = row[3] - meta.id = meta.sample - def bam = return_file(row[4]) - def bai = return_file(row[5]) - - if (!has_extension(bam, "bam")) exit 1, "File: ${bam} has the wrong extension. See --help for more information" - if (!has_extension(bai, "bai")) exit 1, "File: ${bai} has the wrong extension. See --help for more information" - - return [meta, bam, bai] - } -} - -// Create a channel of germline FASTQs from a directory pattern: "my_samples/*/" -// All FASTQ files in subdirectories are collected and emitted; -// they must have _R1_ and _R2_ in their names. -// All FASTQ files are assumed to be from the same sample. -def extract_fastq_from_dir(folder) { - sample = file(folder).getFileName().toString() - - fastq = Channel.fromFilePairs(folder + '/*{_R1_,_R2_}*.fastq.gz') - .ifEmpty { error "No directories found matching folder '${folder}'" } - -// TODO check if flowcellLane_from_fastq is useful or not - - fastq = fastq.map{ run, pair -> - def meta = [:] - meta.patient = sample - meta.sample = meta.patient - meta.gender = 'ZZ' // unused - meta.status = 0 // normal (not tumor) - meta.run = run - meta.id = "${meta.sample}-${meta.run}" - def read1 = pair[0] - def read2 = pair[1] - def CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : "" - def read_group = "\"@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA\"" - meta.read_group = read_group - - return [meta, [read1, read2]] - } -} - -// Channeling the TSV file containing FASTQ or BAM -// Format is: "patient gender status sample lane fastq1 fastq2" -// or: "patient gender status sample lane bam" -def extract_fastq(tsvFile) { - Channel.from(tsvFile) - .splitCsv(sep: '\t') - .map { row -> - def meta = [:] - meta.patient = row[0] - meta.gender = row[1] - meta.status = return_status(row[2].toInteger()) - meta.sample = row[3] - meta.run = row[4] - meta.id = "${meta.sample}-${meta.run}" - def read1 = return_file(row[5]) - def read2 = "null" - def CN = params.sequencing_center ? 
"CN:${params.sequencing_center}\\t" : "" - def read_group = "\"@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA\"" - meta.read_group = read_group - - if (has_extension(read1, "fastq.gz") || has_extension(read1, "fq.gz") || has_extension(read1, "fastq") || has_extension(read1, "fq")) { - check_number_of_item(row, 7) - read2 = return_file(row[6]) - if (!has_extension(read2, "fastq.gz") && !has_extension(read2, "fq.gz") && !has_extension(read2, "fastq") && !has_extension(read2, "fq")) exit 1, "File: ${file2} has the wrong extension. See --help for more information" - if (has_extension(read1, "fastq") || has_extension(read1, "fq") || has_extension(read2, "fastq") || has_extension(read2, "fq")) { - exit 1, "We do recommend to use gziped fastq file to help you reduce your data footprint." - } - } - else if (has_extension(read1, "bam")) check_number_of_item(row, 6) - else exit 1, "No recognisable extention for input file: ${read1}" - - return [meta, [read1, read2]] - } -} - -// // Channeling the TSV file containing mpileup -// // Format is: "patient gender status sample pileup" -// def extract_pileup(tsvFile) { -// Channel.from(tsvFile) -// .splitCsv(sep: '\t') -// .map { row -> -// check_number_of_item(row, 5) -// def idPatient = row[0] -// def gender = row[1] -// def status = return_status(row[2].toInteger()) -// def idSample = row[3] -// def mpileup = return_file(row[4]) - -// if (!has_extension(mpileup, "pileup")) exit 1, "File: ${mpileup} has the wrong extension. See --help for more information" - -// return [idPatient, gender, status, idSample, mpileup] -// } -// } - -// Channeling the TSV file containing Recalibration Tables. -// Format is: "patient gender status sample bam bai recalTable" -def extract_recal(tsvFile) { - Channel.from(tsvFile) - .splitCsv(sep: '\t') - .map { row -> - check_number_of_item(row, 7) - def meta = [:] - - meta.patient = row[0] - meta.gender = row[1] - meta.status = return_status(row[2].toInteger()) - meta.sample = row[3] - meta.id = meta.sample - def bam = return_file(row[4]) - def bai = return_file(row[5]) - def table = return_file(row[6]) - - if (!has_extension(bam, "bam")) exit 1, "File: ${bam} has the wrong extension. See --help for more information" - if (!has_extension(bai, "bai")) exit 1, "File: ${bai} has the wrong extension. See --help for more information" - if (!has_extension(table, "recal.table")) exit 1, "File: ${table} has the wrong extension. See --help for more information" - - return [meta, bam, bai, table] - } -} - -// // Parse first line of a FASTQ file, return the flowcell id and lane number. -// def flowcellLane_from_fastq(path) { -// // expected format: -// // xx:yy:FLOWCELLID:LANE:... (seven fields) -// // or -// // FLOWCELLID:LANE:xx:... 
(five fields) -// InputStream fileStream = new FileInputStream(path.toFile()) -// InputStream gzipStream = new java.util.zip.GZIPInputStream(fileStream) -// Reader decoder = new InputStreamReader(gzipStream, 'ASCII') -// BufferedReader buffered = new BufferedReader(decoder) -// def line = buffered.readLine() -// assert line.startsWith('@') -// line = line.substring(1) -// def fields = line.split(' ')[0].split(':') -// String fcid -// int lane -// if (fields.size() == 7) { -// // CASAVA 1.8+ format -// fcid = fields[2] -// lane = fields[3].toInteger() -// } else if (fields.size() == 5) { -// fcid = fields[0] -// lane = fields[1].toInteger() -// } -// [fcid, lane] -// } - -// Check file extension -def has_extension(it, extension) { - it.toString().toLowerCase().endsWith(extension.toLowerCase()) -} - -// Return file if it exists -def return_file(it) { - if (!file(it).exists()) exit 1, "Missing file in TSV file: ${it}, see --help for more information" - return file(it) -} - -// Remove .ann .gz and .vcf extension from a VCF file -def reduce_vcf(file) { - return file.fileName.toString().minus(".ann").minus(".vcf").minus(".gz") -} - -// Return status [0,1] -// 0 == Normal, 1 == Tumor -def return_status(it) { - if (!(it in [0, 1])) exit 1, "Status is not recognized in TSV file: ${it}, see --help for more information" - return it -} - -/* - * nf-core core functions - */ - -/* - * Extract name of software tool from process name using $task.process - */ -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -/* - * Tidy up and join elements of a list to return a path string - */ -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -/* - * Function to save/publish module results - */ -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/local/index_target_bed/functions.nf b/modules/local/index_target_bed/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/local/index_target_bed/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to 
initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/local/index_target_bed.nf b/modules/local/index_target_bed/main.nf similarity index 71% rename from modules/local/index_target_bed.nf rename to modules/local/index_target_bed/main.nf index 7411c09398..412f47298c 100644 --- a/modules/local/index_target_bed.nf +++ b/modules/local/index_target_bed/main.nf @@ -2,13 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process INDEX_TARGET_BED { tag "$target_bed" label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } conda (params.enable_conda ? 
"bioconda::htslib=1.12" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -18,10 +19,10 @@ process INDEX_TARGET_BED { } input: - path target_bed + path target_bed output: - tuple path("${target_bed}.gz"), path("${target_bed}.gz.tbi") + tuple path("${target_bed}.gz"), path("${target_bed}.gz.tbi") script: """ diff --git a/modules/nf-core/software/bwa/index/functions.nf b/modules/nf-core/software/bwa/index/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/bwa/index/functions.nf +++ b/modules/nf-core/software/bwa/index/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/bwa/index/main.nf b/modules/nf-core/software/bwa/index/main.nf index 915c6623b4..aabd187c91 100644 --- a/modules/nf-core/software/bwa/index/main.nf +++ b/modules/nf-core/software/bwa/index/main.nf @@ -2,14 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process BWA_INDEX { tag "$fasta" label 'process_high' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) } conda (params.enable_conda ? "bioconda::bwa=0.7.17" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { diff --git a/modules/nf-core/software/bwa/index/meta.yml b/modules/nf-core/software/bwa/index/meta.yml index 0c3b8f5ff4..181204c3da 100644 --- a/modules/nf-core/software/bwa/index/meta.yml +++ b/modules/nf-core/software/bwa/index/meta.yml @@ -13,27 +13,6 @@ tools: homepage: http://bio-bwa.sourceforge.net/ documentation: http://www.htslib.org/doc/samtools.html arxiv: arXiv:1303.3997 -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
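With the per-module params gone, module behaviour is driven solely by the `options` map that `initOptions` consumes, supplied at include time. A minimal sketch of that wiring (relative path and subworkflow name are assumptions), with options drawn from a `bwa_index` entry under `params.modules`:

// Hypothetical subworkflow wiring; adjust the relative path to where this script lives
include { BWA_INDEX } from '../../modules/nf-core/software/bwa/index/main' addParams(options: params.modules['bwa_index'])

workflow PREPARE_GENOME {
    take:
        fasta // reference fasta file

    main:
        BWA_INDEX(fasta) // publishing behaviour now comes from options.publish_* only

    emit:
        bwa = BWA_INDEX.out.index // assumes the module names its index output 'index'
}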
input: - fasta: type: file diff --git a/modules/nf-core/software/bwa/mem/functions.nf b/modules/nf-core/software/bwa/mem/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/bwa/mem/functions.nf +++ b/modules/nf-core/software/bwa/mem/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/bwa/mem/main.nf b/modules/nf-core/software/bwa/mem/main.nf index 5d39412d86..8fac539fdb 100644 --- a/modules/nf-core/software/bwa/mem/main.nf +++ b/modules/nf-core/software/bwa/mem/main.nf @@ -2,14 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process BWA_MEM { tag "$meta.id" label 'process_high' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.12" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -27,6 +27,7 @@ process BWA_MEM { path "*.version.txt" , emit: version script: + def split_cpus = Math.floor(task.cpus/2) def software = getSoftwareName(task.process) def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def read_group = meta.read_group ? "-R ${meta.read_group}" : "" @@ -36,10 +37,10 @@ process BWA_MEM { bwa mem \\ $options.args \\ $read_group \\ - -t $task.cpus \\ + -t ${split_cpus} \\ \$INDEX \\ $reads \\ - | samtools $options.args2 --threads $task.cpus -o ${prefix}.bam - + | samtools $options.args2 --threads ${split_cpus} -o ${prefix}.bam - echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//' > ${software}.version.txt """ diff --git a/modules/nf-core/software/bwa/mem/meta.yml b/modules/nf-core/software/bwa/mem/meta.yml index 2e44951c17..693c5450f2 100644 --- a/modules/nf-core/software/bwa/mem/meta.yml +++ b/modules/nf-core/software/bwa/mem/meta.yml @@ -16,27 +16,6 @@ tools: homepage: http://bio-bwa.sourceforge.net/ documentation: http://www.htslib.org/doc/samtools.html arxiv: arXiv:1303.3997 -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
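Because bwa mem now pipes straight into samtools, the two tools share `task.cpus`, hence the new `split_cpus`. One caveat: `Math.floor` returns a double, so the rendered flag is e.g. `-t 2.0`, which bwa and samtools happen to parse as 2. An integer-safe variant (a sketch, not what the module ships) would be:

// Hypothetical alternative to Math.floor(task.cpus/2): integer division, floored at 1
def split_cpus = Math.max(1, task.cpus.intdiv(2)) // guarantees at least one thread per tool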
input: - meta: type: map diff --git a/modules/nf-core/software/bwamem2/index/functions.nf b/modules/nf-core/software/bwamem2/index/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/bwamem2/index/functions.nf +++ b/modules/nf-core/software/bwamem2/index/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/bwamem2/index/main.nf b/modules/nf-core/software/bwamem2/index/main.nf index 34e8ba469f..b667f266cf 100644 --- a/modules/nf-core/software/bwamem2/index/main.nf +++ b/modules/nf-core/software/bwamem2/index/main.nf @@ -2,14 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process BWAMEM2_INDEX { tag "$fasta" label 'process_high' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) } conda (params.enable_conda ? "bioconda::bwa-mem2=2.2.1" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { diff --git a/modules/nf-core/software/bwamem2/index/meta.yml b/modules/nf-core/software/bwamem2/index/meta.yml index 707809028d..9d717f739e 100644 --- a/modules/nf-core/software/bwamem2/index/meta.yml +++ b/modules/nf-core/software/bwamem2/index/meta.yml @@ -12,27 +12,6 @@ tools: a large reference genome, such as the human genome. homepage: https://github.com/bwa-mem2/bwa-mem2 documentation: https://github.com/bwa-mem2/bwa-mem2#usage -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
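Both index modules now publish under a fixed 'index' directory rather than the software name, but conf/modules.config sets `publish_files = false` for them, so `saveFiles` returns null and `publishDir` skips the files. For reference, the three `publish_files` behaviours of the `saveFiles` function above (filenames hypothetical):

// publish_files == false: neither branch matches, saveFiles returns null, nothing is published
saveFiles(filename: 'genome.fasta.amb', options: [publish_files: false], publish_dir: 'index')
// publish_files == null (unset): everything is published under publish_dir
saveFiles(filename: 'genome.fasta.amb', options: [:], publish_dir: 'index') // -> 'index/genome.fasta.amb'
// publish_files == Map: only matching extensions are published, into the mapped subdirectory
saveFiles(filename: 'test.bam', options: [publish_files: ['bam':'mapped']], publish_dir: 'preprocessing') // -> 'preprocessing/mapped/test.bam'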
input: - fasta: type: file diff --git a/modules/nf-core/software/bwamem2/mem/functions.nf b/modules/nf-core/software/bwamem2/mem/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/bwamem2/mem/functions.nf +++ b/modules/nf-core/software/bwamem2/mem/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/bwamem2/mem/main.nf b/modules/nf-core/software/bwamem2/mem/main.nf index 4f2195ac5b..633e62d072 100644 --- a/modules/nf-core/software/bwamem2/mem/main.nf +++ b/modules/nf-core/software/bwamem2/mem/main.nf @@ -2,14 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process BWAMEM2_MEM { tag "$meta.id" label 'process_high' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } conda (params.enable_conda ? "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.12" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -27,6 +27,7 @@ process BWAMEM2_MEM { path "*.version.txt" , emit: version script: + def split_cpus = Math.floor(task.cpus/2) def software = getSoftwareName(task.process) def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def read_group = meta.read_group ? "-R ${meta.read_group}" : "" @@ -36,10 +37,10 @@ process BWAMEM2_MEM { bwa-mem2 mem \\ $options.args \\ $read_group \\ - -t $task.cpus \\ + -t ${split_cpus} \\ \$INDEX \\ $reads \\ - | samtools $options.args2 -@ $task.cpus -o ${prefix}.bam - + | samtools $options.args2 -@ ${split_cpus} -o ${prefix}.bam - echo \$(bwa-mem2 version 2>&1) > ${software}.version.txt """ diff --git a/modules/nf-core/software/bwamem2/mem/meta.yml b/modules/nf-core/software/bwamem2/mem/meta.yml index 7d59b9b404..2fc7713d0f 100644 --- a/modules/nf-core/software/bwamem2/mem/meta.yml +++ b/modules/nf-core/software/bwamem2/mem/meta.yml @@ -16,27 +16,6 @@ tools: homepage: http://bio-bwa.sourceforge.net/ documentation: http://www.htslib.org/doc/samtools.html arxiv: arXiv:1303.3997 -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
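Both mem modules key everything off the `meta` map: `meta.id` names the tag, output prefix, and publish subdirectory, while `meta.read_group` is injected via `-R` when present. A sketch of the reads channel shape the module expects (sample values hypothetical):

// One element per lane: [ meta map, [ R1, R2 ] ]; read_group is passed verbatim to -R
reads = Channel.of([
    [ id: 'sample1', read_group: "\"@RG\\tID:sample1\\tSM:sample1\\tPL:ILLUMINA\"" ],
    [ file('sample1_R1.fastq.gz'), file('sample1_R2.fastq.gz') ]
])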
input: - meta: type: map diff --git a/modules/nf-core/software/fastqc/functions.nf b/modules/nf-core/software/fastqc/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/fastqc/functions.nf +++ b/modules/nf-core/software/fastqc/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/fastqc/main.nf b/modules/nf-core/software/fastqc/main.nf index cce410a080..7ce76adffb 100644 --- a/modules/nf-core/software/fastqc/main.nf +++ b/modules/nf-core/software/fastqc/main.nf @@ -2,14 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process FASTQC { tag "$meta.id" label 'process_medium' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { diff --git a/modules/nf-core/software/fastqc/meta.yml b/modules/nf-core/software/fastqc/meta.yml index 413aad8db6..8eb9953dce 100644 --- a/modules/nf-core/software/fastqc/meta.yml +++ b/modules/nf-core/software/fastqc/meta.yml @@ -15,27 +15,6 @@ tools: overrepresented sequences. homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
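With `publish_by_meta:['id']` passed by the process and `publish_by_meta = true` set in the module options, `saveFiles` inserts the sample id as a subdirectory. Tracing the definitions above:

// String meta values are used directly as path components
saveFiles(filename: 'test_fastqc.html', options: [publish_by_meta: true],
          publish_dir: 'fastqc', meta: [id: 'test'], publish_by_meta: ['id'])
// -> 'fastqc/test/test_fastqc.html'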
input: - meta: type: map diff --git a/modules/nf-core/software/functions.nf b/modules/nf-core/software/functions.nf deleted file mode 100644 index d25eea86b3..0000000000 --- a/modules/nf-core/software/functions.nf +++ /dev/null @@ -1,59 +0,0 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ - -/* - * Extract name of software tool from process name using $task.process - */ -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -/* - * Tidy up and join elements of a list to return a path string - */ -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -/* - * Function to save/publish module results - */ -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/nf-core/software/gatk4/applybqsr.nf b/modules/nf-core/software/gatk4/applybqsr.nf deleted file mode 100644 index cbe83e2f68..0000000000 --- a/modules/nf-core/software/gatk4/applybqsr.nf +++ /dev/null @@ -1,44 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -process GATK4_APPLYBQSR { - tag "$meta.id" - label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" - } else { - container "quay.io/biocontainers/gatk4:4.2.0.0--0" - } - - input: - tuple val(meta), path(bam), path(bai), path(recalibrationReport), path(interval) - path dict - path fasta - path fai - - output: - tuple val(meta), path("${prefix}${meta.sample}.recal.bam") , emit: bam - path "*.version.txt" , emit: version - - script: - def software = getSoftwareName(task.process) - prefix = params.no_intervals ? "" : "${interval.baseName}_" - options_intervals = params.no_intervals ? 
"" : "-L ${interval}" - """ - gatk --java-options -Xmx${task.memory.toGiga()}g \ - ApplyBQSR \ - -R ${fasta} \ - --input ${bam} \ - --output ${prefix}${meta.sample}.recal.bam \ - ${options_intervals} \ - --bqsr-recal-file ${recalibrationReport} - - echo \$(gatk ApplyBQSR --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/gatk4/applybqsr/functions.nf b/modules/nf-core/software/gatk4/applybqsr/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/gatk4/applybqsr/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/gatk4/applybqsr/main.nf b/modules/nf-core/software/gatk4/applybqsr/main.nf new file mode 100644 index 0000000000..ba0c629869 --- /dev/null +++ b/modules/nf-core/software/gatk4/applybqsr/main.nf @@ -0,0 +1,46 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process GATK4_APPLYBQSR { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.0.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" + } else { + container "quay.io/biocontainers/gatk4:4.2.0.0--0" + } + + input: + tuple val(meta), path(bam), path(bai), path(bqsr_table), path(intervalsBed) + path fasta + path fastaidx + path dict + + output: + tuple val(meta), path("*.bam"), emit: bam + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" + """ + gatk ApplyBQSR \\ + -R $fasta \\ + -I $bam \\ + --bqsr-recal-file $bqsr_table \\ + $intervalsCommand \\ + -O ${prefix}.bam \\ + $options.args + + gatk --version | grep Picard | sed "s/Picard Version: //g" > ${software}.version.txt + """ +} diff --git a/modules/nf-core/software/gatk4/applybqsr/meta.yml b/modules/nf-core/software/gatk4/applybqsr/meta.yml new file mode 100644 index 0000000000..9bf12f09bb --- /dev/null +++ b/modules/nf-core/software/gatk4/applybqsr/meta.yml @@ -0,0 +1,58 @@ +name: gatk4_applybqsr +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bqsr + - bam +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file from alignment + pattern: "*.{bam}" + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + - fasta: + type: file + description: The reference fasta file + - fastaidx: + type: file + description: Index of reference fasta file + - dict: + type: file + description: GATK sequence dictionary + - intervalsBed: + type: file + description: Bed file with the genomic regions included in the library (optional) + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - bam: + type: file + description: Recalibrated BAM file + pattern: "*.{bam}" + +authors: + - "@yocra3" diff --git a/modules/nf-core/software/gatk4/baserecalibrator.nf b/modules/nf-core/software/gatk4/baserecalibrator.nf deleted file mode 100644 index b6ba407592..0000000000 --- a/modules/nf-core/software/gatk4/baserecalibrator.nf +++ /dev/null @@ -1,55 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -process GATK4_BASERECALIBRATOR { - tag "$meta.id" - label 'process_medium' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? 
"bioconda::gatk4=4.2.0.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" - } else { - container "quay.io/biocontainers/gatk4:4.2.0.0--0" - } - - input: - tuple val(meta), path(bam), path(bai), path(interval) - path dbsnp - tuple val(meta_dbsnp), path(dbsnp_tbi) - path dict - path fai - path fasta - path known_indels - tuple val(meta_known_indels), path(known_indels_tbi) - - output: - tuple val(meta), path("${prefix}${meta.sample}.recal.table"), emit: report - path "*.version.txt" , emit: version - - script: - def software = getSoftwareName(task.process) - options_dbsnp = params.dbsnp ? "--known-sites ${dbsnp}" : "" - options_intervals = params.no_intervals ? "" : "-L ${interval}" - options_known_indels = params.known_indels ? known_indels.collect{"--known-sites ${it}"}.join(' ') : "" - prefix = params.no_intervals ? "" : "${interval.baseName}_" - // TODO: --use-original-qualities ??? - """ - gatk --java-options -Xmx${task.memory.toGiga()}g \ - BaseRecalibrator \ - -I ${bam} \ - -O ${prefix}${meta.sample}.recal.table \ - --tmp-dir . \ - -R ${fasta} \ - ${options_dbsnp} \ - ${options_known_indels} \ - ${options_intervals} \ - --verbosity INFO - - echo \$(gatk ApplyBQSR --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/gatk4/baserecalibrator/functions.nf b/modules/nf-core/software/gatk4/baserecalibrator/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/gatk4/baserecalibrator/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/gatk4/baserecalibrator/main.nf b/modules/nf-core/software/gatk4/baserecalibrator/main.nf new file mode 100644 index 0000000000..48f05fa21a --- /dev/null +++ b/modules/nf-core/software/gatk4/baserecalibrator/main.nf @@ -0,0 +1,49 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process GATK4_BASERECALIBRATOR { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" + } else { + container "quay.io/biocontainers/gatk4:4.2.0.0--0" + } + + input: + tuple val(meta), path(bam), path(bai), path(intervalsBed) + path fasta + path fai + path dict + path knownSites + path knownSites_tbi + + output: + tuple val(meta), path("*.table"), emit: table + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" + def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') + """ + gatk BaseRecalibrator \ + -R $fasta \ + -I $bam \ + $sitesCommand \ + $intervalsCommand \ + $options.args \ + -O ${prefix}.table + + echo \$(gatk BaseRecalibrator --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt + """ +} diff --git a/modules/nf-core/software/gatk4/baserecalibrator/meta.yml b/modules/nf-core/software/gatk4/baserecalibrator/meta.yml new file mode 100644 index 0000000000..0996dcbe51 --- /dev/null +++ b/modules/nf-core/software/gatk4/baserecalibrator/meta.yml @@ -0,0 +1,58 @@ +name: gatk4_baserecalibrator +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - bqsr + - bam +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM file from alignment + pattern: "*.{bam}" + - fasta: + type: file + description: The reference fasta file + - fai: + type: file + description: Index of reference fasta file + - dict: + type: file + description: GATK sequence dictionary + - intervalsBed: + type: file + description: Bed file with the genomic regions included in the library (optional) + - knownSites: + type: file + description: VCF files of known polymorphic sites (e.g. dbSNP), passed to BaseRecalibrator via --known-sites + - knownSites_tbi: + type: file + description: Tabix indices of the known sites VCF files + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - table: + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" + +authors: + - "@yocra3" diff --git a/modules/nf-core/software/gatk4/createsequencedictionary/functions.nf b/modules/nf-core/software/gatk4/createsequencedictionary/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/gatk4/createsequencedictionary/functions.nf +++ b/modules/nf-core/software/gatk4/createsequencedictionary/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - 
if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/gatk4/createsequencedictionary/main.nf b/modules/nf-core/software/gatk4/createsequencedictionary/main.nf index 5f9a7ce25f..8bc71a8073 100644 --- a/modules/nf-core/software/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/software/gatk4/createsequencedictionary/main.nf @@ -2,14 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process GATK4_CREATESEQUENCEDICTIONARY { tag "$fasta" label 'process_medium' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { diff --git a/modules/nf-core/software/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/software/gatk4/createsequencedictionary/meta.yml index b33af6ad26..d0cc59801f 100644 --- a/modules/nf-core/software/gatk4/createsequencedictionary/meta.yml +++ b/modules/nf-core/software/gatk4/createsequencedictionary/meta.yml @@ -12,27 +12,6 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
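Boolean meta values are rendered as `key_value` directories, so e.g. single- and paired-end outputs stay separated. Under the same `saveFiles` definition (values hypothetical):

// Boolean meta values become "<key>_<value>" path components
saveFiles(filename: 'test.dict', options: [publish_by_meta: true], publish_dir: 'dict',
          meta: [id: 'test', single_end: false], publish_by_meta: ['id', 'single_end'])
// -> 'dict/test/single_end_false/test.dict'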
input: - fasta: type: file diff --git a/modules/nf-core/software/gatk4/gatherbqsrreports/functions.nf b/modules/nf-core/software/gatk4/gatherbqsrreports/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/gatk4/gatherbqsrreports/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/gatk4/gatherbqsrreports.nf b/modules/nf-core/software/gatk4/gatherbqsrreports/main.nf similarity index 78% rename from modules/nf-core/software/gatk4/gatherbqsrreports.nf rename to modules/nf-core/software/gatk4/gatherbqsrreports/main.nf index bfda3d217b..5f71884bda 100644 --- a/modules/nf-core/software/gatk4/gatherbqsrreports.nf +++ b/modules/nf-core/software/gatk4/gatherbqsrreports/main.nf @@ -1,4 +1,4 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) @@ -6,8 +6,9 @@ def options = initOptions(params.options) process GATK4_GATHERBQSRREPORTS { tag "$meta.id" label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -22,7 +23,7 @@ process GATK4_GATHERBQSRREPORTS { output: tuple val(meta), path("${meta.sample}.recal.table"), emit: table path "${meta.sample}.recal.table", emit: report - path "*.version.txt" , emit: version + path "*.version.txt", emit: version script: def software = getSoftwareName(task.process) diff --git a/modules/nf-core/software/gatk4/genotypegvcf/functions.nf b/modules/nf-core/software/gatk4/genotypegvcf/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/gatk4/genotypegvcf/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if 
(!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/gatk4/genotypegvcf.nf b/modules/nf-core/software/gatk4/genotypegvcf/main.nf similarity index 93% rename from modules/nf-core/software/gatk4/genotypegvcf.nf rename to modules/nf-core/software/gatk4/genotypegvcf/main.nf index 19337680b2..270454b465 100644 --- a/modules/nf-core/software/gatk4/genotypegvcf.nf +++ b/modules/nf-core/software/gatk4/genotypegvcf/main.nf @@ -1,4 +1,4 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) @@ -19,7 +19,7 @@ process GATK4_GENOTYPEGVCF { input: tuple val(meta), path(interval), path(gvcf) path dbsnp - tuple val(meta_dbsnp), path(dbsnp_tbi) + path dbsnp_tbi path dict path fasta path fai diff --git a/modules/nf-core/software/gatk4/haplotypecaller.nf b/modules/nf-core/software/gatk4/haplotypecaller.nf deleted file mode 100644 index 818ed0c878..0000000000 --- a/modules/nf-core/software/gatk4/haplotypecaller.nf +++ /dev/null @@ -1,49 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -process GATK4_HAPLOTYPECALLER { - tag "$meta.id" - label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" - } else { - container "quay.io/biocontainers/gatk4:4.2.0.0--0" - } - - input: - tuple val(meta), path(bam), path(bai), file(interval) - path dbsnp - tuple val(meta_dbsnp), path(dbsnp_tbi) - path dict - path fasta - path fai - val no_intervals - - output: - tuple val(meta), path("${interval.baseName}_${meta.id}.g.vcf"), emit: gvcf - tuple val(meta), path(interval), path("${interval.baseName}_${meta.id}.g.vcf"), emit: interval_gvcf - path "*.version.txt" , emit: version - - script: - def software = getSoftwareName(task.process) - intervalsOptions = no_intervals ? "" : "-L ${interval}" - dbsnpOptions = params.dbsnp ? 
"--D ${dbsnp}" : "" - """ - gatk --java-options "-Xmx${task.memory.toGiga()}g -Xms6000m -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10" \ - HaplotypeCaller \ - -R ${fasta} \ - -I ${bam} \ - ${intervalsOptions} \ - ${dbsnpOptions} \ - -O ${interval.baseName}_${meta.id}.g.vcf \ - -ERC GVCF - - echo \$(gatk HaplotypeCaller --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt - """ -} diff --git a/modules/nf-core/software/gatk4/haplotypecaller/functions.nf b/modules/nf-core/software/gatk4/haplotypecaller/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/gatk4/haplotypecaller/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/gatk4/haplotypecaller/main.nf b/modules/nf-core/software/gatk4/haplotypecaller/main.nf new file mode 100644 index 0000000000..48f73b9340 --- /dev/null +++ b/modules/nf-core/software/gatk4/haplotypecaller/main.nf @@ -0,0 +1,58 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process GATK4_HAPLOTYPECALLER { + tag "$meta.id" + label 'process_medium' + publishDir params.outdir, mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" + } else { + container "quay.io/biocontainers/gatk4:4.2.0.0--0" + } + + input: + tuple val(meta), path(bam), path(bai), path(interval) + path dbsnp + path dbsnp_tbi + path dict + path fasta + path fai + val no_intervals + + output: + tuple val(meta), path("*.vcf"), emit: vcf + tuple val(meta), path(interval), path("*.vcf"), emit: interval_vcf + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${interval.baseName}_${meta.id}${options.suffix}" : "${interval.baseName}_${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def intervalsOptions = no_intervals ? "" : "-L ${interval}" + def dbsnpOptions = params.dbsnp ? "--D ${dbsnp}" : "" + """ + gatk \\ + --java-options "-Xmx${avail_mem}g" \\ + HaplotypeCaller \\ + -R $fasta \\ + -I $bam \\ + ${intervalsOptions} \\ + ${dbsnpOptions} \\ + -O ${prefix}.vcf \\ + $options.args + + echo \$(gatk HaplotypeCaller --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/gatk4/haplotypecaller/meta.yml b/modules/nf-core/software/gatk4/haplotypecaller/meta.yml new file mode 100644 index 0000000000..f0fc3910e3 --- /dev/null +++ b/modules/nf-core/software/gatk4/haplotypecaller/meta.yml @@ -0,0 +1,63 @@ +name: gatk4_haplotypecaller +description: Call germline SNPs and indels via local re-assembly of haplotypes +keywords: + - gatk4 + - haplotypecaller + - haplotype +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size.
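The new HaplotypeCaller module follows the same include/addParams wiring used throughout this PR. As a minimal sketch of a caller, with hypothetical file names and an options map standing in for the real conf/modules.config entry (none of this is part of the diff itself):

// Hypothetical wiring sketch; all paths and option values are placeholders
include { GATK4_HAPLOTYPECALLER } from './modules/nf-core/software/gatk4/haplotypecaller/main' addParams(options: [args: '-ERC GVCF', suffix: '.g'])

workflow TEST_HAPLOTYPECALLER {
    // meta map + BAM/BAI + one interval file, matching the module's input tuple
    bam = Channel.of([ [id:'test'], file('test.bam'), file('test.bam.bai'), file('chr21.bed') ])
    GATK4_HAPLOTYPECALLER(
        bam,
        file('dbsnp.vcf.gz'),     // dbsnp
        file('dbsnp.vcf.gz.tbi'), // dbsnp_tbi
        file('genome.dict'),      // dict
        file('genome.fasta'),     // fasta
        file('genome.fasta.fai'), // fai
        false)                    // no_intervals
    GATK4_HAPLOTYPECALLER.out.vcf.view()
}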
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672 + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.bam" + - bai: + type: file + description: Index of BAM file + pattern: "*.bam.bai" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - vcf: + type: file + description: VCF file + pattern: "*.vcf" + +authors: + - "@suzannejin" diff --git a/modules/nf-core/software/gatk4/markduplicates.nf b/modules/nf-core/software/gatk4/markduplicates.nf deleted file mode 100644 index ad976d628a..0000000000 --- a/modules/nf-core/software/gatk4/markduplicates.nf +++ /dev/null @@ -1,83 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -process GATK4_MARKDUPLICATES { - tag "$meta.id" - label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" - } else { - container "quay.io/biocontainers/gatk4:4.2.0.0--0" - } - - input: - tuple val(meta), path("${meta.sample}.bam"), path("${meta.sample}.bam.bai") - - output: - tuple val(meta), path("${meta.sample}.md.bam"), path("${meta.sample}.md.bam.bai"), emit: bam - path "${meta.sample}.bam.metrics", optional : true, emit: report - path "*.version.txt" , emit: version - - script: - def software = getSoftwareName(task.process) - markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\"" - metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${meta.sample}.bam.metrics" - """ - gatk --java-options ${markdup_java_options} \ - MarkDuplicates \ - --INPUT ${meta.sample}.bam \ - --METRICS_FILE ${meta.sample}.bam.metrics \ - --TMP_DIR . \ - --ASSUME_SORT_ORDER coordinate \ - --CREATE_INDEX true \ - --OUTPUT ${meta.sample}.md.bam - mv ${meta.sample}.md.bai ${meta.sample}.md.bam.bai - - echo \$(gatk MarkDuplicates --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt - """ -} - -process GATK4_MARKDUPLICATES_SPARK { - tag "$meta.id" - label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? 
"bioconda::gatk4-spark=4.2.0.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/gatk4-spark:4.2.0.0--0" - } else { - container "quay.io/biocontainers/gatk4-spark:4.2.0.0--0" - } - - input: - tuple val(meta), path("${meta.sample}.bam"), path("${meta.sample}.bam.bai") - - output: - tuple val(meta), path("${meta.sample}.md.bam"), path("${meta.sample}.md.bam.bai"), emit: bam - path "${meta.sample}.bam.metrics", optional : true, emit: report - path "*.version.txt" , emit: version - - script: - def software = getSoftwareName(task.process) - markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\"" - metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${meta.sample}.bam.metrics" - """ - gatk --java-options ${markdup_java_options} \ - MarkDuplicatesSpark \ - -I ${meta.sample}.bam \ - -O ${meta.sample}.md.bam \ - ${metrics} \ - --tmp-dir . \ - --create-output-bam-index true \ - --spark-master local[${task.cpus}] - - echo \$(gatk MarkDuplicatesSpark --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/gatk4/markduplicates/functions.nf b/modules/nf-core/software/gatk4/markduplicates/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/gatk4/markduplicates/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/gatk4/markduplicates/main.nf b/modules/nf-core/software/gatk4/markduplicates/main.nf new file mode 100644 index 0000000000..751ff82337 --- /dev/null +++ b/modules/nf-core/software/gatk4/markduplicates/main.nf @@ -0,0 +1,48 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process GATK4_MARKDUPLICATES { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" + } else { + container "quay.io/biocontainers/gatk4:4.2.0.0--0" + } + + input: + tuple val(meta), path(bam), path(bai) + val use_metrics + + output: + tuple val(meta), path("*.bam"), path("*.bai"), emit: bam + tuple val(meta), path("*.metrics"), optional : true, emit: metrics + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def metrics = use_metrics ? "M=${prefix}.metrics" : '' + """ + gatk MarkDuplicates \\ + I=$bam \\ + $metrics \\ + TMP_DIR=. \\ + ASSUME_SORT_ORDER=coordinate \\ + CREATE_INDEX=true \\ + O=${prefix}.bam \\ + $options.args + + mv ${prefix}.bai ${prefix}.bam.bai + + echo \$(gatk MarkDuplicates --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/gatk4/markduplicates/meta.yml b/modules/nf-core/software/gatk4/markduplicates/meta.yml new file mode 100644 index 0000000000..abe61e023f --- /dev/null +++ b/modules/nf-core/software/gatk4/markduplicates/meta.yml @@ -0,0 +1,49 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - markduplicates + - bam + - sort +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['BSD-3-clause'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - bam: + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - metrics: + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics.txt}" + +authors: + - "@ajodeh-juma" diff --git a/modules/nf-core/software/gatk4/markduplicatesspark/functions.nf b/modules/nf-core/software/gatk4/markduplicatesspark/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/gatk4/markduplicatesspark/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/gatk4/markduplicatesspark/main.nf b/modules/nf-core/software/gatk4/markduplicatesspark/main.nf new file mode 100644 index 0000000000..dd8b71f90a --- /dev/null +++ b/modules/nf-core/software/gatk4/markduplicatesspark/main.nf @@ -0,0 +1,46 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process GATK4_MARKDUPLICATES_SPARK { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::gatk4-spark=4.2.0.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4-spark:4.2.0.0--0" + } else { + container "quay.io/biocontainers/gatk4-spark:4.2.0.0--0" + } + + input: + tuple val(meta), path(bam), path(bai) + val use_metrics + + output: + tuple val(meta), path("*.bam"), path("*.bai"), emit: bam + tuple val(meta), path("*.metrics"), optional : true, emit: metrics + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def metrics = use_metrics ? "-M ${prefix}.bam.metrics" : '' + """ + gatk MarkDuplicatesSpark \\ + -I $bam \\ + $metrics \\ + --tmp-dir . \\ + --create-output-bam-index true \\ + --spark-master local[${task.cpus}] \\ + -O ${prefix}.bam \\ + $options.args + + echo \$(gatk MarkDuplicatesSpark --version 2>&1) | sed 's/^.*(GATK) v//; s/ HTSJDK.*\$//' > ${software}.version.txt + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/gatk4/markduplicatesspark/meta.yml b/modules/nf-core/software/gatk4/markduplicatesspark/meta.yml new file mode 100644 index 0000000000..abe61e023f --- /dev/null +++ b/modules/nf-core/software/gatk4/markduplicatesspark/meta.yml @@ -0,0 +1,49 @@ +name: gatk4_markduplicatesspark +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - markduplicates + - bam + - sort +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['BSD-3-clause'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - bam: + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - metrics: + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics.txt}" + +authors: + - "@ajodeh-juma" diff --git a/modules/nf-core/software/manta/somatic/functions.nf b/modules/nf-core/software/manta/somatic/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/manta/somatic/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/manta/somatic.nf b/modules/nf-core/software/manta/somatic/main.nf similarity index 85% rename from modules/nf-core/software/manta/somatic.nf rename to modules/nf-core/software/manta/somatic/main.nf index 8976d423bd..a27d43b2f6 100644 --- a/modules/nf-core/software/manta/somatic.nf +++ b/modules/nf-core/software/manta/somatic/main.nf @@ -1,13 +1,15 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process MANTA_SOMATIC { tag "$meta.id" label 'process_high' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } conda (params.enable_conda ? "bioconda::manta=1.6.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -37,16 +39,16 @@ process MANTA_SOMATIC { // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter // using the Nextflow "task" variable e.g. "--threads $task.cpus" - options_manta = params.target_bed ? "--exome --callRegions ${target_bed}" : "" + options_manta = params.target_bed ? "--exome --callRegions $target_bed" : "" """ configManta.py \ - --tumorBam ${bam_tumor} \ - --normalBam ${bam_normal} \ - --reference ${fasta} \ - ${options_manta} \ + --tumorBam $bam_tumor \ + --normalBam $bam_normal \ + --reference $fasta \ + $options_manta \ --runDir manta - python manta/runWorkflow.py -m local -j ${task.cpus} + python manta/runWorkflow.py -m local -j $task.cpus mv manta/results/variants/candidateSmallIndels.vcf.gz ${prefix}.candidateSmallIndels.vcf.gz mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi ${prefix}.candidateSmallIndels.vcf.gz.tbi diff --git a/modules/nf-core/software/msisensor/msi.nf b/modules/nf-core/software/msisensor/msi.nf deleted file mode 100644 index 59fa69e4a4..0000000000 --- a/modules/nf-core/software/msisensor/msi.nf +++ /dev/null @@ -1,45 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -process MSISENSOR_MSI { - tag "$meta.id" - label 'process_high' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? 
"bioconda::msisensor=0.5" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2" - } else { - container "quay.io/biocontainers/msisensor:0.5--hb3646a4_2" - } - - input: - tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) - path msisensor_scan - - output: - tuple val(meta), path("*.list") - - script: - def software = getSoftwareName(task.process) - def ioptions = initOptions(options) - def prefix = ioptions.suffix ? "msisensor_${meta.id}${ioptions.suffix}" : "msisensor_${meta.id}" - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - """ - msisensor msi -d ${msisensor_scan} \ - -b 4 \ - -t ${bam_tumor} \ - -n ${bam_normal} \ - -o ${prefix} - - mv ${prefix} ${prefix}.list - mv ${prefix}_dis ${prefix}_dis.list - mv ${prefix}_germline ${prefix}_germline.list - mv ${prefix}_somatic ${prefix}_somatic.list - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/msisensor/scan.nf b/modules/nf-core/software/msisensor/scan.nf deleted file mode 100644 index fa71e5b723..0000000000 --- a/modules/nf-core/software/msisensor/scan.nf +++ /dev/null @@ -1,36 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -process MSISENSOR_SCAN { - tag "$fasta" - label 'process_high' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - - conda (params.enable_conda ? "bioconda::msisensor=0.5" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2" - } else { - container "quay.io/biocontainers/msisensor:0.5--hb3646a4_2" - } - - input: - path fasta - path fai - - output: - path "microsatellites.list" - - script: - def software = getSoftwareName(task.process) - def ioptions = initOptions(options) - def prefix = ioptions.suffix ? "msisensor_${ioptions.suffix}" : "msisensor_" - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. 
"--threads $task.cpus" - """ - msisensor scan -d ${fasta} -o microsatellites.list - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/msisensorpro/msi/functions.nf b/modules/nf-core/software/msisensorpro/msi/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/msisensorpro/msi/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/msisensorpro/msi/main.nf b/modules/nf-core/software/msisensorpro/msi/main.nf new file mode 100644 index 0000000000..01c9553a62 --- /dev/null +++ b/modules/nf-core/software/msisensorpro/msi/main.nf @@ -0,0 +1,45 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process MSISENSORPRO_MSI { + tag "$meta.id" + label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::msisensor-pro=1.1.a" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/msisensor-pro:1.1.a--hb3646a4_0" + } else { + container "quay.io/biocontainers/msisensor-pro:1.1.a--hb3646a4_0" + } + + input: + tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) + path msisensorpro_scan + + output: + tuple val(meta), path("msisensorpro_*.list") + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}" + """ + msisensor-pro msi \\ + -d $msisensorpro_scan \\ + -n $bam_normal \\ + -t $bam_tumor \\ + -o $prefix \\ + -b $task.cpus \\ + $options.args + + mv ${prefix} msisensorpro_${prefix}.list + mv ${prefix}_dis msisensorpro_${prefix}_dis.list + mv ${prefix}_germline msisensorpro_${prefix}_germline.list + mv ${prefix}_somatic msisensorpro_${prefix}_somatic.list + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/msisensorpro/msi/meta.yml b/modules/nf-core/software/msisensorpro/msi/meta.yml new file mode 100644 index 0000000000..e1d2220c67 --- /dev/null +++ b/modules/nf-core/software/msisensorpro/msi/meta.yml @@ -0,0 +1,58 @@ +name: msisensorpro_msi + +description: evaluate MSI using paired tumor-normal sequencing data +keywords: + - microsatellite +tools: + - msisensor: + description: MSIsensor-pro is an updated version of msisensor + homepage: https://github.com/xjtu-omics/msisensor-pro + documentation: None + tool_dev_url: None + doi: "doi.org/10.1016/j.gpb.2020.02.001" + licence: ['free for non-commercial use by academic, government, and non-profit/not-for-profit institutions'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam_normal: + type: file + description: | + BAM file from normal tissue + - bai_normal: + type: file + description: | + Index for the BAM file from normal tissue + - bam_tumor: + type: file + description: | + BAM file from tumor tissue + - bai_tumor: + type: file + description: | + Index for the BAM file from tumor tissue + - msisensor_scan: + type: file + description: | + Output file from msisensor-pro scan module + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - list: + type: file + description: | + msisensor-pro final report and associated files (dis, germline, somatic) + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + +authors: + - "@maxulysse" diff --git a/modules/nf-core/software/msisensorpro/scan/functions.nf b/modules/nf-core/software/msisensorpro/scan/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/msisensorpro/scan/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/msisensorpro/scan/main.nf b/modules/nf-core/software/msisensorpro/scan/main.nf new file mode 100644 index 0000000000..0b346d91a0 --- /dev/null +++ b/modules/nf-core/software/msisensorpro/scan/main.nf @@ -0,0 +1,39 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process MSISENSORPRO_SCAN { + tag "$fasta" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? 
"bioconda::msisensor-pro=1.1.a" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/msisensor-pro:1.1.a--hb3646a4_0" + } else { + container "quay.io/biocontainers/msisensor-pro:1.1.a--hb3646a4_0" + } + + input: + path fasta + + output: + path "*.list", emit: list + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${options.suffix}" : "" + """ + msisensor-pro scan \\ + -d $fasta \\ + -o ${fasta.baseName}.list \\ + $options.args + + echo \$(msisensor-pro 2>&1) | sed -nE 's/Version:\\sv([0-9]\\.[0-9])/\\1/ p' > ${software}.version.txt + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/msisensorpro/scan/meta.yml b/modules/nf-core/software/msisensorpro/scan/meta.yml new file mode 100644 index 0000000000..5e9261aadb --- /dev/null +++ b/modules/nf-core/software/msisensorpro/scan/meta.yml @@ -0,0 +1,32 @@ +name: msisensorpro_scan + +description: Scan the reference genome to get microsatellites information +keywords: + - microsatellite +tools: + - msisensor: + description: MSIsensor-pro is an updated version of msisensor + homepage: https://github.com/xjtu-omics/msisensor-pro + documentation: None + tool_dev_url: None + doi: "doi.org/10.1016/j.gpb.2020.02.001" + licence: ['free for non-commercial use by academic, government, and non-profit/not-for-profit institutions'] + +input: + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + +output: + - list: + type: file + description: MSIsensor-pro scan output file of minisatellites + pattern: "*.list" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + +authors: + - "@maxulysse" diff --git a/modules/nf-core/software/multiqc/functions.nf b/modules/nf-core/software/multiqc/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/multiqc/functions.nf +++ b/modules/nf-core/software/multiqc/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join 
elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/multiqc/main.nf b/modules/nf-core/software/multiqc/main.nf index ff1175fcd0..da78080024 100644 --- a/modules/nf-core/software/multiqc/main.nf +++ b/modules/nf-core/software/multiqc/main.nf @@ -2,19 +2,19 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process MULTIQC { label 'process_medium' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename: filename, options: params.options, publish_dir: getSoftwareName(task.process), publish_id: '') } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } - conda (params.enable_conda ? "bioconda::multiqc=1.9" : null) + conda (params.enable_conda ? "bioconda::multiqc=1.10.1" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/multiqc:1.9--pyh9f0ad1d_0" + container "https://depot.galaxyproject.org/singularity/multiqc:1.10.1--py_0" } else { - container "quay.io/biocontainers/multiqc:1.9--pyh9f0ad1d_0" + container "quay.io/biocontainers/multiqc:1.10.1--py_0" } input: diff --git a/modules/nf-core/software/multiqc/meta.yml b/modules/nf-core/software/multiqc/meta.yml index 02f4b41586..532a8bb1ef 100644 --- a/modules/nf-core/software/multiqc/meta.yml +++ b/modules/nf-core/software/multiqc/meta.yml @@ -11,27 +11,6 @@ tools: It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. 
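To make the publish_by_id to publish_by_meta migration above concrete, here is a worked call of the new saveFiles(), with hypothetical option values (directory names are illustrative only, not taken from this diff):

// Hypothetical call illustrating the new path resolution
def opts = [ publish_dir: 'preprocessing', publish_by_meta: ['id'], publish_files: ['bam':'mapped'] ]
assert saveFiles(filename: 'test.md.bam', options: opts, publish_dir: 'gatk4', meta: [id:'test'], publish_by_meta: ['id']) == 'preprocessing/test/mapped/test.md.bam'
// - opts.publish_dir overrides the fallback 'gatk4'
// - each publish_by_meta key appends the matching meta value ('test')
// - the 'bam' key in publish_files adds the 'mapped' subfolder
// - '*.version.txt' files yield null and are therefore never published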
- - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. input: - multiqc_files: type: file diff --git a/modules/nf-core/software/qualimap/bamqc/functions.nf b/modules/nf-core/software/qualimap/bamqc/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/qualimap/bamqc/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/qualimap/bamqc/main.nf b/modules/nf-core/software/qualimap/bamqc/main.nf new file mode 100644 index 0000000000..463cde4cb7 --- /dev/null +++ b/modules/nf-core/software/qualimap/bamqc/main.nf @@ -0,0 +1,61 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process QUALIMAP_BAMQC { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::qualimap=2.2.2d" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1" + } else { + container "quay.io/biocontainers/qualimap:2.2.2d--1" + } + + input: + tuple val(meta), path(bam) + path gff + val use_gff + + output: + tuple val(meta), path("${prefix}"), emit: results + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + + def collect_pairs = meta.single_end ? '' : '--collect-overlap-pairs' + def memory = task.memory.toGiga() + "G" + def regions = use_gff ? "--gff $gff" : '' + + def strandedness = 'non-strand-specific' + if (meta.strandedness == 'forward') { + strandedness = 'strand-specific-forward' + } else if (meta.strandedness == 'reverse') { + strandedness = 'strand-specific-reverse' + } + """ + unset DISPLAY + mkdir tmp + export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp + qualimap \\ + --java-mem-size=$memory \\ + bamqc \\ + $options.args \\ + -bam $bam \\ + $regions \\ + -p $strandedness \\ + $collect_pairs \\ + -outdir $prefix \\ + -nt $task.cpus + + echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//' > ${software}.version.txt + """ +} diff --git a/modules/nf-core/software/qualimap/bamqc/meta.yml b/modules/nf-core/software/qualimap/bamqc/meta.yml new file mode 100644 index 0000000000..3c608f31a2 --- /dev/null +++ b/modules/nf-core/software/qualimap/bamqc/meta.yml @@ -0,0 +1,49 @@ +name: qualimap_bamqc +description: Evaluate alignment data +keywords: + - quality control + - qc + - bam +tools: + - qualimap: + description: | + Qualimap 2 is a platform-independent application written in + Java and R that provides both a Graphical User Interface and + a command-line interface to facilitate the quality control of + alignment sequencing data and its derivatives like feature counts. + homepage: http://qualimap.bioinfo.cipf.es/ + documentation: http://qualimap.conesalab.org/doc_html/index.html + doi: 10.1093/bioinformatics/bts503 +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - gff: + type: file + description: Feature file with regions of interest + pattern: "*.{gff,gtf,bed}" + - use_gff: + type: boolean + description: Specifies if feature file should be used or not +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - results: + type: dir + description: Qualimap results dir + pattern: "*/*" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@phue" diff --git a/modules/nf-core/software/qualimap_bamqc.nf b/modules/nf-core/software/qualimap_bamqc.nf deleted file mode 100644 index 055ad7cbf9..0000000000 --- a/modules/nf-core/software/qualimap_bamqc.nf +++ /dev/null @@ -1,41 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -def options = initOptions(params.options) - -process QUALIMAP_BAMQC { - tag "$meta.id" - label 'process_medium' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? 
"bioconda::qualimap=2.2.2d" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1" - } else { - container "quay.io/biocontainers/qualimap:2.2.2d--1" - } - - input: - tuple val(meta), path(bam) - path(target_bed) - - output: - path("${bam.baseName}") - - script: - use_bed = params.target_bed ? "-gff ${target_bed}" : '' - """ - qualimap --java-mem-size=${task.memory.toGiga()}G \ - bamqc \ - -bam ${bam} \ - --paint-chromosome-limits \ - --genome-gc-distr HUMAN \ - ${use_bed} \ - -nt ${task.cpus} \ - -skip-duplicated \ - --skip-dup-mode 0 \ - -outdir ${bam.baseName} \ - -outformat HTML - """ -} diff --git a/modules/nf-core/software/samtools/faidx/functions.nf b/modules/nf-core/software/samtools/faidx/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/samtools/faidx/functions.nf +++ b/modules/nf-core/software/samtools/faidx/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? 
ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/samtools/faidx/main.nf b/modules/nf-core/software/samtools/faidx/main.nf index 85b7dd4dc1..6c023f1cd8 100644 --- a/modules/nf-core/software/samtools/faidx/main.nf +++ b/modules/nf-core/software/samtools/faidx/main.nf @@ -9,7 +9,7 @@ process SAMTOOLS_FAIDX { label 'process_low' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } conda (params.enable_conda ? "bioconda::samtools=1.12" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { diff --git a/modules/nf-core/software/samtools/index/functions.nf b/modules/nf-core/software/samtools/index/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/samtools/index/functions.nf +++ b/modules/nf-core/software/samtools/index/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module 
results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/samtools/index/main.nf b/modules/nf-core/software/samtools/index/main.nf index 501e66c063..778e9384ea 100644 --- a/modules/nf-core/software/samtools/index/main.nf +++ b/modules/nf-core/software/samtools/index/main.nf @@ -1,13 +1,15 @@ // Import generic module functions -include { saveFiles; getSoftwareName } from './functions' +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] +options = initOptions(params.options) process SAMTOOLS_INDEX { tag "$meta.id" + label 'process_low' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } conda (params.enable_conda ? "bioconda::samtools=1.12" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -20,13 +22,14 @@ process SAMTOOLS_INDEX { tuple val(meta), path(bam) output: - tuple val(meta), path("*.bai"), emit: bai + tuple val(meta), path("*.bai"), optional:true, emit: bai + tuple val(meta), path("*.csi"), optional:true, emit: csi path "*.version.txt" , emit: version script: def software = getSoftwareName(task.process) """ - samtools index $bam + samtools index $options.args $bam echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt """ } diff --git a/modules/nf-core/software/samtools/index/meta.yml b/modules/nf-core/software/samtools/index/meta.yml index 089a83be02..5d076e3be5 100644 --- a/modules/nf-core/software/samtools/index/meta.yml +++ b/modules/nf-core/software/samtools/index/meta.yml @@ -14,27 +14,6 @@ tools: homepage: http://www.htslib.org/ documentation: hhttp://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
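Aside on the refactor above: the new `publish_by_meta` option replaces the old `publish_id` string with a list of `meta` keys that `saveFiles` expands into nested output directories. Below is a minimal, self-contained Groovy sketch of that resolution logic; it is a hypothetical re-implementation for illustration only, not part of this diff, with names mirroring the `functions.nf` code above.

    // Hypothetical re-implementation of the saveFiles() path resolution:
    // String values from meta are used as-is, Booleans are rendered as
    // "<key>_<value>", and everything is joined below publish_dir.
    def resolvePublishPath(Map meta, Map options, String filename) {
        def path_list = [options.publish_dir]
        for (key in options.publish_by_meta) {
            def value = meta.containsKey(key) ? meta[key] : key
            path_list << (value instanceof Boolean ? "${key}_${value}".toString() : value.toString())
        }
        return path_list.join('/') + '/' + filename
    }

    // For example, a BAM published by SAMTOOLS_INDEX with meta keys id and single_end:
    assert resolvePublishPath([id: 'sample1', single_end: false],
                              [publish_dir: 'samtools', publish_by_meta: ['id', 'single_end']],
                              'sample1.bam') == 'samtools/sample1/single_end_false/sample1.bam'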
input: - meta: type: map @@ -55,6 +34,10 @@ output: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" - version: type: file description: File containing software version diff --git a/modules/nf-core/software/samtools/merge/functions.nf b/modules/nf-core/software/samtools/merge/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/samtools/merge/functions.nf +++ b/modules/nf-core/software/samtools/merge/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : ''
+                    path_list.add(path)
+                }
+            }
         }
         if (ioptions.publish_files instanceof Map) {
             for (ext in ioptions.publish_files) {
diff --git a/modules/nf-core/software/samtools/merge/main.nf b/modules/nf-core/software/samtools/merge/main.nf
index e60668db53..395b29e5d3 100644
--- a/modules/nf-core/software/samtools/merge/main.nf
+++ b/modules/nf-core/software/samtools/merge/main.nf
@@ -9,7 +9,7 @@ process SAMTOOLS_MERGE {
     label 'process_low'
     publishDir "${params.outdir}",
         mode: params.publish_dir_mode,
-        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

     conda (params.enable_conda ? "bioconda::samtools=1.12" : null)
     if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
@@ -19,15 +19,15 @@ process SAMTOOLS_MERGE {
     }

     input:
-    tuple val(meta),path(bams)
+    tuple val(meta), path(bams)

     output:
     tuple val(meta), path("${prefix}.bam"), emit: merged_bam
-    path "*.version.txt" , emit: version
+    path "*.version.txt"                  , emit: version

     script:
     def software = getSoftwareName(task.process)
-    prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
     """
     samtools merge ${prefix}.bam $bams
     echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt
diff --git a/modules/nf-core/software/samtools/merge/meta.yml b/modules/nf-core/software/samtools/merge/meta.yml
new file mode 100644
index 0000000000..9092f22e50
--- /dev/null
+++ b/modules/nf-core/software/samtools/merge/meta.yml
@@ -0,0 +1,44 @@
+name: samtools_merge
+description: Merge BAM files
+keywords:
+  - merge
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file
+      pattern: "*.{bam,cram,sam}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - merged_bam:
+      type: file
+      description: BAM file
+      pattern: "*.{bam}"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "*.{version.txt}"
+authors:
+  - "@drpatelh"
+  - "@yuukiiwa"
+  - "@maxulysse"
diff --git a/modules/nf-core/software/samtools/stats/functions.nf b/modules/nf-core/software/samtools/stats/functions.nf
index d25eea86b3..da9da093d3 100644
--- a/modules/nf-core/software/samtools/stats/functions.nf
+++ b/modules/nf-core/software/samtools/stats/functions.nf
@@ -1,48 +1,57 @@
-/*
- * -----------------------------------------------------
- * Utility functions used in nf-core DSL2 module files
- * -----------------------------------------------------
- */
+//
+// Utility functions used in nf-core DSL2 module files
+//

-/*
- * Extract name of software tool from process name using $task.process
- */
+//
+// Extract name of software tool from process name using $task.process
+//
 def getSoftwareName(task_process) {
     return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
 }

-/*
- * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
- */
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
 def initOptions(Map args) {
     def Map options = [:]
-    options.args          = args.args ?: ''
-    options.args2         = args.args2 ?: ''
-    options.publish_by_id = args.publish_by_id ?: false
-    options.publish_dir   = args.publish_dir ?: ''
-    options.publish_files = args.publish_files
-    options.suffix        = args.suffix ?: ''
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
     return options
 }

-/*
- * Tidy up and join elements of a list to return a path string
- */
+//
+// Tidy up and join elements of a list to return a path string
+//
 def getPathFromList(path_list) {
-    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }  // Remove empty entries
-    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }  // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
     return paths.join('/')
 }

-/*
- * Function to save/publish module results
- */
+//
+// Function to save/publish module results
+//
 def saveFiles(Map args) {
     if (!args.filename.endsWith('.version.txt')) {
-        def ioptions = initOptions(args.options)
+        def ioptions  = initOptions(args.options)
         def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
-        if (ioptions.publish_by_id) {
-            path_list.add(args.publish_id)
+        if (ioptions.publish_by_meta) {
+            def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+            for (key in key_list) {
+                if (args.meta && key instanceof String) {
+                    def path = key
+                    if (args.meta.containsKey(key)) {
+                        path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                    }
+                    path = path instanceof String ?
path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/samtools/stats/main.nf b/modules/nf-core/software/samtools/stats/main.nf index 1863ac0a41..c307f9e8c3 100644 --- a/modules/nf-core/software/samtools/stats/main.nf +++ b/modules/nf-core/software/samtools/stats/main.nf @@ -1,19 +1,21 @@ // Import generic module functions -include { saveFiles; getSoftwareName } from './functions' +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] +options = initOptions(params.options) process SAMTOOLS_STATS { tag "$meta.id" + label 'process_low' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - conda (params.enable_conda ? "bioconda::samtools=1.12" : null) + conda (params.enable_conda ? "bioconda::samtools=1.10" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/samtools:1.12--hd5e65b6_0" + container "https://depot.galaxyproject.org/singularity/samtools:1.10--h9402c20_2" } else { - container "quay.io/biocontainers/samtools:1.12--hd5e65b6_0" + container "quay.io/biocontainers/samtools:1.10--h9402c20_2" } input: diff --git a/modules/nf-core/software/samtools/stats/meta.yml b/modules/nf-core/software/samtools/stats/meta.yml index b907df92c5..b549ff5c8f 100644 --- a/modules/nf-core/software/samtools/stats/meta.yml +++ b/modules/nf-core/software/samtools/stats/meta.yml @@ -15,27 +15,6 @@ tools: homepage: http://www.htslib.org/ documentation: hhttp://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. input: - meta: type: map diff --git a/modules/nf-core/software/strelka/germline.nf b/modules/nf-core/software/strelka/germline.nf deleted file mode 100644 index 1b6f802912..0000000000 --- a/modules/nf-core/software/strelka/germline.nf +++ /dev/null @@ -1,54 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -process STRELKA_GERMLINE { - tag "$meta.id" - label 'process_high' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? 
"bioconda::strelka=2.9.10" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" - } else { - container "quay.io/biocontainers/strelka:2.9.10--0" - } - - input: - tuple val(meta), path(bam), path(bai) - path fasta - path fai - tuple path(target_bed), path(target_bed_tbi) - - output: - tuple val(meta), path("*_variants.vcf.gz"), path("*_variants.vcf.gz.tbi"), emit: vcf - tuple val(meta), path("*_genome.vcf.gz"), path("*_genome.vcf.gz.tbi"), emit: genome_vcf - path "*.version.txt", emit: version - - script: - def software = getSoftwareName(task.process) - def ioptions = initOptions(options) - def prefix = ioptions.suffix ? "strelka_${meta.id}${ioptions.suffix}" : "strelka_${meta.id}" - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - options_strelka = params.target_bed ? "--exome --callRegions ${target_bed}" : "" - """ - configureStrelkaGermlineWorkflow.py \ - --bam ${bam} \ - --referenceFasta ${fasta} \ - ${options_strelka} \ - --runDir strelka - - python strelka/runWorkflow.py -m local -j ${task.cpus} - - mv strelka/results/variants/genome.*.vcf.gz ${prefix}_genome.vcf.gz - mv strelka/results/variants/genome.*.vcf.gz.tbi ${prefix}_genome.vcf.gz.tbi - mv strelka/results/variants/variants.vcf.gz ${prefix}_variants.vcf.gz - mv strelka/results/variants/variants.vcf.gz.tbi ${prefix}_variants.vcf.gz.tbi - - echo configureStrelkaGermlineWorkflow.py --version &> ${software}.version.txt #2>&1 - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/strelka/germline/functions.nf b/modules/nf-core/software/strelka/germline/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/strelka/germline/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? 
ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/strelka/germline/main.nf b/modules/nf-core/software/strelka/germline/main.nf new file mode 100644 index 0000000000..16282cfad2 --- /dev/null +++ b/modules/nf-core/software/strelka/germline/main.nf @@ -0,0 +1,54 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process STRELKA_GERMLINE { + tag "$meta.id" + label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" + } else { + container "quay.io/biocontainers/strelka:2.9.10--0" + } + + input: + tuple val(meta), path(bam), path(bai) + path fasta + path fai + path target_bed + + output: + tuple val(meta), path("*_variants.vcf.gz"), path("*_variants.vcf.gz.tbi"), emit: vcf + tuple val(meta), path("*_genome.vcf.gz"), path("*_genome.vcf.gz.tbi") , emit: genome_vcf + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + def ioptions = initOptions(options) + def prefix = ioptions.suffix ? "strelka_${meta.id}${ioptions.suffix}" : "strelka_${meta.id}" + def options_strelka = params.target_bed ? 
"--exome --callRegions ${target_bed}" : "" + """ + configureStrelkaGermlineWorkflow.py \\ + --bam $bam \\ + --referenceFasta $fasta \\ + $options_strelka \\ + $options.args \\ + --runDir strelka + + python strelka/runWorkflow.py -m local -j $task.cpus + + mv strelka/results/variants/genome.*.vcf.gz ${prefix}_genome.vcf.gz + mv strelka/results/variants/genome.*.vcf.gz.tbi ${prefix}_genome.vcf.gz.tbi + mv strelka/results/variants/variants.vcf.gz ${prefix}_variants.vcf.gz + mv strelka/results/variants/variants.vcf.gz.tbi ${prefix}_variants.vcf.gz.tbi + + echo configureStrelkaGermlineWorkflow.py --version &> ${software}.version.txt #2>&1 + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/strelka/germline/meta.yml b/modules/nf-core/software/strelka/germline/meta.yml new file mode 100644 index 0000000000..1ad1885c9a --- /dev/null +++ b/modules/nf-core/software/strelka/germline/meta.yml @@ -0,0 +1,55 @@ +name: strelka_germline +description: Strelka2 is a fast and accurate small variant caller optimized for analysis of germline variation +keywords: + - variantcalling + - germline + - wgs + - vcf + - variants +tools: + - strelka: + description: Strelka calls somatic and germline small variants from mapped sequencing reads + homepage: https://github.com/Illumina/strelka + documentation: https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md + tool_dev_url: https://github.com/Illumina/strelka + doi: 10.1038/s41592-018-0051-x + licence: ['GPL v3'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAM index file + pattern: "*.{bai}" + - target_bed: + type: file + description: An optional bed file + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: file + description: gzipped germline variant file and associated index file + pattern: "(*_variants.vcf.gz, *_variants.vcf.gz.tbi)" + - genome_vcf: + type: file + description: variant records and compressed non-variant blocks and associated index file + pattern: "(*_genome.vcf.gz, _genome.vcf.gz.tbi)" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@arontommi" diff --git a/modules/nf-core/software/strelka/somatic.nf b/modules/nf-core/software/strelka/somatic.nf deleted file mode 100644 index 4fd8c00f47..0000000000 --- a/modules/nf-core/software/strelka/somatic.nf +++ /dev/null @@ -1,110 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -process STRELKA_SOMATIC { - tag "$meta.id" - label 'process_high' - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? 
"bioconda::strelka=2.9.10" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" - } else { - container "quay.io/biocontainers/strelka:2.9.10--0" - } - - input: - tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) - path fasta - path fai - tuple path(target_bed), path(target_bed_tbi) - - output: - tuple val(meta), path("*_somatic_indels.vcf.gz"), path("*_somatic_indels.vcf.gz.tbi"), emit: indels_vcf - tuple val(meta), path("*_somatic_snvs.vcf.gz"), path("*_somatic_snvs.vcf.gz.tbi"), emit: snvs_vcf - path "*.version.txt", emit: version - - script: - def software = getSoftwareName(task.process) - def ioptions = initOptions(options) - def prefix = ioptions.suffix ? "strelka_${meta.id}${ioptions.suffix}" : "strelka_${meta.id}" - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - options_strelka = params.target_bed ? "--exome --callRegions ${target_bed}" : "" - """ - configureStrelkaSomaticWorkflow.py \ - --tumor ${bam_tumor} \ - --normal ${bam_normal} \ - --referenceFasta ${fasta} \ - ${options_strelka} \ - --runDir strelka - - python strelka/runWorkflow.py -m local -j ${task.cpus} - - mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}_somatic_indels.vcf.gz - mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}_somatic_indels.vcf.gz.tbi - mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}_somatic_snvs.vcf.gz - mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}_somatic_snvs.vcf.gz.tbi - - echo configureStrelkaSomaticWorkflow.py --version &> ${software}.version.txt #2>&1 - """ -} - -process STRELKA_SOMATIC_BEST_PRACTICES { - tag "${meta.id}" - - label 'CPUS_MAX' - label 'MEMORY_MAX' - - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" - } else { - container "quay.io/biocontainers/strelka:2.9.10--0" - } - - input: - tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor), path(manta_csi), path(manta_csi_tbi) - path fasta - path fai - tuple path(target_bed), path(target_bed_tbi) - - output: - tuple val(meta), path("*_somatic_indels.vcf.gz"), path("*_somatic_indels.vcf.gz.tbi"), emit: indels_vcf - tuple val(meta), path("*_somatic_snvs.vcf.gz"), path("*_somatic_snvs.vcf.gz.tbi"), emit: snvs_vcf - path "*.version.txt", emit: version - - script: - def software = getSoftwareName(task.process) - def ioptions = initOptions(options) - def prefix = ioptions.suffix ? "strelka_bp_${meta.id}${ioptions.suffix}" : "strelka_bp_${meta.id}" - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. 
"--threads $task.cpus" - options_strelka = params.target_bed ? "--exome --callRegions ${target_bed}" : "" - """ - configureStrelkaSomaticWorkflow.py \ - --tumor ${bam_tumor} \ - --normal ${bam_normal} \ - --referenceFasta ${fasta} \ - --indelCandidates ${manta_csi} \ - ${options_strelka} \ - --runDir strelka - - python strelka/runWorkflow.py -m local -j ${task.cpus} - - mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}_somatic_indels.vcf.gz - mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}_somatic_indels.vcf.gz.tbi - mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}_somatic_snvs.vcf.gz - mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}_somatic_snvs.vcf.gz.tbi - - echo configureStrelkaSomaticWorkflow.py --version &> ${software}.version.txt #2>&1 - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/strelka/somatic/functions.nf b/modules/nf-core/software/strelka/somatic/functions.nf new file mode 100644 index 0000000000..da9da093d3 --- /dev/null +++ b/modules/nf-core/software/strelka/somatic/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/strelka/somatic/main.nf b/modules/nf-core/software/strelka/somatic/main.nf new file mode 100644 index 0000000000..e57225fa39 --- /dev/null +++ b/modules/nf-core/software/strelka/somatic/main.nf @@ -0,0 +1,55 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process STRELKA_SOMATIC { + tag "$meta.id" + label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" + } else { + container "quay.io/biocontainers/strelka:2.9.10--0" + } + + input: + tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) + path fasta + path fai + path target_bed + + output: + tuple val(meta), path("*_somatic_indels.vcf.gz"), path("*_somatic_indels.vcf.gz.tbi"), emit: indels_vcf + tuple val(meta), path("*_somatic_snvs.vcf.gz"), path("*_somatic_snvs.vcf.gz.tbi"), emit: snvs_vcf + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def ioptions = initOptions(options) + def prefix = ioptions.suffix ? "strelka_${meta.id}${ioptions.suffix}" : "strelka_${meta.id}" + def options_strelka = params.target_bed ? 
"--exome --callRegions ${target_bed}" : "" + """ + configureStrelkaSomaticWorkflow.py \\ + --tumor $bam_tumor \\ + --normal $bam_normal \\ + --referenceFasta $fasta \\ + $options_strelka \\ + $options.args \\ + --runDir strelka + + python strelka/runWorkflow.py -m local -j $task.cpus + + mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}_somatic_indels.vcf.gz + mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}_somatic_indels.vcf.gz.tbi + mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}_somatic_snvs.vcf.gz + mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}_somatic_snvs.vcf.gz.tbi + + echo configureStrelkaSomaticWorkflow.py --version &> ${software}.version.txt #2>&1 + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/strelka/somatic/meta.yml b/modules/nf-core/software/strelka/somatic/meta.yml new file mode 100644 index 0000000000..5bf5878e1c --- /dev/null +++ b/modules/nf-core/software/strelka/somatic/meta.yml @@ -0,0 +1,55 @@ +name: strelka_somatic +description: Strelka2 is a fast and accurate small variant caller optimized for analysis of somatic variation +keywords: + - variantcalling + - somatic + - wgs + - vcf + - variants +tools: + - strelka: + description: Strelka calls somatic and germline small variants from mapped sequencing reads + homepage: https://github.com/Illumina/strelka + documentation: https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md + tool_dev_url: https://github.com/Illumina/strelka + doi: 10.1038/s41592-018-0051-x + licence: ['GPL v3'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAM index file + pattern: "*.{bai}" + - target_bed: + type: file + description: An optional bed file + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test']
+  - indels_vcf:
+      type: file
+      description: gzipped somatic indels file and associated index file
+      pattern: "(*_somatic_indels.vcf.gz, *_somatic_indels.vcf.gz.tbi)"
+  - snvs_vcf:
+      type: file
+      description: gzipped somatic snvs file and associated index file
+      pattern: "(*_somatic_snvs.vcf.gz, *_somatic_snvs.vcf.gz.tbi)"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "*.{version.txt}"
+authors:
+  - "@arontommi"
diff --git a/modules/nf-core/software/strelka/somaticbp/functions.nf b/modules/nf-core/software/strelka/somaticbp/functions.nf
new file mode 100644
index 0000000000..da9da093d3
--- /dev/null
+++ b/modules/nf-core/software/strelka/somaticbp/functions.nf
@@ -0,0 +1,68 @@
+//
+// Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }  // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    if (!args.filename.endsWith('.version.txt')) {
+        def ioptions  = initOptions(args.options)
+        def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+        if (ioptions.publish_by_meta) {
+            def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+            for (key in key_list) {
+                if (args.meta && key instanceof String) {
+                    def path = key
+                    if (args.meta.containsKey(key)) {
+                        path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                    }
+                    path = path instanceof String ?
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/strelka/somaticbp/main.nf b/modules/nf-core/software/strelka/somaticbp/main.nf new file mode 100644 index 0000000000..432481aebc --- /dev/null +++ b/modules/nf-core/software/strelka/somaticbp/main.nf @@ -0,0 +1,56 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process STRELKA_SOMATIC_BEST_PRACTICES { + tag "$meta.id" + label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" + } else { + container "quay.io/biocontainers/strelka:2.9.10--0" + } + + input: + tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor), path(manta_csi), path(manta_csi_tbi) + path fasta + path fai + path target_bed + + output: + tuple val(meta), path("*_somatic_indels.vcf.gz"), path("*_somatic_indels.vcf.gz.tbi"), emit: indels_vcf + tuple val(meta), path("*_somatic_snvs.vcf.gz"), path("*_somatic_snvs.vcf.gz.tbi"), emit: snvs_vcf + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def ioptions = initOptions(options) + def prefix = ioptions.suffix ? "strelka_${meta.id}${ioptions.suffix}" : "strelka_${meta.id}" + def options_strelka = params.target_bed ? 
"--exome --callRegions ${target_bed}" : "" + """ + configureStrelkaSomaticWorkflow.py \\ + --tumor $bam_tumor \\ + --normal $bam_normal \\ + --referenceFasta $fasta \\ + --indelCandidates $manta_csi \ + $options_strelka \\ + $options.args \\ + --runDir strelka + + python strelka/runWorkflow.py -m local -j $task.cpus + + mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}_somatic_indels.vcf.gz + mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}_somatic_indels.vcf.gz.tbi + mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}_somatic_snvs.vcf.gz + mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}_somatic_snvs.vcf.gz.tbi + + echo configureStrelkaSomaticWorkflow.py --version &> ${software}.version.txt #2>&1 + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/strelka/somaticbp/meta.yml b/modules/nf-core/software/strelka/somaticbp/meta.yml new file mode 100644 index 0000000000..5bf5878e1c --- /dev/null +++ b/modules/nf-core/software/strelka/somaticbp/meta.yml @@ -0,0 +1,55 @@ +name: strelka_somatic +description: Strelka2 is a fast and accurate small variant caller optimized for analysis of somatic variation +keywords: + - variantcalling + - somatic + - wgs + - vcf + - variants +tools: + - strelka: + description: Strelka calls somatic and germline small variants from mapped sequencing reads + homepage: https://github.com/Illumina/strelka + documentation: https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md + tool_dev_url: https://github.com/Illumina/strelka + doi: 10.1038/s41592-018-0051-x + licence: ['GPL v3'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAM index file + pattern: "*.{bai}" + - target_bed: + type: file + description: An optional bed file + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test'] + - indels_vcf: + type: file + description: gzipped somatic indels file and associated index file + pattern: "(*_somatic_indels.vcf.gz, *_somatic_indels.vcf.gz.tbi)" + - snvs_vcf: + type: file + description: gzipped somatic snvs file and associated index file + pattern: "(*_somatic_snvs.vcf.gz, _somatic_snvs.vcf.gz.tbi)" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@arontommi" diff --git a/modules/nf-core/software/tabix/tabix/functions.nf b/modules/nf-core/software/tabix/tabix/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/tabix/tabix/functions.nf +++ b/modules/nf-core/software/tabix/tabix/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/tabix/tabix/main.nf b/modules/nf-core/software/tabix/tabix/main.nf index 0416a00ddb..da23f535bc 100644 --- a/modules/nf-core/software/tabix/tabix/main.nf +++ b/modules/nf-core/software/tabix/tabix/main.nf @@ -2,14 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process TABIX_TABIX { tag "$meta.id" label 'process_medium' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } conda (params.enable_conda ? "bioconda::tabix=0.2.6" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { diff --git a/modules/nf-core/software/tabix/tabix/meta.yml b/modules/nf-core/software/tabix/tabix/meta.yml index 8075184ade..f66270dba5 100644 --- a/modules/nf-core/software/tabix/tabix/meta.yml +++ b/modules/nf-core/software/tabix/tabix/meta.yml @@ -10,27 +10,6 @@ tools: homepage: https://www.htslib.org/doc/tabix.html documentation: https://www.htslib.org/doc/tabix.1.html doi: 10.1093/bioinformatics/btq671 -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
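For context, a sketch of how a calling (sub)workflow is expected to wire these modules now that the per-module `params` blocks are removed from the meta.yml files: tool options and publish settings come in through `addParams` at include time (in Sarek, fed from `conf/modules.config`). This is an assumed usage example, not part of the diff; the `tbi` emit name and the single `tuple val(meta), path(vcf)` input are taken to match the nf-core tabix module of this vintage.

    // Hypothetical subworkflow include; the options map keys (args,
    // publish_dir, publish_by_meta, ...) match initOptions() above.
    include { TABIX_TABIX } from '../modules/nf-core/software/tabix/tabix/main' addParams(
        options: [args: '-p vcf', publish_dir: 'tabix', publish_by_meta: ['id']]
    )

    workflow INDEX_VCF {
        take:
            vcf    // channel: [ val(meta), path(vcf.gz) ]
        main:
            TABIX_TABIX(vcf)
        emit:
            tbi = TABIX_TABIX.out.tbi    // channel: [ val(meta), path(tbi) ]
    }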
input: - meta: type: map diff --git a/modules/nf-core/software/trimgalore/functions.nf b/modules/nf-core/software/trimgalore/functions.nf index d25eea86b3..da9da093d3 100644 --- a/modules/nf-core/software/trimgalore/functions.nf +++ b/modules/nf-core/software/trimgalore/functions.nf @@ -1,48 +1,57 @@ -/* - * ----------------------------------------------------- - * Utility functions used in nf-core DSL2 module files - * ----------------------------------------------------- - */ +// +// Utility functions used in nf-core DSL2 module files +// -/* - * Extract name of software tool from process name using $task.process - */ +// +// Extract name of software tool from process name using $task.process +// def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } -/* - * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules - */ +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// def initOptions(Map args) { def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.publish_by_id = args.publish_by_id ?: false - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' return options } -/* - * Tidy up and join elements of a list to return a path string - */ +// +// Tidy up and join elements of a list to return a path string +// def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes return paths.join('/') } -/* - * Function to save/publish module results - */ +// +// Function to save/publish module results +// def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) + def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) { - path_list.add(args.publish_id) + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { diff --git a/modules/nf-core/software/trimgalore/main.nf b/modules/nf-core/software/trimgalore/main.nf index 79cc745625..44b36e7148 100644 --- a/modules/nf-core/software/trimgalore/main.nf +++ b/modules/nf-core/software/trimgalore/main.nf @@ -2,14 +2,14 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] -def options = initOptions(params.options) +options = initOptions(params.options) process TRIMGALORE { tag "$meta.id" label 'process_high' publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } conda (params.enable_conda ? "bioconda::trim-galore=0.6.6" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { diff --git a/modules/nf-core/software/trimgalore/meta.yml b/modules/nf-core/software/trimgalore/meta.yml index 86ba5cd44a..735387076c 100644 --- a/modules/nf-core/software/trimgalore/meta.yml +++ b/modules/nf-core/software/trimgalore/meta.yml @@ -13,47 +13,6 @@ tools: MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries. homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead.
- - clip_r1: - type: integer - description: | - Instructs Trim Galore to remove bp from the 5' end of read 1 - (or single-end reads) - - clip_r2: - type: integer - description: | - Instructs Trim Galore to remove bp from the 5' end of read 2 - (paired-end reads only) - - three_prime_clip_r1: - type: integer - description: | - Instructs Trim Galore to remove bp from the 3' end of read 1 - AFTER adapter/quality trimming has been performed - - three_prime_clip_r2: - type: integer - description: | - Instructs Trim Galore to re move bp from the 3' end of read 2 - AFTER adapter/quality trimming has been performed input: - meta: type: map diff --git a/nextflow.config b/nextflow.config index e2bcdffca5..4c2fffe4e9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,7 +22,7 @@ params { // Main options help = false - no_intervals = null // Intervals will be built from the fasta file + no_intervals = false // Intervals will be built from the fasta file nucleotides_per_second = 1000 // Default interval size sentieon = null // Not using Sentieon by default skip_qc = null // All QC tools are used @@ -44,7 +44,7 @@ params { markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for markDuplicates memory consumption, see https://github.com/SciLifeLab/Sarek/pull/689 for details use_gatk_spark = false // GATK Spark implementation of their tools in local mode not used by default save_bam_mapped = null // Mapped BAMs not saved - skip_markduplicates = null // Do not skip markDuplicates by default + skip_markduplicates = false // Do not skip markDuplicates by default // Variant Calling ascat_ploidy = null // Use default value @@ -52,7 +52,7 @@ params { cf_coeff = 0.05 // default value for Control-FREEC cf_contamination = null // by default not specified in Control-FREEC cf_contamination_adjustment = null // by default we are not using this in Control-FREEC - cf_ploidy = "2" // you can use 2,3,4 + cf_ploidy = 2 // you can use 2,3,4 cf_window = null // by default we are not using this in Control-FREEC generate_gvcf = null // g.vcf are not produced by HaplotypeCaller by default no_strelka_bp = null // Strelka will use Manta candidateSmallIndels if available @@ -76,6 +76,7 @@ params { vep_cache = null // No directory for VEP cache // Custom config + config_profile_name = null config_profile_contact = false config_profile_description = false config_profile_url = false @@ -88,18 +89,14 @@ params { sequencing_center = null // No sequencing center to be written in BAM header in MapReads process multiqc_config = false monochrome_logs = false // Monochrome logs disabled - email = false // No default email + email = "" // No default email email_on_fail = false plaintext_email = false // Plaintext email disabled max_multiqc_email_size = 25.MB hostnames = false - config_profile_name = null - config_profile_description = false - config_profile_contact = false - config_profile_url = false validate_params = true show_hidden_params = false - schema_ignore_params = 'genomes,input_paths' + schema_ignore_params = 'genomes,modules' tracedir = "${params.outdir}/pipeline_info" // Modules @@ -112,14 +109,8 @@ params { max_cpus = 16 max_memory = 128.GB max_time = 240.h - single_cpu_mem = 7.GB } -// Container slug -// Stable releases should specify release tag (ie: `dsl2`) -// Developmental code should specify dev -// process.container = 'nfcore/sarek:dsl2' - // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -141,69 +132,54 @@ try { } profiles { - conda { + charliecloud { + 
charliecloud.enabled = true docker.enabled = false - singularity.enabled = false podman.enabled = false shifter.enabled = false - charliecloud.enabled = false + singularity.enabled = false + } + conda { process.conda = "$projectDir/environment.yml" + charliecloud.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + singularity.enabled = false } debug { process.beforeScript = 'echo $HOSTNAME' } docker { docker.enabled = true - singularity.enabled = false + docker.userEmulation = true + charliecloud.enabled = false podman.enabled = false shifter.enabled = false - charliecloud.enabled = false - // Avoid this error: - // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. - // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 - // once this is established and works well, nextflow might implement this behavior as new default. - docker.runOptions = '-u \$(id -u):\$(id -g)' + singularity.enabled = false } - singularity { + podman { + podman.enabled = true + charliecloud.enabled = false docker.enabled = false - singularity.enabled = true - podman.enabled = false shifter.enabled = false - charliecloud.enabled = false + singularity.enabled = false + } + singularity { singularity.autoMounts = true singularity.enabled = true - params.enable_conda = false - } - podman { - singularity.enabled = false + charliecloud.enabled = false docker.enabled = false - podman.enabled = true + podman.enabled = false shifter.enabled = false - charliecloud.enabled = false } shifter { - singularity.enabled = false - docker.enabled = false - podman.enabled = false shifter.enabled = true charliecloud.enabled = false - } - charliecloud { - singularity.enabled = false docker.enabled = false podman.enabled = false - shifter.enabled = false - charliecloud.enabled = true + singularity.enabled = false } - test { includeConfig 'conf/test.config' } - test_annotation { includeConfig 'conf/test_annotation.config' } - test_haplotypecaller { includeConfig 'conf/test_germline_variantcalling.config' } - test_pair { includeConfig 'conf/test_pair.config' } - test_split_fastq { includeConfig 'conf/test_split_fastq.config' } - test_targeted { includeConfig 'conf/test_targeted.config' } - test_tool { includeConfig 'conf/test_tool.config' } - test_trimming { includeConfig 'conf/test_trimming.config' } - test_umi_tso { includeConfig 'conf/test_umi_tso.config' } - test_use_gatk_spark { includeConfig 'conf/test_use_gatk_spark.config' } - test_umi_qiaseq { includeConfig 'conf/test_umi_qiaseq.config' } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } } // Load genomes.config or igenomes.config diff --git a/nextflow_schema.json b/nextflow_schema.json index 81a46f457d..8dc16093a9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -26,18 +26,13 @@ "default": "mapping", "fa_icon": "fas fa-play", "description": "Starting step.", - "help_text": "Only one step.\n> **NB** step can be specified with no concern for case, or the presence of `-` or `_`\n", + "help_text": "Only one step", "enum": [ "mapping", "prepare_recalibration", "recalibrate", "variant_calling", "annotate", - "Control-FREEC", - "ControlFREEC", - "preparerecalibration", - "variantCalling", - "variantcalling", "controlfreec" ] }, @@ -58,41 +53,26 @@ "properties": { "tools": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-toolbox", "description": 
"Tools to use for variant calling and/or for annotation.", - "help_text": "Multiple separated with commas.\n\nGermline variant calling can currently only be performed with the following variant callers:\n- FreeBayes, HaplotypeCaller, Manta, mpileup, Strelka, TIDDIT\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- ASCAT, Control-FREEC, FreeBayes, Manta, MSIsensor, Mutect2, Strelka\n\nTumor-only somatic variant calling can currently only be performed with the following variant callers:\n- Control-FREEC, Manta, mpileup, Mutect2, TIDDIT\n\nAnnotation is done using snpEff, VEP, or even both consecutively.\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted.\n\n\n\n`DNAseq`, `DNAscope` and `TNscope` are only available with `--sentieon`\n\n> **NB** tools can be specified with no concern for case, or the presence of `-` or `_`\n", + "help_text": "Multiple separated with commas.\n\nGermline variant calling can currently only be performed with the following variant callers:\n- FreeBayes, HaplotypeCaller, Manta, mpileup, Strelka, TIDDIT\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- ASCAT, Control-FREEC, FreeBayes, Manta, MSIsensorpro, Mutect2, Strelka\n\nTumor-only somatic variant calling can currently only be performed with the following variant callers:\n- Control-FREEC, Manta, mpileup, Mutect2, TIDDIT\n\nAnnotation is done using snpEff, VEP, or even both consecutively.\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted.\n\n\n\n`DNAseq`, `DNAscope` and `TNscope` are only available with `--sentieon`\n\n> **NB** tools can be specified with no concern for case, or the presence of `-` or `_`\n", "list": [ - "null", - "ASCAT", - "CNVkit", - "ControlFREEC", - "FreeBayes", - "HaplotypeCaller", - "Manta", - "mpileup", - "MSIsensor", - "Mutect2", - "Strelka", - "TIDDIT", - "snpEff", - "VEP", - "merge", - "DNAseq", - "DNAscope", - "TNscope", + null, "ascat", "cnvkit", "controlfreec", "freebayes", "haplotypecaller", "manta", - "msisensor", + "mpileup", + "msisensorpro", "mutect2", "strelka", "tiddit", "snpeff", "vep", + "merge", "dnaseq", "dnascope", "tnscope" @@ -119,24 +99,23 @@ }, "skip_qc": { "type": "string", + "default": null, "fa_icon": "fas fa-forward", "description": "Disable specified QC and Reporting tools.", "help_text": "Multiple tools can be specified, separated by commas.\n\n> **NB** `--skip_qc BaseRecalibrator` is actually just not saving the reports.\n> **NB** `--skip_qc MarkDuplicates` does not skip `MarkDuplicates` but prevent the collection of duplicate metrics that slows down performance.\n> **NB** tools can be specified with no concern for case, or the presence of `-` or `_`", "list": [ - "null", - "all", - "bamQC", - "BaseRecalibrator", - "BCFtools", - "Documentation", - "FastQC", - "MarkDuplicates", - "MultiQC", + null, + "bamqc", + "baserecalibrator", + "bcftools", + "documentation", + "fastqc", + "markduplicates", + "multiqc", "samtools", "vcftools", "versions" - ], - "default": "null" + ] }, "target_bed": { "type": "string", @@ -163,6 +142,7 @@ }, "clip_r1": { "type": "integer", + "default": 0, "fa_icon": "fas fa-cut", "description": "Remove bp from the 5' end of read 1.", "help_text": "This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.\n", @@ -170,6 +150,7 @@ }, "clip_r2": { "type": 
"integer", + "default": 0, "description": "Remove bp from the 5' end of read 2.", "help_text": "This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.\n", "fa_icon": "fas fa-cut", @@ -177,6 +158,7 @@ }, "three_prime_clip_r1": { "type": "integer", + "default": 0, "fa_icon": "fas fa-cut", "description": "Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed.", "help_text": "This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.\n", @@ -184,6 +166,7 @@ }, "three_prime_clip_r2": { "type": "integer", + "default": 0, "fa_icon": "fas fa-cut", "description": "Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed.", "help_text": "This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.\n", @@ -191,6 +174,7 @@ }, "trim_nextseq": { "type": "integer", + "default": 0, "fa_icon": "fas fa-cut", "description": "Apply the --nextseq=X option, to trim based on quality after removing poly-G tails.", "help_text": "This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.", @@ -232,7 +216,7 @@ }, "markdup_java_options": { "type": "string", - "default": "-Xms4000m -Xmx7g", + "default": "\"-Xms4000m -Xmx7g\"", "fa_icon": "fas fa-memory", "description": "Establish values for GATK MarkDuplicates memory consumption", "help_text": "See [SciLifeLab/Sarek/pull/689](https://github.com/SciLifeLab/Sarek/pull/689)", @@ -240,6 +224,7 @@ }, "use_gatk_spark": { "type": "boolean", + "default": false, "fa_icon": "fas fa-forward", "description": "Enable usage of GATK Spark implementation" }, @@ -250,6 +235,7 @@ }, "skip_markduplicates": { "type": "boolean", + "default": false, "fa_icon": "fas fa-fast-forward", "description": "Skip GATK MarkDuplicates", "help_text": "This params will also save the mapped BAMS, to enable restart from step `prepare_recalibration`" @@ -265,14 +251,14 @@ "properties": { "ascat_ploidy": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-wrench", "description": "Overwrite ASCAT ploidy", "help_text": "Requires that `--ascat_purity` is set" }, "ascat_purity": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-wrench", "description": "Overwrite ASCAT purity", "help_text": "Requires that `--ascat_ploidy` is set" @@ -290,13 +276,13 @@ }, "cf_contamination": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-wrench", "description": "Design known contamination value for Control-FREEC" }, "cf_ploidy": { - "type": "string", - "default": "2", + "type": "integer", + "default": 2, "fa_icon": "fas fa-wrench", "description": "Overwrite Control-FREEC ploidy" }, @@ -343,14 +329,14 @@ }, "read_structure1": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-clipboard-list", "description": "When processing UMIs, a read structure should always be provided for each of the fastq files.", "help_text": "If the read does not contain any UMI, the structure will be +T (i.e. 
only template of any length).\nThe read structure follows a format adopted by different tools and described in the [fgbio documentation](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures)" }, "read_structure2": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-clipboard-list", "description": "When processing UMIs, a read structure should always be provided for each of the fastq files.", "help_text": "If the read does not contain any UMI, the structure will be +T (i.e. only template of any length).\nThe read structure follows a format adopted by different tools and described in the [fgbio documentation](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures)" @@ -366,17 +352,12 @@ "properties": { "annotate_tools": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-hammer", "description": "Specify from which tools Sarek should look for VCF files to annotate", "help_text": "Only for step `annotate`", "list": [ - "null", - "HaplotypeCaller", - "Manta", - "Mutect2", - "Strelka", - "TIDDIT", + null, "haplotypecaller", "manta", "mutect2", @@ -387,6 +368,7 @@ }, "annotation_cache": { "type": "boolean", + "default": null, "fa_icon": "fas fa-database", "description": "Enable the use of cache for annotation", "help_text": "And disable usage of Sarek snpeff and vep specific containers for annotation\n\nTo be used with `--snpeff_cache` and/or `--vep_cache`", @@ -400,28 +382,28 @@ }, "cadd_indels": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-file", "description": "Path to CADD InDels file.", "hidden": true }, "cadd_indels_tbi": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-file", "description": "Path to CADD InDels index.", "hidden": true }, "cadd_wg_snvs": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-file", "description": "Path to CADD SNVs file.", "hidden": true }, "cadd_wg_snvs_tbi": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-file", "description": "Path to CADD SNVs index.", "hidden": true @@ -434,7 +416,7 @@ }, "snpeff_cache": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-database", "description": "Path to snpEff cache", "help_text": "To be used with `--annotation_cache`", @@ -442,7 +424,7 @@ }, "vep_cache": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-database", "description": "Path to VEP cache", "help_text": "To be used with `--annotation_cache`", @@ -682,7 +664,7 @@ }, "sequencing_center": { "type": "string", - "default": "null", + "default": null, "fa_icon": "fas fa-university", "description": "Name of sequencing center to be displayed in BAM file", "help_text": "It will be in the CN field", @@ -710,13 +692,6 @@ "fa_icon": "fas fa-microchip", "help_text": "Should be an integer e.g. `--cpus 7`" }, - "single_cpu_mem": { - "type": "string", - "default": "7 GB", - "fa_icon": "fas fa-sd-card", - "description": "Use to set memory for a single CPU.", - "help_text": "Should be a string in the format integer-unit eg. `--single_cpu_mem '8.GB'`" - }, "max_cpus": { "type": "integer", "description": "Maximum number of CPUs that can be requested for any single job.", diff --git a/scripts/filter_locifile.py b/scripts/filter_locifile.py deleted file mode 100755 index 6f34f1437e..0000000000 --- a/scripts/filter_locifile.py +++ /dev/null @@ -1,25 +0,0 @@ -#! 
/usr/bin/env python - -import sys, re, math, random - -#VCF file whould be downloaded from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp//release/20130502/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf.gz - -vcffile = "ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf" - -outfile = "1000Genomes_20130502_SNP_maf0.3.vcf" -of=open(outfile, 'w') -for line in open(vcffile, 'r'): - line=line.strip() - if not line.startswith("#"): - pma=re.compile('MULTI_ALLELIC') - ma=pma.search(line) - if not (ma): - psnp=re.compile('VT=SNP') - snp=psnp.search(line) - if(snp): - info=line.split("\t")[7] - af_info=info.split(";")[1] - fq=float(af_info.split("=")[1]) - if fq > 0.3: - of.write("%s\t%s\n" %(line.split("\t")[0], line.split("\t")[1])) - diff --git a/subworkflow/local/mapping.nf b/subworkflow/local/mapping.nf deleted file mode 100644 index 2b4da42f95..0000000000 --- a/subworkflow/local/mapping.nf +++ /dev/null @@ -1,186 +0,0 @@ -/* -================================================================================ - MAPPING -================================================================================ -*/ - -params.bwamem1_mem_options = [:] -params.bwamem1_mem_tumor_options = [:] -params.bwamem2_mem_options = [:] -params.bwamem2_mem_tumor_options = [:] -params.merge_bam_options = [:] -params.qualimap_bamqc_options = [:] -params.samtools_index_options = [:] -params.samtools_stats_options = [:] - -include { BWA_MEM as BWAMEM1_MEM } from '../../modules/nf-core/software/bwa/mem/main' addParams(options: params.bwamem1_mem_options) -include { BWA_MEM as BWAMEM1_MEM_T } from '../../modules/nf-core/software/bwa/mem/main' addParams(options: params.bwamem1_mem_tumor_options) -include { BWAMEM2_MEM } from '../../modules/nf-core/software/bwamem2/mem/main.nf' addParams(options: params.bwamem2_mem_options) -include { BWAMEM2_MEM as BWAMEM2_MEM_T } from '../../modules/nf-core/software/bwamem2/mem/main.nf' addParams(options: params.bwamem2_mem_tumor_options) -include { SAMTOOLS_MERGE } from '../../modules/nf-core/software/samtools/merge/main' addParams(options: params.merge_bam_options) -include { QUALIMAP_BAMQC } from '../../modules/nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams(options: params.samtools_index_options) -include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams(options: params.samtools_stats_options) - -workflow MAPPING { - take: - skip_bamqc // boolean: true/false - skip_samtools // boolean: true/false - bwa // channel: [mandatory] bwa - fai // channel: [mandatory] fai - fasta // channel: [mandatory] fasta - reads_input // channel: [mandatory] reads_input - save_bam_mapped // boolean: true/false - step // value: [mandatory] starting step - target_bed // channel: [optional] target_bed - - main: - - bam_mapped_index = Channel.empty() - bam_reports = Channel.empty() - - reads_input.groupTuple(by: [0,1]) - .branch{ - normal: it[0].status == 0 - tumor: it[0].status == 1 - }.set{ reads_input_status } - - if (step == "mapping") { - bam_bwamem1 = Channel.empty() - bam_bwamem2 = Channel.empty() - - if (params.aligner == "bwa-mem") { - BWAMEM1_MEM(reads_input_status.normal, bwa) - bam_bwamem1_n = BWAMEM1_MEM.out.bam - - BWAMEM1_MEM_T(reads_input_status.tumor, bwa) - bam_bwamem1_t = BWAMEM1_MEM_T.out.bam - - bam_bwamem1 = bam_bwamem1_n.mix(bam_bwamem1_t) - } else { - BWAMEM2_MEM(reads_input_status.normal, bwa) - 
bam_bwamem2_n = BWAMEM2_MEM.out.bam - - BWAMEM2_MEM_T(reads_input_status.tumor, bwa) - bam_bwamem2_t = BWAMEM2_MEM_T.out.bam - - bam_bwamem2 = bam_bwamem2_n.mix(bam_bwamem2_t) - } - - bam_bwa = bam_bwamem1.mix(bam_bwamem2) - - bam_bwa.map{ meta, bam -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - [patient, sample, gender, status, bam] - }.groupTuple(by: [0,1]) - .branch{ - single: it[4].size() == 1 - multiple: it[4].size() > 1 - }.set{ bam_bwa_to_sort } - - bam_bwa_single = bam_bwa_to_sort.single.map { - patient, sample, gender, status, bam -> - - def meta = [:] - meta.patient = patient - meta.sample = sample - meta.gender = gender[0] - meta.status = status[0] - meta.id = sample - - [meta, bam[0]] - } - - bam_bwa_multiple = bam_bwa_to_sort.multiple.map { - patient, sample, gender, status, bam -> - - def meta = [:] - meta.patient = patient - meta.sample = sample - meta.gender = gender[0] - meta.status = status[0] - meta.id = sample - - [meta, bam] - } - - // STEP 1.5: MERGING AND INDEXING BAM FROM MULTIPLE LANES - - SAMTOOLS_MERGE(bam_bwa_multiple) - bam_mapped = bam_bwa_single.mix(SAMTOOLS_MERGE.out.merged_bam) - - SAMTOOLS_INDEX(bam_mapped) - bam_mapped_index = bam_mapped.join(SAMTOOLS_INDEX.out.bai) - - qualimap_bamqc = Channel.empty() - samtools_stats = Channel.empty() - - if (!skip_bamqc) { - QUALIMAP_BAMQC(bam_mapped, target_bed) - qualimap_bamqc = QUALIMAP_BAMQC.out - } - - if (!skip_samtools) { - SAMTOOLS_STATS(bam_mapped_index) - samtools_stats = SAMTOOLS_STATS.out.stats - } - - bam_reports = samtools_stats.mix(qualimap_bamqc) - - if (save_bam_mapped) { - tsv_bam_mapped = bam_mapped.map { meta, bam -> [meta] } - // Creating TSV files to restart from this step - tsv_bam_mapped.collectFile(storeDir: "${params.outdir}/preprocessing/tsv") { meta -> - patient = meta.patient[0] - sample = meta.sample[0] - gender = meta.gender[0] - status = meta.status[0] - bam = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam" - bai = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam.bai" - ["mapped_${sample}.tsv", "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\n"] - } - - tsv_bam_mapped.map { meta -> - patient = meta.patient[0] - sample = meta.sample[0] - gender = meta.gender[0] - status = meta.status[0] - bam = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam" - bai = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam.bai" - "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\n" - }.collectFile(name: "mapped.tsv", sort: true, storeDir: "${params.outdir}/preprocessing/tsv") - } - if (params.skip_markduplicates) { - tsv_bam_mapped = bam_mapped.map { meta, bam -> [meta] } - // Creating TSV files to restart from this step - tsv_bam_mapped.collectFile(storeDir: "${params.outdir}/preprocessing/tsv") { meta -> - patient = meta.patient[0] - sample = meta.sample[0] - gender = meta.gender[0] - status = meta.status[0] - bam = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam" - bai = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam.bai" - table = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.recal.table" - ["mapped_no_markduplicates_${sample}.tsv", "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\t${table}\n"] - } - - tsv_bam_mapped.map { meta -> - patient = meta.patient[0] - sample = meta.sample[0] - gender = meta.gender[0] - status = meta.status[0] - bam = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam" - bai = 
"${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam.bai" - table = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.recal.table" - "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\t${table}\n" - }.collectFile(name: 'mapped_no_markduplicates.tsv', sort: true, storeDir: "${params.outdir}/preprocessing/tsv") - } - } - - emit: - bam = bam_mapped_index - qc = bam_reports -} diff --git a/subworkflow/local/markduplicates.nf b/subworkflow/local/markduplicates.nf deleted file mode 100644 index 379cc3e321..0000000000 --- a/subworkflow/local/markduplicates.nf +++ /dev/null @@ -1,58 +0,0 @@ -/* -================================================================================ - MARKDUPLICATES -================================================================================ -*/ - -params.markduplicates_options = [:] - -include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/software/gatk4/markduplicates' addParams(options: params.markduplicates_options) -include { GATK4_MARKDUPLICATES_SPARK } from '../../modules/nf-core/software/gatk4/markduplicates' addParams(options: params.markduplicates_options) - -workflow MARKDUPLICATES { - take: - bam_mapped // channel: [mandatory] bam_mapped - step // value: [mandatory] starting step - - main: - - bam_markduplicates = bam_mapped - report_markduplicates = Channel.empty() - - if (params.use_gatk_spark) { - GATK4_MARKDUPLICATES_SPARK(bam_mapped) - report_markduplicates = GATK4_MARKDUPLICATES_SPARK.out.report - bam_markduplicates = GATK4_MARKDUPLICATES_SPARK.out.bam - } else { - GATK4_MARKDUPLICATES(bam_mapped) - report_markduplicates = GATK4_MARKDUPLICATES.out.report - bam_markduplicates = GATK4_MARKDUPLICATES.out.bam - } - - // Creating TSV files to restart from this step - bam_markduplicates.collectFile(storeDir: "${params.outdir}/preprocessing/tsv") { meta, bam, bai -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - bam = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam" - bai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam.bai" - table = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.recal.table" - ["markduplicates_${sample}.tsv", "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\t${table}\n"] - } - - bam_markduplicates.map { meta, bam, bai -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - bam = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam" - bai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam.bai" - table = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.recal.table" - "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\t${table}\n" - }.collectFile(name: 'markduplicates.tsv', sort: true, storeDir: "${params.outdir}/preprocessing/tsv") - - emit: - bam = bam_markduplicates - report = report_markduplicates -} diff --git a/subworkflow/local/prepare_recalibration.nf b/subworkflow/local/prepare_recalibration.nf deleted file mode 100644 index 7a92611338..0000000000 --- a/subworkflow/local/prepare_recalibration.nf +++ /dev/null @@ -1,87 +0,0 @@ -/* -================================================================================ - PREPARE RECALIBRATION -================================================================================ -*/ - -params.baserecalibrator_options = [:] -params.gatherbqsrreports_options = [:] - -include { GATK4_BASERECALIBRATOR as 
BASERECALIBRATOR } from '../../modules/nf-core/software/gatk4/baserecalibrator' addParams(options: params.baserecalibrator_options) -include { GATK4_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/nf-core/software/gatk4/gatherbqsrreports' addParams(options: params.gatherbqsrreports_options) - -workflow PREPARE_RECALIBRATION { - take: - bam_markduplicates // channel: [mandatory] bam_markduplicates - dbsnp // channel: [optional] dbsnp - dbsnp_tbi // channel: [optional] dbsnp_tbi - dict // channel: [mandatory] dict - fai // channel: [mandatory] fai - fasta // channel: [mandatory] fasta - intervals // channel: [mandatory] intervals - known_indels // channel: [optional] known_indels - known_indels_tbi // channel: [optional] known_indels_tbi - step // value: [mandatory] starting step - - main: - - bam_baserecalibrator = bam_markduplicates.combine(intervals) - table_bqsr = Channel.empty() - - if (step in ["mapping", "preparerecalibration"]) { - - BASERECALIBRATOR(bam_baserecalibrator, dbsnp, dbsnp_tbi, dict, fai, fasta, known_indels, known_indels_tbi) - table_bqsr = BASERECALIBRATOR.out.report - - // STEP 3.5: MERGING RECALIBRATION TABLES - if (!params.no_intervals) { - BASERECALIBRATOR.out.report.map{ meta, table -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - [patient, sample, gender, status, table] - }.groupTuple(by: [0,1]).set{ recaltable } - - recaltable = recaltable.map { - patient, sample, gender, status, recal -> - - def meta = [:] - meta.patient = patient - meta.sample = sample - meta.gender = gender[0] - meta.status = status[0] - meta.id = sample - - [meta, recal] - } - - GATHERBQSRREPORTS(recaltable) - table_bqsr = GATHERBQSRREPORTS.out.table - } - - // Creating TSV files to restart from this step - table_bqsr.collectFile(storeDir: "${params.outdir}/preprocessing/tsv") { meta, table -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - bam = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam" - bai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam.bai" - ["markduplicates_no_table_${sample}.tsv", "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\n"] - } - - table_bqsr.map { meta, table -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - bam = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam" - bai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam.bai" - "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\n" - }.collectFile(name: 'markduplicates_no_table.tsv', sort: true, storeDir: "${params.outdir}/preprocessing/tsv") - } - - emit: - table_bqsr = table_bqsr -} diff --git a/subworkflow/local/recalibrate.nf b/subworkflow/local/recalibrate.nf deleted file mode 100644 index e470f46086..0000000000 --- a/subworkflow/local/recalibrate.nf +++ /dev/null @@ -1,124 +0,0 @@ -/* -================================================================================ - RECALIBRATE -================================================================================ -*/ - -params.applybqsr_options = [:] -params.merge_bam_options = [:] -params.qualimap_bamqc_options = [:] -params.samtools_index_options = [:] -params.samtools_stats_options = [:] - -include { GATK4_APPLYBQSR as APPLYBQSR } from '../../modules/nf-core/software/gatk4/applybqsr' addParams(options: params.applybqsr_options) -include { QUALIMAP_BAMQC } from 
'../../modules/nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options) -include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams(options: params.samtools_index_options) -include { SAMTOOLS_MERGE } from '../../modules/nf-core/software/samtools/merge/main' addParams(options: params.merge_bam_options) -include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams(options: params.samtools_stats_options) - -workflow RECALIBRATE { - take: - skip_bamqc // boolean: true/false - skip_samtools // boolean: true/false - bam // channel: [mandatory] bam - dict // channel: [mandatory] dict - fai // channel: [mandatory] fai - fasta // channel: [mandatory] fasta - intervals // channel: [mandatory] intervals - step // value: [mandatory] starting step - target_bed // channel: [optional] target_bed - - main: - - bam_recalibrated_index = Channel.empty() - bam_recalibrated = Channel.empty() - bam_reports = Channel.empty() - - if (step in ["mapping", "preparerecalibration", "recalibrate"]) { - - bam_intervals = bam.combine(intervals) - - APPLYBQSR(bam_intervals, dict, fasta, fai) - - // STEP 4.5: MERGING AND INDEXING THE RECALIBRATED BAM FILES - if (params.no_intervals) { - bam_recalibrated = APPLYBQSR.out.bam - } else { - APPLYBQSR.out.bam.map{ meta, bam -> //, bai -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - [patient, sample, gender, status, bam] //, bai] - }.groupTuple(by: [0,1]).set{ bam_recalibrated_interval } - - bam_recalibrated_interval = bam_recalibrated_interval.map { - patient, sample, gender, status, bam -> //, bai -> - - def meta = [:] - meta.patient = patient - meta.sample = sample - meta.gender = gender[0] - meta.status = status[0] - meta.id = sample - - [meta, bam] - } - - SAMTOOLS_MERGE(bam_recalibrated_interval) - bam_recalibrated = SAMTOOLS_MERGE.out.merged_bam - } - - SAMTOOLS_INDEX(bam_recalibrated) - bam_recalibrated_index = bam_recalibrated.join(SAMTOOLS_INDEX.out.bai) - - qualimap_bamqc = Channel.empty() - samtools_stats = Channel.empty() - - if (!skip_bamqc) { - QUALIMAP_BAMQC(bam_recalibrated, target_bed) - qualimap_bamqc = QUALIMAP_BAMQC.out - } - - if (!skip_samtools) { - SAMTOOLS_STATS(bam_recalibrated_index) - samtools_stats = SAMTOOLS_STATS.out.stats - } - - bam_reports = samtools_stats.mix(qualimap_bamqc) - - //TODO: set bam_recalibrated with all these steps - // // When using sentieon for mapping, Channel bam_recalibrated is bam_sentieon_recal - // if (params.sentieon && step == 'mapping') bam_recalibrated = bam_sentieon_recal - - // // When no knownIndels for mapping, Channel bam_recalibrated is bam_duplicates_marked - // if (!params.known_indels && step == 'mapping') bam_recalibrated = bam_duplicates_marked - - // // When starting with variant calling, Channel bam_recalibrated is input_sample - // if (step == 'variantcalling') bam_recalibrated = input_sample - // Creating TSV files to restart from this step - bam_recalibrated_index.collectFile(storeDir: "${params.outdir}/preprocessing/tsv") { meta, bam, bai -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - bam = "${params.outdir}/preprocessing/${sample}/recalibrated/${sample}.recal.bam" - bai = "${params.outdir}/preprocessing/${sample}/recalibrated/${sample}.recal.bam.bai" - ["recalibrated_${sample}.tsv", "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\n"] - } - - bam_recalibrated_index.map { meta, bam, bai -> - 
patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - bam = "${params.outdir}/preprocessing/${sample}/recalibrated/${sample}.recal.bam" - bai = "${params.outdir}/preprocessing/${sample}/recalibrated/${sample}.recal.bam.bai" - "${patient}\t${gender}\t${status}\t${sample}\t${bam}\t${bai}\n" - }.collectFile(name: 'recalibrated.tsv', sort: true, storeDir: "${params.outdir}/preprocessing/tsv") - } - - emit: - bam = bam_recalibrated_index - qc = bam_reports -} diff --git a/subworkflow/local/build_indices.nf b/subworkflows/local/build_indices.nf similarity index 61% rename from subworkflow/local/build_indices.nf rename to subworkflows/local/build_indices.nf index 28e56daf85..90c3f6ac2e 100644 --- a/subworkflow/local/build_indices.nf +++ b/subworkflows/local/build_indices.nf @@ -1,7 +1,7 @@ /* -================================================================================ - BUILDING INDICES -================================================================================ +======================================================================================== + BUILDING INDICES +======================================================================================== */ params.build_intervals_options = [:] @@ -10,7 +10,7 @@ params.bwamem2_index_options = [:] params.create_intervals_bed_options = [:] params.gatk4_dict_options = [:] params.index_target_bed_options = [:] -params.msisensor_scan_options = [:] +params.msisensorpro_scan_options = [:] params.samtools_faidx_options = [:] params.tabix_dbsnp_options = [:] params.tabix_germline_resource_options = [:] @@ -19,83 +19,85 @@ params.tabix_pon_options = [:] // Initialize channels based on params or indices that were just built -include { BUILD_INTERVALS } from '../../modules/local/build_intervals.nf' addParams(options: params.build_intervals_options) -include { BWA_INDEX as BWAMEM1_INDEX } from '../../modules/nf-core/software/bwa/index/main.nf' addParams(options: params.bwa_index_options) -include { BWAMEM2_INDEX } from '../../modules/nf-core/software/bwamem2/index/main.nf' addParams(options: params.bwamem2_index_options) -include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed.nf' addParams(options: params.create_intervals_bed_options) -include { GATK4_CREATESEQUENCEDICTIONARY as GATK4_DICT } from '../../modules/nf-core/software/gatk4/createsequencedictionary/main.nf' addParams(options: params.gatk4_dict_options) -include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/software/tabix/tabix/main.nf' addParams(options: params.tabix_dbsnp_options) -include { TABIX_TABIX as TABIX_GERMLINE_RESOURCE } from '../../modules/nf-core/software/tabix/tabix/main.nf' addParams(options: params.tabix_germline_resource_options) -include { TABIX_TABIX as TABIX_KNOWN_INDELS } from '../../modules/nf-core/software/tabix/tabix/main.nf' addParams(options: params.tabix_known_indels_options) -include { TABIX_TABIX as TABIX_PON } from '../../modules/nf-core/software/tabix/tabix/main.nf' addParams(options: params.tabix_pon_options) -include { INDEX_TARGET_BED } from '../../modules/local/index_target_bed.nf' addParams(options: params.index_target_bed_options) -include { MSISENSOR_SCAN } from '../../modules/nf-core/software/msisensor/scan.nf' addParams(options: params.msisensor_scan_options) -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/software/samtools/faidx/main.nf' addParams(options: params.samtools_faidx_options) - -workflow BUILD_INDICES{ +include { BUILD_INTERVALS } from 
'../../modules/local/build_intervals/main' addParams(options: params.build_intervals_options) +include { BWA_INDEX as BWAMEM1_INDEX } from '../../modules/nf-core/software/bwa/index/main' addParams(options: params.bwa_index_options) +include { BWAMEM2_INDEX } from '../../modules/nf-core/software/bwamem2/index/main' addParams(options: params.bwamem2_index_options) +include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed/main' addParams(options: params.create_intervals_bed_options) +include { GATK4_CREATESEQUENCEDICTIONARY as GATK4_DICT } from '../../modules/nf-core/software/gatk4/createsequencedictionary/main' addParams(options: params.gatk4_dict_options) +include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/software/tabix/tabix/main' addParams(options: params.tabix_dbsnp_options) +include { TABIX_TABIX as TABIX_GERMLINE_RESOURCE } from '../../modules/nf-core/software/tabix/tabix/main' addParams(options: params.tabix_germline_resource_options) +include { TABIX_TABIX as TABIX_KNOWN_INDELS } from '../../modules/nf-core/software/tabix/tabix/main' addParams(options: params.tabix_known_indels_options) +include { TABIX_TABIX as TABIX_PON } from '../../modules/nf-core/software/tabix/tabix/main' addParams(options: params.tabix_pon_options) +include { INDEX_TARGET_BED } from '../../modules/local/index_target_bed/main' addParams(options: params.index_target_bed_options) +include { MSISENSORPRO_SCAN } from '../../modules/nf-core/software/msisensorpro/scan/main' addParams(options: params.msisensorpro_scan_options) +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/software/samtools/faidx/main' addParams(options: params.samtools_faidx_options) + +workflow BUILD_INDICES { take: dbsnp // channel: [optional] dbsnp fasta // channel: [mandatory] fasta + fasta_fai // channel: [optional] fasta_fai germline_resource // channel: [optional] germline_resource known_indels // channel: [optional] known_indels pon // channel: [optional] pon - step // value: [mandatory] starting step target_bed // channel: [optional] target_bed - tools // list: [optional] tools to run main: result_bwa = Channel.empty() version_bwa = Channel.empty() - if (!(params.bwa) && 'mapping' in step) + if (!(params.bwa) && 'mapping' in params.step.toLowerCase()) if (params.aligner == "bwa-mem") (result_bwa, version_bwa) = BWAMEM1_INDEX(fasta) else (result_bwa, version_bwa) = BWAMEM2_INDEX(fasta) result_dict = Channel.empty() version_dict = Channel.empty() - if (!(params.dict) && !('annotate' in step) && !('controlfreec' in step)) + if (!(params.dict) && !('annotate' in params.step.toLowerCase()) && !('controlfreec' in params.step.toLowerCase())) (result_dict, version_dict) = GATK4_DICT(fasta) result_fai = Channel.empty() + if (fasta_fai) result_fai = fasta_fai version_fai = Channel.empty() - if (!(params.fasta_fai) && !('annotate' in step)) + if (!(params.fasta_fai) && !('annotate' in params.step.toLowerCase())) (result_fai, version_fai) = SAMTOOLS_FAIDX(fasta) result_dbsnp_tbi = Channel.empty() version_dbsnp_tbi = Channel.empty() - if (!(params.dbsnp_index) && params.dbsnp && ('mapping' in step || 'preparerecalibration' in step || 'controlfreec' in tools || 'haplotypecaller' in tools || 'mutect2' in tools || 'tnscope' in tools)) + if (!(params.dbsnp_index) && params.dbsnp && ('mapping' in params.step.toLowerCase() || 'prepare_recalibration' in params.step.toLowerCase() || 'controlfreec' in params.tools.toString().toLowerCase() || 'haplotypecaller' in params.tools.toString().toLowerCase() || 'mutect2'
in params.tools.toString().toLowerCase() || 'tnscope' in params.tools.toString().toLowerCase())) (result_dbsnp_tbi, version_dbsnp_tbi) = TABIX_DBSNP([[id:"${dbsnp.fileName}"], dbsnp]) + result_dbsnp_tbi = result_dbsnp_tbi.map {meta, tbi -> [tbi]} result_target_bed = Channel.empty() version_target_bed = Channel.empty() - if ((params.target_bed) && ('manta' in tools || 'strelka' in tools)) + if ((params.target_bed) && ('manta' in params.tools.toString().toLowerCase() || 'strelka' in params.tools.toString().toLowerCase())) (result_target_bed, version_target_bed) = INDEX_TARGET_BED(target_bed) result_germline_resource_tbi = Channel.empty() version_germline_resource_tbi = Channel.empty() - if (!(params.germline_resource_index) && params.germline_resource && 'mutect2' in tools) + if (!(params.germline_resource_index) && params.germline_resource && 'mutect2' in params.tools.toString().toLowerCase()) (result_germline_resource_tbi, version_germline_resource_tbi) = TABIX_GERMLINE_RESOURCE([[id:"${germline_resource.fileName}"], germline_resource]) result_known_indels_tbi = Channel.empty() version_known_indels_tbi = Channel.empty() - if (!(params.known_indels_index) && params.known_indels && ('mapping' in step || 'preparerecalibration' in step)) + if (!(params.known_indels_index) && params.known_indels && ('mapping' in params.step.toLowerCase() || 'prepare_recalibration' in params.step.toLowerCase())) (result_known_indels_tbi, version_known_indels_tbi) = TABIX_KNOWN_INDELS([[id:"${known_indels.fileName}"], known_indels]) + result_known_indels_tbi = result_known_indels_tbi.map {meta, tbi -> [tbi]} - result_msisensor_scan = Channel.empty() - version_msisensor_scan = Channel.empty() - if ('msisensor' in tools) - (result_msisensor_scan, version_msisensor_scan) = MSISENSOR_SCAN(fasta, result_fai) + result_msisensorpro_scan = Channel.empty() + version_msisensorpro_scan = Channel.empty() + if ('msisensorpro' in params.tools.toString().toLowerCase()) + (result_msisensorpro_scan, version_msisensorpro_scan) = MSISENSORPRO_SCAN(fasta) result_pon_tbi = Channel.empty() version_pon_tbi = Channel.empty() - if (!(params.pon_index) && params.pon && ('tnscope' in tools || 'mutect2' in tools)) + if (!(params.pon_index) && params.pon && ('tnscope' in params.tools.toString().toLowerCase() || 'mutect2' in params.tools.toString().toLowerCase())) (result_pon_tbi, version_pon_tbi) = TABIX_PON([[id:"${pon.fileName}"], pon]) result_intervals = Channel.empty() if (params.no_intervals) { file("${params.outdir}/no_intervals.bed").text = "no_intervals\n" result_intervals = Channel.from(file("${params.outdir}/no_intervals.bed")) - } else if (!('annotate' in step) && !('controlfreec' in step)) + } else if (!('annotate' in params.step.toLowerCase()) && !('controlfreec' in params.step.toLowerCase())) if (!params.intervals) result_intervals = CREATE_INTERVALS_BED(BUILD_INTERVALS(result_fai)) else @@ -129,7 +131,7 @@ workflow BUILD_INDICES{ germline_resource_tbi = result_germline_resource_tbi intervals = result_intervals known_indels_tbi = result_known_indels_tbi - msisensor_scan = result_msisensor_scan + msisensorpro_scan = result_msisensorpro_scan pon_tbi = result_pon_tbi target_bed_gz_tbi = result_target_bed } \ No newline at end of file diff --git a/subworkflow/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf similarity index 52% rename from subworkflow/local/germline_variant_calling.nf rename to subworkflows/local/germline_variant_calling.nf index debeca7d04..c0151add27 100644 --- 
a/subworkflow/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -1,7 +1,7 @@ /* -================================================================================ - GERMLINE VARIANT CALLING -================================================================================ +======================================================================================== + GERMLINE VARIANT CALLING +======================================================================================== */ params.haplotypecaller_options = [:] @@ -10,11 +10,11 @@ params.concat_gvcf_options = [:] params.concat_haplotypecaller_options = [:] params.strelka_options = [:] -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/software/gatk4/haplotypecaller' addParams(options: params.haplotypecaller_options) -include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/software/gatk4/genotypegvcf' addParams(options: params.genotypegvcf_options) -include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf' addParams(options: params.concat_gvcf_options) -include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf' addParams(options: params.concat_haplotypecaller_options) -include { STRELKA_GERMLINE as STRELKA } from '../../modules/nf-core/software/strelka/germline' addParams(options: params.strelka_options) +include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/software/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options) +include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/software/gatk4/genotypegvcf/main' addParams(options: params.genotypegvcf_options) +include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_gvcf_options) +include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_haplotypecaller_options) +include { STRELKA_GERMLINE as STRELKA } from '../../modules/nf-core/software/strelka/germline/main' addParams(options: params.strelka_options) workflow GERMLINE_VARIANT_CALLING { take: @@ -27,7 +27,6 @@ workflow GERMLINE_VARIANT_CALLING { intervals // channel: [mandatory] intervals target_bed // channel: [optional] target_bed target_bed_gz_tbi // channel: [optional] target_bed_gz_tbi - tools // list: [mandatory] list of tools main: @@ -38,9 +37,16 @@ workflow GERMLINE_VARIANT_CALLING { if (intervals == []) no_intervals = true - if ('haplotypecaller' in tools) { + if ('haplotypecaller' in params.tools.toLowerCase()) { + haplotypecaller_interval_bam = bam.combine(intervals) + bam.combine(intervals).map{ meta, bam, bai, intervals -> + new_meta = meta.clone() + new_meta.id = meta.sample + "_" + intervals.baseName + [new_meta, bam, bai, intervals] + }.set{haplotypecaller_interval_bam} + // STEP GATK HAPLOTYPECALLER.1 HAPLOTYPECALLER( @@ -52,26 +58,13 @@ workflow GERMLINE_VARIANT_CALLING { fai, no_intervals) - haplotypecaller_gvcf = HAPLOTYPECALLER.out.gvcf.map{ meta, vcf -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - [ patient, sample, gender, status, vcf] - }.groupTuple(by: [0,1]) - - haplotypecaller_gvcf = haplotypecaller_gvcf.map { patient, sample, gender, status, vcf -> - def meta = [:] - meta.patient = patient - meta.sample = sample - meta.gender = gender[0] - meta.status = status[0] - meta.id = meta.sample - [ meta, vcf ] - } + haplotypecaller_raw = 
HAPLOTYPECALLER.out.vcf.map{ meta,vcf -> + meta.id = meta.sample + [meta, vcf] + }.groupTuple() CONCAT_GVCF( - haplotypecaller_gvcf, + haplotypecaller_raw, fai, target_bed) @@ -80,7 +73,7 @@ workflow GERMLINE_VARIANT_CALLING { // STEP GATK HAPLOTYPECALLER.2 GENOTYPEGVCF( - HAPLOTYPECALLER.out.interval_gvcf, + HAPLOTYPECALLER.out.interval_vcf, dbsnp, dbsnp_tbi, dict, @@ -88,33 +81,20 @@ workflow GERMLINE_VARIANT_CALLING { fai, no_intervals) - haplotypecaller_interval_vcf = GENOTYPEGVCF.out.vcf.map{ meta, vcf -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - [ patient, sample, gender, status, vcf] - }.groupTuple(by: [0,1]) - - haplotypecaller_interval_vcf = haplotypecaller_interval_vcf.map { patient, sample, gender, status, vcf -> - def meta = [:] - meta.patient = patient - meta.sample = sample - meta.gender = gender[0] - meta.status = status[0] - meta.id = meta.sample - [ meta, vcf ] - } + haplotypecaller_results = GENOTYPEGVCF.out.vcf.map{ meta, vcf -> + meta.id = meta.sample + [meta, vcf] + }.groupTuple() CONCAT_HAPLOTYPECALLER( - haplotypecaller_interval_vcf, + haplotypecaller_results, fai, target_bed) - - haplotypecaller_vcf = CONCAT_GVCF.out.vcf + + haplotypecaller_vcf = CONCAT_HAPLOTYPECALLER.out.vcf } - if ('strelka' in tools) { + if ('strelka' in params.tools.toLowerCase()) { STRELKA( bam, fasta, diff --git a/subworkflows/local/mapping_csv.nf b/subworkflows/local/mapping_csv.nf new file mode 100644 index 0000000000..a622622914 --- /dev/null +++ b/subworkflows/local/mapping_csv.nf @@ -0,0 +1,42 @@ +/* +======================================================================================== + MAPPING_CSV +======================================================================================== +*/ + +workflow MAPPING_CSV { + take: + bam_mapped // channel: [mandatory] meta, bam, bai + save_bam_mapped // boolean: [mandatory] save_bam_mapped + skip_markduplicates // boolean: [mandatory] skip_markduplicates + + main: + if (save_bam_mapped) { + csv_bam_mapped = bam_mapped.map { meta, bam, bai -> [meta] } + // Creating csv files to restart from this step + csv_bam_mapped.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta -> + patient = meta.patient[0] + sample = meta.sample[0] + gender = meta.gender[0] + status = meta.status[0] + bam = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam" + bai = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam.bai" + ["mapped_${sample}.csv", "patient,gender,status,sample,bam,bai\n${patient},${gender},${status},${sample},${bam},${bai}\n"] + }.collectFile(name: "mapped.csv", keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") + } + + if (skip_markduplicates) { + csv_bam_mapped = bam_mapped.map { meta, bam, bai -> [meta] } + // Creating csv files to restart from this step + csv_bam_mapped.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta -> + patient = meta.patient[0] + sample = meta.sample[0] + gender = meta.gender[0] + status = meta.status[0] + bam = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam" + bai = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam.bai" + table = "${params.outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table" + ["mapped_no_markduplicates_${sample}.csv", "patient,gender,status,sample,bam,bai,table\n${patient},${gender},${status},${sample},${bam},${bai},${table}\n"] + }.collectFile(name: 'mapped_no_markduplicates.csv', keepHeader: true, skip: 1, sort: true, 
storeDir: "${params.outdir}/preprocessing/csv") + } +} diff --git a/subworkflows/local/markduplicates_csv.nf b/subworkflows/local/markduplicates_csv.nf new file mode 100644 index 0000000000..47b765f98d --- /dev/null +++ b/subworkflows/local/markduplicates_csv.nf @@ -0,0 +1,23 @@ +/* +======================================================================================== + MARKDUPLICATES_CSV +======================================================================================== +*/ + +workflow MARKDUPLICATES_CSV { + take: + bam_markduplicates // channel: [mandatory] meta, bam, bai + + main: + // Creating csv files to restart from this step + bam_markduplicates.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta, bam, bai -> + patient = meta.patient + sample = meta.sample + gender = meta.gender + status = meta.status + bam = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam" + bai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam.bai" + table = "${params.outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table" + ["markduplicates_${sample}.csv", "patient,gender,status,sample,bam,bai,table\n${patient},${gender},${status},${sample},${bam},${bai},${table}\n"] + }.collectFile(name: 'markduplicates.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") +} diff --git a/subworkflow/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf similarity index 75% rename from subworkflow/local/pair_variant_calling.nf rename to subworkflows/local/pair_variant_calling.nf index 5654217909..ab14158f33 100644 --- a/subworkflow/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -1,18 +1,18 @@ /* -================================================================================ - SOMATIC VARIANT CALLING -================================================================================ +======================================================================================== + SOMATIC VARIANT CALLING +======================================================================================== */ params.manta_options = [:] -params.msisensor_msi_options = [:] +params.msisensorpro_msi_options = [:] params.strelka_options = [:] params.strelka_bp_options = [:] -include { MANTA_SOMATIC as MANTA } from '../../modules/nf-core/software/manta/somatic' addParams(options: params.manta_options) -include { MSISENSOR_MSI } from '../../modules/nf-core/software/msisensor/msi' addParams(options: params.msisensor_msi_options) -include { STRELKA_SOMATIC as STRELKA } from '../../modules/nf-core/software/strelka/somatic' addParams(options: params.strelka_options) -include { STRELKA_SOMATIC_BEST_PRACTICES as STRELKA_BP } from '../../modules/nf-core/software/strelka/somatic' addParams(options: params.strelka_bp_options) +include { MANTA_SOMATIC as MANTA } from '../../modules/nf-core/software/manta/somatic/main' addParams(options: params.manta_options) +include { MSISENSORPRO_MSI } from '../../modules/nf-core/software/msisensorpro/msi/main' addParams(options: params.msisensorpro_msi_options) +include { STRELKA_SOMATIC as STRELKA } from '../../modules/nf-core/software/strelka/somatic/main' addParams(options: params.strelka_options) +include { STRELKA_SOMATIC_BEST_PRACTICES as STRELKA_BP } from '../../modules/nf-core/software/strelka/somaticbp/main' addParams(options: params.strelka_bp_options) workflow PAIR_VARIANT_CALLING { take: @@ -23,10 +23,9 @@ workflow PAIR_VARIANT_CALLING { 
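The new *_CSV subworkflows all emit the same shape of restart sheet: per-sample CSVs plus one merged CSV, each with a header row and absolute paths under params.outdir. For illustration, a markduplicates.csv for one hypothetical sample (the patient and sample names and the /results output directory are assumptions, not values from this changeset):

patient,gender,status,sample,bam,bai,table
patient1,XX,0,sample1,/results/preprocessing/sample1/markduplicates/sample1.md.bam,/results/preprocessing/sample1/markduplicates/sample1.md.bam.bai,/results/preprocessing/sample1/recal_table/sample1.recal.table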
fai // channel: [mandatory] fai fasta // channel: [mandatory] fasta intervals // channel: [mandatory] intervals - msisensor_scan // channel: [optional] msisensor_scan + msisensorpro_scan // channel: [optional] msisensorpro_scan target_bed // channel: [optional] target_bed target_bed_gz_tbi // channel: [optional] target_bed_gz_tbi - tools // list: [mandatory] list of tools main: @@ -47,7 +46,7 @@ workflow PAIR_VARIANT_CALLING { meta.normal = normal[1] meta.tumor = tumor[1] meta.gender = normal[2] - meta.id = "${meta.tumor}_vs_${meta.normal}" + meta.id = "${meta.tumor}_vs_${meta.normal}".toString() [meta, normal[4], normal[5], tumor[4], tumor[5]] } @@ -55,7 +54,7 @@ workflow PAIR_VARIANT_CALLING { manta_vcf = Channel.empty() strelka_vcf = Channel.empty() - if ('manta' in tools) { + if ('manta' in params.tools.toLowerCase()) { MANTA( bam_pair, fasta, @@ -70,7 +69,7 @@ workflow PAIR_VARIANT_CALLING { manta_vcf = manta_candidate_small_indels_vcf.mix(manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf) - if ('strelka' in tools) { + if ('strelka' in params.tools.toLowerCase()) { STRELKA_BP( manta_csi_for_strelka_bp, fasta, @@ -84,13 +83,13 @@ workflow PAIR_VARIANT_CALLING { } } - if ('msisensor' in tools) { - MSISENSOR_MSI( + if ('msisensorpro' in params.tools.toLowerCase()) { + MSISENSORPRO_MSI( bam_pair, - msisensor_scan) + msisensorpro_scan) } - if ('strelka' in tools) { + if ('strelka' in params.tools.toLowerCase()) { STRELKA( bam_pair, fasta, diff --git a/subworkflows/local/prepare_recalibration_csv.nf b/subworkflows/local/prepare_recalibration_csv.nf new file mode 100644 index 0000000000..b6c4f177de --- /dev/null +++ b/subworkflows/local/prepare_recalibration_csv.nf @@ -0,0 +1,22 @@ +/* +======================================================================================== + PREPARE_RECALIBRATION_CSV +======================================================================================== +*/ + +workflow PREPARE_RECALIBRATION_CSV { + take: + table_bqsr // channel: [mandatory] meta, table + + main: + // Creating csv files to restart from this step + table_bqsr.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta, table -> + patient = meta.patient + sample = meta.sample + gender = meta.gender + status = meta.status + bam = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam" + bai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.bam.bai" + ["markduplicates_no_table_${sample}.csv", "patient,gender,status,sample,bam,bai\n${patient},${gender},${status},${sample},${bam},${bai}\n"] + }.collectFile(name: 'markduplicates_no_table.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") +} diff --git a/subworkflows/local/recalibrate_csv.nf b/subworkflows/local/recalibrate_csv.nf new file mode 100644 index 0000000000..ad50b1f8d3 --- /dev/null +++ b/subworkflows/local/recalibrate_csv.nf @@ -0,0 +1,22 @@ +/* +======================================================================================== + RECALIBRATE_CSV +======================================================================================== +*/ + +workflow RECALIBRATE_CSV { + take: + bam_recalibrated_index // channel: [mandatory] meta, bam, bai + + main: + // Creating csv files to restart from this step + bam_recalibrated_index.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta, bam, bai -> + patient = meta.patient + sample = meta.sample + gender = meta.gender + status = meta.status + bam = 
"${params.outdir}/preprocessing/${sample}/recalibrated/${sample}.recal.bam" + bai = "${params.outdir}/preprocessing/${sample}/recalibrated/${sample}.recal.bam.bai" + ["recalibrated_${sample}.csv", "patient,gender,status,sample,bam,bai\n${patient},${gender},${status},${sample},${bam},${bai}\n"] + }.collectFile(name: 'recalibrated.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") +} diff --git a/subworkflow/nf-core/fastqc_trimgalore.nf b/subworkflows/nf-core/fastqc_trimgalore.nf similarity index 95% rename from subworkflow/nf-core/fastqc_trimgalore.nf rename to subworkflows/nf-core/fastqc_trimgalore.nf index 87cac4e452..3a1a1cec09 100644 --- a/subworkflow/nf-core/fastqc_trimgalore.nf +++ b/subworkflows/nf-core/fastqc_trimgalore.nf @@ -11,18 +11,18 @@ include { TRIMGALORE } from '../../modules/nf-core/software/trimgalore/mai workflow FASTQC_TRIMGALORE { take: reads // channel: [ val(meta), [ reads ] ] - skip_fastqc // boolean: true/false + // skip_fastqc // boolean: true/false skip_trimming // boolean: true/false main: fastqc_html = Channel.empty() fastqc_zip = Channel.empty() fastqc_version = Channel.empty() - if (!skip_fastqc) { + // if (!skip_fastqc) { FASTQC ( reads ).html.set { fastqc_html } fastqc_zip = FASTQC.out.zip fastqc_version = FASTQC.out.version - } + // } trim_reads = reads trim_html = Channel.empty() diff --git a/subworkflows/nf-core/mapping.nf b/subworkflows/nf-core/mapping.nf new file mode 100644 index 0000000000..6be238afc5 --- /dev/null +++ b/subworkflows/nf-core/mapping.nf @@ -0,0 +1,106 @@ +/* +======================================================================================== + MAPPING +======================================================================================== +*/ + +params.bwamem1_mem_options = [:] +params.bwamem1_mem_tumor_options = [:] +params.bwamem2_mem_options = [:] +params.bwamem2_mem_tumor_options = [:] +params.merge_bam_options = [:] +params.qualimap_bamqc_options = [:] +params.samtools_index_options = [:] +params.samtools_stats_options = [:] + +include { BWA_MEM as BWAMEM1_MEM } from '../../modules/nf-core/software/bwa/mem/main' addParams(options: params.bwamem1_mem_options) +include { BWA_MEM as BWAMEM1_MEM_T } from '../../modules/nf-core/software/bwa/mem/main' addParams(options: params.bwamem1_mem_tumor_options) +include { BWAMEM2_MEM } from '../../modules/nf-core/software/bwamem2/mem/main' addParams(options: params.bwamem2_mem_options) +include { BWAMEM2_MEM as BWAMEM2_MEM_T } from '../../modules/nf-core/software/bwamem2/mem/main' addParams(options: params.bwamem2_mem_tumor_options) +include { SAMTOOLS_MERGE } from '../../modules/nf-core/software/samtools/merge/main' addParams(options: params.merge_bam_options) +include { QUALIMAP_BAMQC } from '../../modules/nf-core/software/qualimap/bamqc/main' addParams(options: params.qualimap_bamqc_options) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams(options: params.samtools_index_options) +include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams(options: params.samtools_stats_options) + +workflow MAPPING { + take: + skip_bamqc // boolean: true/false + skip_samtools // boolean: true/false + aligner // string: [mandatory] "bwa-mem" or "bwa-mem2" + bwa // channel: [mandatory] bwa + fai // channel: [mandatory] fai + fasta // channel: [mandatory] fasta + reads_input // channel: [mandatory] meta, reads_input + target_bed // channel: [optional] target_bed + + main: + + 
bam_mapped_index = Channel.empty() + bam_reports = Channel.empty() + + // If meta.status is 1, then sample is tumor + // else (even if no meta.status exists) sample is normal + reads_input.branch{ + tumor: it[0].status == 1 + normal: true + }.set{ reads_input_status } + + bam_bwamem1 = Channel.empty() + bam_bwamem2 = Channel.empty() + + if (aligner == "bwa-mem") { + BWAMEM1_MEM(reads_input_status.normal, bwa) + bam_bwamem1_n = BWAMEM1_MEM.out.bam + + BWAMEM1_MEM_T(reads_input_status.tumor, bwa) + bam_bwamem1_t = BWAMEM1_MEM_T.out.bam + + bam_bwamem1 = bam_bwamem1_n.mix(bam_bwamem1_t) + } else { + BWAMEM2_MEM(reads_input_status.normal, bwa) + bam_bwamem2_n = BWAMEM2_MEM.out.bam + + BWAMEM2_MEM_T(reads_input_status.tumor, bwa) + bam_bwamem2_t = BWAMEM2_MEM_T.out.bam + + bam_bwamem2 = bam_bwamem2_n.mix(bam_bwamem2_t) + } + + bam_bwa = bam_bwamem1.mix(bam_bwamem2) + + bam_bwa.map{ meta, bam -> + meta.remove('read_group') + meta.id = meta.sample + [meta, bam] + }.groupTuple().branch{ + single: it[1].size() == 1 + multiple: it[1].size() > 1 + }.set{ bam_bwa_to_sort } + + // STEP 1.5: MERGING AND INDEXING BAM FROM MULTIPLE LANES + + SAMTOOLS_MERGE(bam_bwa_to_sort.multiple) + bam_mapped = bam_bwa_to_sort.single.mix(SAMTOOLS_MERGE.out.merged_bam) + + SAMTOOLS_INDEX(bam_mapped) + bam_mapped_index = bam_mapped.join(SAMTOOLS_INDEX.out.bai) + + qualimap_bamqc = Channel.empty() + samtools_stats = Channel.empty() + + if (!skip_bamqc) { + QUALIMAP_BAMQC(bam_mapped, target_bed, params.target_bed) + qualimap_bamqc = QUALIMAP_BAMQC.out + } + + if (!skip_samtools) { + SAMTOOLS_STATS(bam_mapped_index) + samtools_stats = SAMTOOLS_STATS.out.stats + } + + bam_reports = samtools_stats.mix(qualimap_bamqc) + + emit: + bam = bam_mapped_index + qc = bam_reports +} diff --git a/subworkflows/nf-core/markduplicates.nf b/subworkflows/nf-core/markduplicates.nf new file mode 100644 index 0000000000..89f436b877 --- /dev/null +++ b/subworkflows/nf-core/markduplicates.nf @@ -0,0 +1,36 @@ +/* +======================================================================================== + MARKDUPLICATES +======================================================================================== +*/ + +params.markduplicates_options = [:] +params.markduplicatesspark_options = [:] + +include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/software/gatk4/markduplicates/main' addParams(options: params.markduplicates_options) +include { GATK4_MARKDUPLICATES_SPARK } from '../../modules/nf-core/software/gatk4/markduplicatesspark/main' addParams(options: params.markduplicatesspark_options) + +workflow MARKDUPLICATES { + take: + bam_mapped // channel: [mandatory] meta, bam, bai + use_gatk_spark // value: [mandatory] use gatk spark + save_metrics // value: [mandatory] save metrics + + main: + + report_markduplicates = Channel.empty() + + if (use_gatk_spark) { + GATK4_MARKDUPLICATES_SPARK(bam_mapped, save_metrics) + report_markduplicates = GATK4_MARKDUPLICATES_SPARK.out.metrics + bam_markduplicates = GATK4_MARKDUPLICATES_SPARK.out.bam + } else { + GATK4_MARKDUPLICATES(bam_mapped, save_metrics) + report_markduplicates = GATK4_MARKDUPLICATES.out.metrics + bam_markduplicates = GATK4_MARKDUPLICATES.out.bam + } + + emit: + bam = bam_markduplicates + report = report_markduplicates +} diff --git a/subworkflows/nf-core/prepare_recalibration.nf b/subworkflows/nf-core/prepare_recalibration.nf new file mode 100644 index 0000000000..df53093470 --- /dev/null +++ b/subworkflows/nf-core/prepare_recalibration.nf @@ -0,0 +1,53 @@
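The new subworkflow below is a scatter/gather over intervals: every MarkDuplicates BAM is combined with every interval file, BaseRecalibrator runs once per (sample, interval) shard, and, unless --no_intervals is in effect, the per-shard tables are merged back into a single recalibration table per sample. Schematically (not runnable; names from the hunk):

    bam_markduplicates.combine(intervals)    // scatter: one shard per (sample, interval)
        -> BASERECALIBRATOR                  // per-shard recalibration table
        -> groupTuple()                      // regroup shards by sample
        -> GATHERBQSRREPORTS                 // gather: one table per sample

+/*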
+======================================================================================== + PREPARE RECALIBRATION +======================================================================================== +*/ + +params.baserecalibrator_options = [:] +params.gatherbqsrreports_options = [:] + +include { GATK4_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/nf-core/software/gatk4/baserecalibrator/main' addParams(options: params.baserecalibrator_options) +include { GATK4_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/nf-core/software/gatk4/gatherbqsrreports/main' addParams(options: params.gatherbqsrreports_options) + +workflow PREPARE_RECALIBRATION { + take: + bam_markduplicates // channel: [mandatory] bam_markduplicates + dict // channel: [mandatory] dict + fai // channel: [mandatory] fai + fasta // channel: [mandatory] fasta + intervals // channel: [mandatory] intervals + known_sites // channel: [optional] known_sites + known_sites_tbi // channel: [optional] known_sites_tbi + no_intervals // value: [mandatory] no_intervals + + main: + + bam_markduplicates.combine(intervals).map{ meta, bam, bai, intervals -> + new_meta = meta.clone() + new_meta.id = meta.sample + "_" + intervals.baseName + [new_meta, bam, bai, intervals] + }.set{bam_markduplicates_intervals} + + BASERECALIBRATOR(bam_markduplicates_intervals, fasta, fai, dict, known_sites, known_sites_tbi) + + // STEP 3.5: MERGING RECALIBRATION TABLES + if (no_intervals) { + BASERECALIBRATOR.out.table.map { meta, table -> + meta.id = meta.sample + [meta, table] + }.set{table_bqsr} + } else { + BASERECALIBRATOR.out.table + .map{ meta, table -> + meta.id = meta.sample + [meta, table] + }.groupTuple().set{recaltable} + + GATHERBQSRREPORTS(recaltable) + table_bqsr = GATHERBQSRREPORTS.out.table + } + + emit: + table_bqsr = table_bqsr +} diff --git a/subworkflows/nf-core/recalibrate.nf b/subworkflows/nf-core/recalibrate.nf new file mode 100644 index 0000000000..435f570b29 --- /dev/null +++ b/subworkflows/nf-core/recalibrate.nf @@ -0,0 +1,77 @@ +/* +======================================================================================== + RECALIBRATE +======================================================================================== +*/ + +params.applybqsr_options = [:] +params.merge_bam_options = [:] +params.qualimap_bamqc_options = [:] +params.samtools_index_options = [:] +params.samtools_stats_options = [:] + +include { GATK4_APPLYBQSR as APPLYBQSR } from '../../modules/nf-core/software/gatk4/applybqsr/main' addParams(options: params.applybqsr_options) +include { QUALIMAP_BAMQC } from '../../modules/nf-core/software/qualimap/bamqc/main' addParams(options: params.qualimap_bamqc_options) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams(options: params.samtools_index_options) +include { SAMTOOLS_MERGE } from '../../modules/nf-core/software/samtools/merge/main' addParams(options: params.merge_bam_options) +include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams(options: params.samtools_stats_options) + +workflow RECALIBRATE { + take: + skip_bamqc // boolean: true/false + skip_samtools // boolean: true/false + bam // channel: [mandatory] bam + dict // channel: [mandatory] dict + fai // channel: [mandatory] fai + fasta // channel: [mandatory] fasta + intervals // channel: [mandatory] intervals + target_bed // channel: [optional] target_bed + + main: + + bam_recalibrated_index = Channel.empty() + bam_recalibrated = Channel.empty() + 
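One detail worth flagging before the combine/map below: `combine` re-emits the same `meta` map object once per interval, so the closure clones it before mutating `id`; without `meta.clone()` every scattered copy would share a single mutated map and the per-shard ids would collide.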
bam_reports = Channel.empty() + + bam.combine(intervals).map{ meta, bam, bai, recal, intervals -> + new_meta = meta.clone() + new_meta.id = meta.sample + "_" + intervals.baseName + [new_meta, bam, bai, recal, intervals] + }.set{bam_intervals} + + APPLYBQSR(bam_intervals, fasta, fai, dict) + + // STEP 4.5: MERGING AND INDEXING THE RECALIBRATED BAM FILES + if (params.no_intervals) { + bam_recalibrated = APPLYBQSR.out.bam + } else { + APPLYBQSR.out.bam.map{ meta, bam -> + meta.id = meta.sample + [meta, bam] + }.groupTuple().set{bam_recalibrated_interval} + + SAMTOOLS_MERGE(bam_recalibrated_interval) + bam_recalibrated = SAMTOOLS_MERGE.out.merged_bam + + SAMTOOLS_INDEX(bam_recalibrated) + bam_recalibrated_index = bam_recalibrated.join(SAMTOOLS_INDEX.out.bai) + + qualimap_bamqc = Channel.empty() + samtools_stats = Channel.empty() + + if (!skip_bamqc) { + QUALIMAP_BAMQC(bam_recalibrated, target_bed, params.target_bed) + qualimap_bamqc = QUALIMAP_BAMQC.out + } + + if (!skip_samtools) { + SAMTOOLS_STATS(bam_recalibrated_index) + samtools_stats = SAMTOOLS_STATS.out.stats + } + bam_reports = samtools_stats.mix(qualimap_bamqc) + } + + emit: + bam = bam_recalibrated_index + qc = bam_reports +} diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config index e224324ddf..e22feff405 100644 --- a/tests/config/nextflow.config +++ b/tests/config/nextflow.config @@ -31,5 +31,5 @@ includeConfig 'test_data.config' includeConfig '../../conf/modules.config' manifest { - nextflowVersion = '!>=20.11.0-edge' + nextflowVersion = '!>=21.04.0' } diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml index a180bfce88..b215cffcc6 100644 --- a/tests/config/pytest_software.yml +++ b/tests/config/pytest_software.yml @@ -1,4 +1,4 @@ markduplicates: - - modules/nf-core/software/gatk4/markduplicates.nf + - modules/nf-core/software/gatk4/markduplicates/main.nf - subworkflow/local/markduplicates.nf - tests/subworkflow/local/markduplicates/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index e1b4617821..910379f589 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -4,7 +4,8 @@ def nf_core_modules_data = "https://raw.githubusercontent.com/nf-core/test-datas params { test_data { 'nf-core' { - test_paired_end_sorted_bam = "${nf_core_modules_data}/genomics/sarscov2/illumina/bam/test_paired_end.sorted.bam" + test_paired_end_sorted_bam = "${nf_core_modules_data}/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam" + test_paired_end_sorted_bai = "${nf_core_modules_data}/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai" genome_fasta = "${nf_core_modules_data}/genomics/sarscov2/genome/genome.fasta" genome_sizes = "${nf_core_modules_data}/genomics/sarscov2/genome/genome.sizes" 'bed' { diff --git a/tests/subworkflows/local/markduplicates/main.nf b/tests/subworkflows/local/markduplicates/main.nf deleted file mode 100644 index fabd6cfbcf..0000000000 --- a/tests/subworkflows/local/markduplicates/main.nf +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { MARKDUPLICATES } from '../../../../subworkflow/local/markduplicates' addParams( - markduplicates_options: modules['markduplicates'] -) - -workflow test_markduplicates { - input = [[ id: 'test' ], - [ file(params.test_data['nf-core']['test_paired_end_sorted_bam'], checkIfExists: true)]] - - step = 'preparerecalibration' - - MARKDUPLICATES ( input, step ) -} diff --git a/tests/subworkflows/nf-core/markduplicates/main.nf 
b/tests/subworkflows/nf-core/markduplicates/main.nf new file mode 100644 index 0000000000..feb130c696 --- /dev/null +++ b/tests/subworkflows/nf-core/markduplicates/main.nf @@ -0,0 +1,19 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +// Don't overwrite global params.modules, create a copy instead and use that within the main script. +def modules = params.modules.clone() + +include { MARKDUPLICATES } from '../../../../subworkflows/nf-core/markduplicates' addParams( + markduplicates_options: modules['markduplicates'], + markduplicatesspark_options: modules['markduplicatesspark'] +) + +workflow test_markduplicates { + input = [[id: 'test'], + [file(params.test_data['nf-core']['test_paired_end_sorted_bam'], checkIfExists: true)], + [file(params.test_data['nf-core']['test_paired_end_sorted_bai'], checkIfExists: true)]] + + MARKDUPLICATES ( input, false, true ) +} diff --git a/tests/subworkflows/local/markduplicates/test.yml b/tests/subworkflows/nf-core/markduplicates/test.yml similarity index 51% rename from tests/subworkflows/local/markduplicates/test.yml rename to tests/subworkflows/nf-core/markduplicates/test.yml index fdf9e9b228..427736fe2d 100644 --- a/tests/subworkflows/local/markduplicates/test.yml +++ b/tests/subworkflows/nf-core/markduplicates/test.yml @@ -1,9 +1,8 @@ - name: subworkflow markduplicates - command: nextflow run ./tests/modules/local/subworkflows/markduplicates/ -entry test_markduplicates -c tests/config/nextflow.config + command: nextflow run ./tests/subworkflows/nf-core/markduplicates/ -entry test_markduplicates -c tests/config/nextflow.config tags: - markduplicates - gatk4 files: - path: output/preprocessing/test/markduplicates/test.md.bam - path: output/preprocessing/test/markduplicates/test.md.bam.bai - - path: output/preprocessing/test/markduplicates/test.recal.table diff --git a/tests/test_aligner.yml b/tests/test_aligner.yml index 6dffc1b3ff..437044bde4 100644 --- a/tests/test_aligner.yml +++ b/tests/test_aligner.yml @@ -1,36 +1,3 @@ -- name: Run bwa-mem - command: nextflow run main.nf -profile test,docker --aligner bwa-mem - tags: - - aligner - - bwa-mem - files: - - path: results/preprocessing/1234N/mapped/1234N.bam - - path: results/preprocessing/1234N/mapped/1234N.bam.bai - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.html - 
- path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.zip - - name: Run bwa-mem2 command: nextflow run main.nf -profile test,docker --aligner bwa-mem2 tags: @@ -39,27 +6,22 @@ files: - path: results/preprocessing/1234N/mapped/1234N.bam - path: results/preprocessing/1234N/mapped/1234N.bam.bai - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.zip + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam.bai + - path: results/preprocessing/1234N/recal_table/1234N.recal.table + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam.bai + - path: results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_1234N.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv + - path: results/preprocessing/csv/markduplicates_no_table_1234N.csv + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_1234N.csv + - path: results/reports/fastqc/1234N-1234N_M1 + - path: results/reports/fastqc/1234N-1234N_M2 + - path: results/reports/fastqc/1234N-1234N_M4 + - path: results/reports/fastqc/1234N-1234N_M5 + - path: results/reports/fastqc/1234N-1234N_M6 + - 
path: results/reports/fastqc/1234N-1234N_M7 + - path: results/reports/qualimap/1234N + - path: results/reports/samtools_stats/1234N/1234N.bam.stats \ No newline at end of file diff --git a/tests/test_default.yml b/tests/test_default.yml index 645d6d82ae..61a5c744b9 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -6,29 +6,22 @@ files: - path: results/preprocessing/1234N/mapped/1234N.bam - path: results/preprocessing/1234N/mapped/1234N.bam.bai - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.zip + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam.bai + - path: results/preprocessing/1234N/recal_table/1234N.recal.table + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam.bai + - path: results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_1234N.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv + - path: results/preprocessing/csv/markduplicates_no_table_1234N.csv + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_1234N.csv + - path: results/reports/fastqc/1234N-1234N_M1 + - path: results/reports/fastqc/1234N-1234N_M2 + - path: results/reports/fastqc/1234N-1234N_M4 + - path: results/reports/fastqc/1234N-1234N_M5 + - path: results/reports/fastqc/1234N-1234N_M6 + - path: results/reports/fastqc/1234N-1234N_M7 - path: results/reports/qualimap/1234N - - path: results/reports/samtools_stats/1234N/1234N.bam.stats \ No newline at end of file + - path: results/reports/samtools_stats/1234N/1234N.bam.stats diff --git a/tests/test_gatk_spark.yml b/tests/test_gatk_spark.yml 
index 28291229b6..05aad7aa45 100644 --- a/tests/test_gatk_spark.yml +++ b/tests/test_gatk_spark.yml @@ -1,5 +1,5 @@ - name: Run default pipeline with gatk_spark - command: nextflow run main.nf -profile test_use_gatk_spark,docker + command: nextflow run main.nf -profile test,use_gatk_spark,docker tags: - gatk4 - gatk4_spark @@ -7,29 +7,22 @@ files: - path: results/preprocessing/1234N/mapped/1234N.bam - path: results/preprocessing/1234N/mapped/1234N.bam.bai - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.zip + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam.bai + - path: results/preprocessing/1234N/recal_table/1234N.recal.table + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam.bai + - path: results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_1234N.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv + - path: results/preprocessing/csv/markduplicates_no_table_1234N.csv + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_1234N.csv + - path: results/reports/fastqc/1234N-1234N_M1 + - path: results/reports/fastqc/1234N-1234N_M2 + - path: results/reports/fastqc/1234N-1234N_M4 + - path: results/reports/fastqc/1234N-1234N_M5 + - path: results/reports/fastqc/1234N-1234N_M6 + - path: results/reports/fastqc/1234N-1234N_M7 - path: results/reports/qualimap/1234N - path: results/reports/samtools_stats/1234N/1234N.bam.stats \ No newline at end of file diff --git a/tests/test_pair.yml b/tests/test_pair.yml index 374e5a5d5e..1b6ce52740 100644 --- a/tests/test_pair.yml +++ 
b/tests/test_pair.yml @@ -1,53 +1,42 @@ - name: Run default pipeline for tumor normal pair - command: nextflow run main.nf -profile test_pair,docker + command: nextflow run main.nf -profile test,pair,docker tags: - preprocessing - tumor_normal_pair files: - path: results/preprocessing/1234N/mapped/1234N.bam - path: results/preprocessing/1234N/mapped/1234N.bam.bai + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam.bai + - path: results/preprocessing/1234N/recal_table/1234N.recal.table + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam.bai - path: results/preprocessing/9876T/mapped/9876T.bam - path: results/preprocessing/9876T/mapped/9876T.bam.bai - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.zip - - path: results/reports/fastqc/9876T-9876T_M1/9876T-9876T_M1_1_fastqc.html - - path: results/reports/fastqc/9876T-9876T_M1/9876T-9876T_M1_1_fastqc.zip - - path: results/reports/fastqc/9876T-9876T_M1/9876T-9876T_M1_2_fastqc.html - - path: results/reports/fastqc/9876T-9876T_M1/9876T-9876T_M1_2_fastqc.zip - - path: results/reports/fastqc/9876T-9876T_M2/9876T-9876T_M2_1_fastqc.html - - path: results/reports/fastqc/9876T-9876T_M2/9876T-9876T_M2_1_fastqc.zip - - path: results/reports/fastqc/9876T-9876T_M2/9876T-9876T_M2_2_fastqc.html - - path: results/reports/fastqc/9876T-9876T_M2/9876T-9876T_M2_2_fastqc.zip - - path: results/reports/fastqc/9876T-9876T_M4/9876T-9876T_M4_1_fastqc.html - - path: results/reports/fastqc/9876T-9876T_M4/9876T-9876T_M4_1_fastqc.zip - - path: results/reports/fastqc/9876T-9876T_M4/9876T-9876T_M4_2_fastqc.html - - path: 
results/reports/fastqc/9876T-9876T_M4/9876T-9876T_M4_2_fastqc.zip - - path: results/reports/fastqc/9876T-9876T_M5/9876T-9876T_M5_1_fastqc.html - - path: results/reports/fastqc/9876T-9876T_M5/9876T-9876T_M5_1_fastqc.zip - - path: results/reports/fastqc/9876T-9876T_M5/9876T-9876T_M5_2_fastqc.html - - path: results/reports/fastqc/9876T-9876T_M5/9876T-9876T_M5_2_fastqc.zip + - path: results/preprocessing/9876T/markduplicates/9876T.md.bam + - path: results/preprocessing/9876T/markduplicates/9876T.md.bam.bai + - path: results/preprocessing/9876T/recal_table/9876T.recal.table + - path: results/preprocessing/9876T/recalibrated/9876T.recal.bam + - path: results/preprocessing/9876T/recalibrated/9876T.recal.bam.bai + - path: results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_1234N.csv + - path: results/preprocessing/csv/markduplicates_9876T.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv + - path: results/preprocessing/csv/markduplicates_no_table_1234N.csv + - path: results/preprocessing/csv/markduplicates_no_table_9876T.csv + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_1234N.csv + - path: results/preprocessing/csv/recalibrated_9876T.csv + - path: results/reports/fastqc/1234N-1234N_M1 + - path: results/reports/fastqc/1234N-1234N_M2 + - path: results/reports/fastqc/1234N-1234N_M4 + - path: results/reports/fastqc/1234N-1234N_M5 + - path: results/reports/fastqc/1234N-1234N_M6 + - path: results/reports/fastqc/1234N-1234N_M7 + - path: results/reports/fastqc/9876T-9876T_M1 + - path: results/reports/fastqc/9876T-9876T_M2 + - path: results/reports/fastqc/9876T-9876T_M4 + - path: results/reports/fastqc/9876T-9876T_M5 - path: results/reports/qualimap/1234N - path: results/reports/qualimap/9876T - path: results/reports/samtools_stats/1234N/1234N.bam.stats diff --git a/tests/test_targeted.yml b/tests/test_targeted.yml index 8ceb0ae552..203c691965 100644 --- a/tests/test_targeted.yml +++ b/tests/test_targeted.yml @@ -1,34 +1,27 @@ - name: Run default pipeline with target bed - command: nextflow run main.nf -profile test_targeted,docker + command: nextflow run main.nf -profile test,targeted,docker tags: - preprocessing - targeted files: - path: results/preprocessing/1234N/mapped/1234N.bam - path: results/preprocessing/1234N/mapped/1234N.bam.bai - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M1/1234N-1234N_M1_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M2/1234N-1234N_M2_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M4/1234N-1234N_M4_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.html - - path: 
results/reports/fastqc/1234N-1234N_M5/1234N-1234N_M5_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M6/1234N-1234N_M6_2_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_1_fastqc.zip - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.html - - path: results/reports/fastqc/1234N-1234N_M7/1234N-1234N_M7_2_fastqc.zip + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam + - path: results/preprocessing/1234N/markduplicates/1234N.md.bam.bai + - path: results/preprocessing/1234N/recal_table/1234N.recal.table + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam + - path: results/preprocessing/1234N/recalibrated/1234N.recal.bam.bai + - path: results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_1234N.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv + - path: results/preprocessing/csv/markduplicates_no_table_1234N.csv + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_1234N.csv + - path: results/reports/fastqc/1234N-1234N_M1 + - path: results/reports/fastqc/1234N-1234N_M2 + - path: results/reports/fastqc/1234N-1234N_M4 + - path: results/reports/fastqc/1234N-1234N_M5 + - path: results/reports/fastqc/1234N-1234N_M6 + - path: results/reports/fastqc/1234N-1234N_M7 - path: results/reports/qualimap/1234N - path: results/reports/samtools_stats/1234N/1234N.bam.stats \ No newline at end of file diff --git a/tests/test_tools.yml b/tests/test_tools.yml index 1fe5b35cb6..e8a7af1c0a 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -1,5 +1,5 @@ - name: Run variant calling for Haplotypecaller - command: nextflow run main.nf -profile test_tool,docker --tools Haplotypecaller + command: nextflow run main.nf -profile test,tool,docker --tools Haplotypecaller tags: - haplotypecaller - intervals @@ -9,12 +9,8 @@ - path: results/variant_calling/1234N/haplotypecaller/haplotypecaller_1234N.vcf.gz.tbi - path: results/variant_calling/1234N/haplotypecaller_gvcf/haplotypecaller_gvcf_1234N.vcf.gz - path: results/variant_calling/1234N/haplotypecaller_gvcf/haplotypecaller_gvcf_1234N.vcf.gz.tbi - - path: results/variant_calling/9876T/haplotypecaller/haplotypecaller_9876T.vcf.gz - - path: results/variant_calling/9876T/haplotypecaller/haplotypecaller_9876T.vcf.gz.tbi - - path: results/variant_calling/9876T/haplotypecaller_gvcf/haplotypecaller_gvcf_9876T.vcf.gz - - path: results/variant_calling/9876T/haplotypecaller_gvcf/haplotypecaller_gvcf_9876T.vcf.gz.tbi - name: Run variant calling for Haplotypecaller without intervals - command: nextflow run main.nf -profile test_tool,docker --tools Haplotypecaller --no_intervals + command: nextflow run main.nf -profile test,tool,docker --tools Haplotypecaller --no_intervals tags: - haplotypecaller - no_intervals @@ -24,18 +20,13 @@ - path: results/variant_calling/1234N/haplotypecaller/haplotypecaller_1234N.vcf.gz.tbi - path: results/variant_calling/1234N/haplotypecaller_gvcf/haplotypecaller_gvcf_1234N.vcf.gz - path: results/variant_calling/1234N/haplotypecaller_gvcf/haplotypecaller_gvcf_1234N.vcf.gz.tbi - - path: results/variant_calling/9876T/haplotypecaller/haplotypecaller_9876T.vcf.gz - 
- path: results/variant_calling/9876T/haplotypecaller/haplotypecaller_9876T.vcf.gz.tbi - - path: results/variant_calling/9876T/haplotypecaller_gvcf/haplotypecaller_gvcf_9876T.vcf.gz - - path: results/variant_calling/9876T/haplotypecaller_gvcf/haplotypecaller_gvcf_9876T.vcf.gz.tbi -- name: Run variant calling for MSIsensor - command: nextflow run main.nf -profile test_tool,docker --tools MSIsensor +- name: Run variant calling for MSIsensor-pro + command: nextflow run main.nf -profile test,tool_pair,docker --tools msisensorpro tags: - - intervals - - msisensor + - msisensorpro - variant_calling files: - - path: results/variant_calling/9876T_vs_1234N/msisensor/msisensor_9876T_vs_1234N_dis.list - - path: results/variant_calling/9876T_vs_1234N/msisensor/msisensor_9876T_vs_1234N_germline.list - - path: results/variant_calling/9876T_vs_1234N/msisensor/msisensor_9876T_vs_1234N.list - - path: results/variant_calling/9876T_vs_1234N/msisensor/msisensor_9876T_vs_1234N_somatic.list \ No newline at end of file + - path: results/variant_calling/9876T_vs_1234N/msisensorpro/msisensorpro_9876T_vs_1234N_dis.list + - path: results/variant_calling/9876T_vs_1234N/msisensorpro/msisensorpro_9876T_vs_1234N_germline.list + - path: results/variant_calling/9876T_vs_1234N/msisensorpro/msisensorpro_9876T_vs_1234N.list + - path: results/variant_calling/9876T_vs_1234N/msisensorpro/msisensorpro_9876T_vs_1234N_somatic.list \ No newline at end of file diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 7ee8f844d7..034651c59c 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1,147 +1,105 @@ -//////////////////////////////////////////////////// -/* -- INCLUDE SAREK FUNCTIONS -- */ -//////////////////////////////////////////////////// - -include { - check_parameter_existence; - check_parameter_list; - define_anno_list; - define_skip_qc_list; - define_step_list; - define_tool_list; - extract_bam; - extract_fastq; - extract_fastq_from_dir; - extract_recal; - has_extension -} from '../modules/local/functions' - -//////////////////////////////////////////////////// -/* -- SET UP CONFIGURATION VARIABLES -- */ -//////////////////////////////////////////////////// - -step_list = define_step_list() -step = params.step ? params.step.toLowerCase().replaceAll('-', '').replaceAll('_', '') : '' - -if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' -if (!check_parameter_existence(step, step_list)) exit 1, "Unknown step ${step}, see --help for more information" - -tool_list = define_tool_list() -tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : [] -if (step == 'controlfreec') tools = ['controlfreec'] -if (!check_parameter_list(tools, tool_list)) exit 1, 'Unknown tool(s), see --help for more information' - -skip_qc_list = define_skip_qc_list() -skip_qc = params.skip_qc ? params.skip_qc == 'all' ? skip_qc_list : params.skip_qc.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : [] -if (!check_parameter_list(skip_qc, skip_qc_list)) exit 1, 'Unknown QC tool(s), see --help for more information' - -anno_list = define_anno_list() -annotate_tools = params.annotate_tools ? 
params.annotate_tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '')} : [] -if (!check_parameter_list(annotate_tools,anno_list)) exit 1, 'Unknown tool(s) to annotate, see --help for more information' - -if (!(params.aligner in ['bwa-mem', 'bwa-mem2'])) exit 1, 'Unknown aligner, see --help for more information' +/* +======================================================================================== + VALIDATE INPUTS +======================================================================================== +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowSarek.initialise(params, log) + +// Check input path parameters to see if they exist +checkPathParamList = [ + params.ac_loci, + params.ac_loci_gc, + params.cadd_indels, + params.cadd_indels_tbi, + params.cadd_wg_snvs, + params.cadd_wg_snvs_tbi, + params.chr_dir, + params.chr_length, + params.dbsnp, + params.fasta, + params.fasta_fai, + params.germline_resource, + params.input, + params.known_indels, + params.mappability, + params.multiqc_config, + params.pon, + params.snpeff_cache, + params.target_bed, + params.vep_cache +] + +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +input_sample = Channel.empty() -// // Check parameters -if ((params.ascat_ploidy && !params.ascat_purity) || (!params.ascat_ploidy && params.ascat_purity)) exit 1, 'Please specify both --ascat_purity and --ascat_ploidy, or none of them' -if (params.cf_window && params.cf_coeff) exit 1, 'Please specify either --cf_window OR --cf_coeff, but not both of them' -if (params.umi && !(params.read_structure1 && params.read_structure2)) exit 1, 'Please specify both --read_structure1 and --read_structure2, when using --umi' +if (params.input) csv_file = file(params.input) +else { + log.warn "No samplesheet specified, attempting to restart from csv files present in ${params.outdir}" + switch (params.step.toLowerCase()) { + case 'mapping': break + case 'prepare_recalibration': csv_file = file("${params.outdir}/preprocessing/csv/markduplicates_no_table.csv", checkIfExists: true); break + case 'recalibrate': csv_file = file("${params.outdir}/preprocessing/csv/markduplicates.csv", checkIfExists: true); break + case 'variant_calling': csv_file = file("${params.outdir}/preprocessing/csv/recalibrated.csv", checkIfExists: true); break + // case 'controlfreec': csv_file = file("${params.outdir}/variant_calling/csv/control-freec_mpileup.csv", checkIfExists: true); break + case 'annotate': break + default: exit 1, "Unknown step ${params.step}" + } +} -// Handle input -tsv_path = null -if (params.input && (has_extension(params.input, "tsv") || has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) tsv_path = params.input -if (params.input && (has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) step = "annotate" +input_sample = extract_csv(csv_file) save_bam_mapped = params.skip_markduplicates ? true : params.save_bam_mapped ? 
true : false -// If no input file specified, trying to get TSV files corresponding to step in the TSV directory -// only for steps preparerecalibration, recalibrate, variantcalling and controlfreec -if (!params.input && params.sentieon) { - switch (step) { - case 'mapping': break - case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_deduped.tsv"; break - case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_recalibrated.tsv"; break - case 'annotate': break - default: exit 1, "Unknown step ${step}" - } -} else if (!params.input && !params.sentieon && !params.skip_markduplicates) { - switch (step) { - case 'mapping': break - case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates_no_table.tsv"; break - case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates.tsv"; break - case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break - case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break - case 'annotate': break - default: exit 1, "Unknown step ${step}" - } -} else if (!params.input && !params.sentieon && params.skip_markduplicates) { - switch (step) { - case 'mapping': break - case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/mapped.tsv"; break - case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/mapped_no_markduplicates.tsv"; break - case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break - case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break - case 'annotate': break - default: exit 1, "Unknown step ${step}" - } +// Save AWS IGenomes file containing annotation version +def anno_readme = params.genomes[ params.genome ]?.readme +if (anno_readme && file(anno_readme).exists()) { + file("${params.outdir}/genome/").mkdirs() + file(anno_readme).copyTo("${params.outdir}/genome/") } -input_sample = Channel.empty() -if (tsv_path) { - tsv_file = file(tsv_path) - switch (step) { - case 'mapping': input_sample = extract_fastq(tsv_file); break - case 'preparerecalibration': input_sample = extract_bam(tsv_file); break - case 'recalibrate': input_sample = extract_recal(tsv_file); break - case 'variantcalling': input_sample = extract_bam(tsv_file); break - case 'controlfreec': input_sample = extract_pileup(tsv_file); break - case 'annotate': break - default: exit 1, "Unknown step ${step}" - } -} else if (params.input && !has_extension(params.input, "tsv")) { - log.info "No TSV file" - if (step != 'mapping') exit 1, 'No step other than "mapping" supports a directory as an input' - log.info "Reading ${params.input} directory" - log.warn "[nf-core/sarek] in ${params.input} directory, all fastqs are assuming to be from the same sample, which is assumed to be a germline one" - input_sample = extract_fastq_from_dir(params.input) - tsv_file = params.input // used in the reports -} else if (tsv_path && step == 'annotate') { - log.info "Annotating ${tsv_path}" -} else if (step == 'annotate') { - log.info "Trying automatic annotation on files in the VariantCalling/ directory" -} else exit 1, 'No sample were defined, see --help' +// Stage dummy file to be used as an optional input where required +ch_dummy_file = file("$projectDir/assets/dummy_file.txt", checkIfExists: true) //////////////////////////////////////////////////// /* -- UPDATE MODULES OPTIONS BASED ON PARAMS -- */ 
//////////////////////////////////////////////////// -modules = params.modules - -if (params.save_reference) modules['build_intervals'].publish_files = ['bed':'intervals'] -if (params.save_reference) modules['bwa_index'].publish_files = ['amb':'bwa', 'ann':'bwa', 'bwt':'bwa', 'pac':'bwa', 'sa':'bwa'] -if (params.save_reference) modules['bwamem2_index'].publish_files = ['0123':'bwamem2', 'amb':'bwamem2', 'ann':'bwamem2', 'bwt.2bit.64':'bwamem2', 'bwt.8bit.32':'bwamem2', 'pac':'bwamem2'] -if (params.save_reference) modules['create_intervals_bed'].publish_files = ['bed':'intervals'] -if (params.save_reference) modules['dict'].publish_files = ['dict':'dict'] -if (params.save_reference) modules['index_target_bed'].publish_files = ['bed.gz':'target', 'bed.gz.tbi':'target'] -if (params.save_reference) modules['msisensor_scan'].publish_files = ['list':'msi'] -if (params.save_reference) modules['samtools_faidx'].publish_files = ['fai':'fai'] -if (params.save_reference) modules['tabix_dbsnp'].publish_files = ['vcf.gz.tbi':'dbsnp'] -if (params.save_reference) modules['tabix_germline_resource'].publish_files = ['vcf.gz.tbi':'germline_resource'] -if (params.save_reference) modules['tabix_known_indels'].publish_files = ['vcf.gz.tbi':'known_indels'] -if (params.save_reference) modules['tabix_pon'].publish_files = ['vcf.gz.tbi':'pon'] -if (save_bam_mapped) modules['samtools_index_mapping'].publish_files = ['bam':'mapped', 'bai':'mapped'] -if (params.skip_markduplicates) modules['baserecalibrator'].publish_files = ['recal.table':'mapped'] -if (params.skip_markduplicates) modules['gatherbqsrreports'].publish_files = ['recal.table':'mapped'] +def modules = params.modules.clone() + +if (params.save_reference) modules['build_intervals'].publish_files = ['bed':'intervals'] +if (params.save_reference) modules['bwa_index'].publish_files = ['amb':'bwa', 'ann':'bwa', 'bwt':'bwa', 'pac':'bwa', 'sa':'bwa'] +if (params.save_reference) modules['bwamem2_index'].publish_files = ['0123':'bwamem2', 'amb':'bwamem2', 'ann':'bwamem2', 'bwt.2bit.64':'bwamem2', 'bwt.8bit.32':'bwamem2', 'pac':'bwamem2'] +if (params.save_reference) modules['create_intervals_bed'].publish_files = ['bed':'intervals'] +if (params.save_reference) modules['dict'].publish_files = ['dict':'dict'] +if (params.save_reference) modules['index_target_bed'].publish_files = ['bed.gz':'target', 'bed.gz.tbi':'target'] +if (params.save_reference) modules['msisensorpro_scan'].publish_files = ['list':'msi'] +if (params.save_reference) modules['samtools_faidx'].publish_files = ['fai':'fai'] +if (params.save_reference) modules['tabix_dbsnp'].publish_files = ['vcf.gz.tbi':'dbsnp'] +if (params.save_reference) modules['tabix_germline_resource'].publish_files = ['vcf.gz.tbi':'germline_resource'] +if (params.save_reference) modules['tabix_known_indels'].publish_files = ['vcf.gz.tbi':'known_indels'] +if (params.save_reference) modules['tabix_pon'].publish_files = ['vcf.gz.tbi':'pon'] +if (save_bam_mapped) modules['samtools_index_mapping'].publish_files = ['bam':'mapped', 'bai':'mapped'] +if (params.skip_markduplicates) modules['baserecalibrator'].publish_files = ['recal.table':'mapped'] +if (params.skip_markduplicates) modules['gatherbqsrreports'].publish_files = ['recal.table':'mapped'] +if (!params.skip_markduplicates) modules['baserecalibrator'].publish_files = false // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -chr_dir = params.chr_dir ? file(params.chr_dir) : [] -chr_length = params.chr_length ? 
file(params.chr_length) : [] -dbsnp = params.dbsnp ? file(params.dbsnp) : [] -fasta = params.fasta ? file(params.fasta) : [] -germline_resource = params.germline_resource ? file(params.germline_resource) : [] -known_indels = params.known_indels ? file(params.known_indels) : [] -loci = params.ac_loci ? file(params.ac_loci) : [] -loci_gc = params.ac_loci_gc ? file(params.ac_loci_gc) : [] -mappability = params.mappability ? file(params.mappability) : [] +chr_dir = params.chr_dir ? file(params.chr_dir) : ch_dummy_file +chr_length = params.chr_length ? file(params.chr_length) : ch_dummy_file +dbsnp = params.dbsnp ? file(params.dbsnp) : ch_dummy_file +fasta = params.fasta ? file(params.fasta) : ch_dummy_file +fasta_fai = params.fasta_fai ? file(params.fasta_fai) : ch_dummy_file +germline_resource = params.germline_resource ? file(params.germline_resource) : ch_dummy_file +known_indels = params.known_indels ? file(params.known_indels) : ch_dummy_file +loci = params.ac_loci ? file(params.ac_loci) : ch_dummy_file +loci_gc = params.ac_loci_gc ? file(params.ac_loci_gc) : ch_dummy_file +mappability = params.mappability ? file(params.mappability) : ch_dummy_file // Initialize value channels based on params, defined in the params.genomes[params.genome] scope snpeff_db = params.snpeff_db ?: Channel.empty() @@ -149,41 +107,38 @@ snpeff_species = params.species ?: Channel.empty() vep_cache_version = params.vep_cache_version ?: Channel.empty() // Initialize files channels based on params, not defined within the params.genomes[params.genome] scope -cadd_indels = params.cadd_indels ? file(params.cadd_indels) : [] -cadd_indels_tbi = params.cadd_indels_tbi ? file(params.cadd_indels_tbi) : [] -cadd_wg_snvs = params.cadd_wg_snvs ? file(params.cadd_wg_snvs) : [] -cadd_wg_snvs_tbi = params.cadd_wg_snvs_tbi ? file(params.cadd_wg_snvs_tbi) : [] -pon = params.pon ? file(params.pon) : [] -snpeff_cache = params.snpeff_cache ? file(params.snpeff_cache) : [] -target_bed = params.target_bed ? file(params.target_bed) : [] -vep_cache = params.vep_cache ? file(params.vep_cache) : [] +cadd_indels = params.cadd_indels ? file(params.cadd_indels) : ch_dummy_file +cadd_indels_tbi = params.cadd_indels_tbi ? file(params.cadd_indels_tbi) : ch_dummy_file +cadd_wg_snvs = params.cadd_wg_snvs ? file(params.cadd_wg_snvs) : ch_dummy_file +cadd_wg_snvs_tbi = params.cadd_wg_snvs_tbi ? file(params.cadd_wg_snvs_tbi) : ch_dummy_file +pon = params.pon ? file(params.pon) : ch_dummy_file +snpeff_cache = params.snpeff_cache ? file(params.snpeff_cache) : ch_dummy_file +target_bed = params.target_bed ? file(params.target_bed) : ch_dummy_file +vep_cache = params.vep_cache ? 

 // Initialize value channels based on params, not defined within the params.genomes[params.genome] scope
 read_structure1 = params.read_structure1 ?: Channel.empty()
 read_structure2 = params.read_structure2 ?: Channel.empty()

-if ('mutect2' in tools && !(params.pon)) log.warn "[nf-core/sarek] Mutect2 was requested, but as no panel of normals were given, results will not be optimal"
-if (params.sentieon) log.warn "[nf-core/sarek] Sentieon will be used, only works if Sentieon is available where nf-core/sarek is run"
-
 ////////////////////////////////////////////////////
 /* -- INCLUDE LOCAL SUBWORKFLOWS -- */
 ////////////////////////////////////////////////////

-include { BUILD_INDICES } from '../subworkflow/local/build_indices' addParams(
+include { BUILD_INDICES } from '../subworkflows/local/build_indices' addParams(
     build_intervals_options: modules['build_intervals'],
     bwa_index_options: modules['bwa_index'],
     bwamem2_index_options: modules['bwamem2_index'],
     create_intervals_bed_options: modules['create_intervals_bed'],
-    gatk_dict_options: modules['dict'],
+    gatk4_dict_options: modules['dict'],
     index_target_bed_options: modules['index_target_bed'],
-    msisensor_scan_options: modules['msisensor_scan'],
+    msisensorpro_scan_options: modules['msisensorpro_scan'],
     samtools_faidx_options: modules['samtools_faidx'],
     tabix_dbsnp_options: modules['tabix_dbsnp'],
     tabix_germline_resource_options: modules['tabix_germline_resource'],
     tabix_known_indels_options: modules['tabix_known_indels'],
     tabix_pon_options: modules['tabix_pon']
 )
-include { MAPPING } from '../subworkflow/local/mapping' addParams(
+include { MAPPING } from '../subworkflows/nf-core/mapping' addParams(
     bwamem1_mem_options: modules['bwa_mem1_mem'],
     bwamem1_mem_tumor_options: modules['bwa_mem1_mem_tumor'],
     bwamem2_mem_options: modules['bwa_mem2_mem'],
@@ -193,32 +148,37 @@ include { MAPPING } from '../subworkflow/local/mapping' addParams(
     samtools_index_options: modules['samtools_index_mapping'],
     samtools_stats_options: modules['samtools_stats_mapping']
 )
-include { MARKDUPLICATES } from '../subworkflow/local/markduplicates' addParams(
-    markduplicates_options: modules['markduplicates']
+include { MAPPING_CSV } from '../subworkflows/local/mapping_csv'
+include { MARKDUPLICATES } from '../subworkflows/nf-core/markduplicates' addParams(
+    markduplicates_options: modules['markduplicates'],
+    markduplicatesspark_options: modules['markduplicatesspark']
 )
-include { PREPARE_RECALIBRATION } from '../subworkflow/local/prepare_recalibration' addParams(
+include { MARKDUPLICATES_CSV } from '../subworkflows/local/markduplicates_csv'
+include { PREPARE_RECALIBRATION } from '../subworkflows/nf-core/prepare_recalibration' addParams(
     baserecalibrator_options: modules['baserecalibrator'],
     gatherbqsrreports_options: modules['gatherbqsrreports']
 )
-include { RECALIBRATE } from '../subworkflow/local/recalibrate' addParams(
+include { PREPARE_RECALIBRATION_CSV } from '../subworkflows/local/prepare_recalibration_csv'
+include { RECALIBRATE } from '../subworkflows/nf-core/recalibrate' addParams(
     applybqsr_options: modules['applybqsr'],
     merge_bam_options: modules['merge_bam_recalibrate'],
     qualimap_bamqc_options: modules['qualimap_bamqc_recalibrate'],
     samtools_index_options: modules['samtools_index_recalibrate'],
     samtools_stats_options: modules['samtools_stats_recalibrate']
 )
-include { GERMLINE_VARIANT_CALLING } from '../subworkflow/local/germline_variant_calling' addParams(
+include { RECALIBRATE_CSV } from '../subworkflows/local/recalibrate_csv'
+include { GERMLINE_VARIANT_CALLING } from '../subworkflows/local/germline_variant_calling' addParams(
     concat_gvcf_options: modules['concat_gvcf'],
     concat_haplotypecaller_options: modules['concat_haplotypecaller'],
     genotypegvcf_options: modules['genotypegvcf'],
     haplotypecaller_options: modules['haplotypecaller'],
     strelka_options: modules['strelka_germline']
 )
-// include { TUMOR_VARIANT_CALLING } from '../subworkflow/local/tumor_variant_calling' addParams(
+// include { TUMOR_VARIANT_CALLING } from '../subworkflows/local/tumor_variant_calling' addParams(
 // )
-include { PAIR_VARIANT_CALLING } from '../subworkflow/local/pair_variant_calling' addParams(
+include { PAIR_VARIANT_CALLING } from '../subworkflows/local/pair_variant_calling' addParams(
     manta_options: modules['manta_somatic'],
-    msisensor_msi_options: modules['msisensor_msi'],
+    msisensorpro_msi_options: modules['msisensorpro_msi'],
     strelka_bp_options: modules['strelka_somatic_bp'],
     strelka_options: modules['strelka_somatic']
 )
@@ -233,7 +193,7 @@ include { MULTIQC } from '../modules/nf-core/software/mult
 /* -- INCLUDE NF-CORE SUBWORKFLOWS -- */
 ////////////////////////////////////////////////////

-include { FASTQC_TRIMGALORE } from '../subworkflow/nf-core/fastqc_trimgalore' addParams(
+include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' addParams(
     fastqc_options: modules['fastqc'],
     trimgalore_options: modules['trimgalore']
 )
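Every include above hands its options through `addParams`, which overlays those keys onto the `params` visible inside the included script only; the subworkflow then forwards them to its modules the same way. A hedged sketch of the receiving end, with hypothetical file and option names:

    // subworkflows/local/example.nf (hypothetical)
    params.example_options = [:]

    include { EXAMPLE_TOOL } from '../../modules/local/example_tool' addParams( options: params.example_options )

The caller's `addParams(example_options: modules['example'])` therefore reaches the module without touching the global configuration.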
@@ -247,12 +207,11 @@ workflow SAREK {

     BUILD_INDICES(
         dbsnp,
         fasta,
+        fasta_fai,
         germline_resource,
         known_indels,
         pon,
-        step,
-        target_bed,
-        tools)
+        target_bed)

     intervals = BUILD_INDICES.out.intervals

@@ -265,7 +224,10 @@ known_indels_tbi = params.known_indels ? params.known_indels_index ? file(params.known_indels_index) : BUILD_INDICES.out.known_indels_tbi.collect() : []
     pon_tbi = params.pon ? params.pon_index ? file(params.pon_index) : BUILD_INDICES.out.pon_tbi : []

-    msisensor_scan = BUILD_INDICES.out.msisensor_scan
+    known_sites = [dbsnp, known_indels]
+    known_sites_tbi = dbsnp_tbi.mix(known_indels_tbi).collect()
+
+    msisensorpro_scan = BUILD_INDICES.out.msisensorpro_scan
     target_bed_gz_tbi = BUILD_INDICES.out.target_bed_gz_tbi
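`known_sites` and `known_sites_tbi` bundle the dbsnp and known-indels resources into single BQSR inputs: `mix` merges the two index channels and `collect` gathers everything into one value channel that every task can read. A standalone sketch of the operator pair (file names illustrative):

    ch_a = Channel.of('dbsnp.vcf.gz.tbi')
    ch_b = Channel.of('mills_and_1000G.indels.vcf.gz.tbi')

    ch_a.mix(ch_b)     // one channel emitting both items
        .collect()     // value channel holding the whole list, reusable any number of times
        .view()        // [dbsnp.vcf.gz.tbi, mills_and_1000G.indels.vcf.gz.tbi]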

     ////////////////////////////////////////////////////
@@ -282,140 +244,186 @@ workflow SAREK {

     // trim only with `--trim_fastq`
     // additional options to be set up

-    FASTQC_TRIMGALORE(
-        input_sample,
-        ('fastqc' in skip_qc || step != "mapping"),
-        !(params.trim_fastq))
-
-    reads_input = FASTQC_TRIMGALORE.out.reads
-
-    qc_reports = qc_reports.mix(
-        FASTQC_TRIMGALORE.out.fastqc_html,
-        FASTQC_TRIMGALORE.out.fastqc_zip,
-        FASTQC_TRIMGALORE.out.trim_html,
-        FASTQC_TRIMGALORE.out.trim_log,
-        FASTQC_TRIMGALORE.out.trim_zip)
-
-    // STEP 1: MAPPING READS TO REFERENCE GENOME WITH BWA-MEM
-
-    MAPPING(
-        ('bamqc' in skip_qc),
-        ('samtools' in skip_qc),
-        bwa,
-        fai,
-        fasta,
-        reads_input,
-        save_bam_mapped,
-        step,
-        target_bed)
-
-    bam_mapped = MAPPING.out.bam
-    bam_mapped_qc = MAPPING.out.qc
-
-    qc_reports = qc_reports.mix(bam_mapped_qc)
-
-    // STEP 2: MARKING DUPLICATES
-
-    bam_markduplicates = channel.empty()
-
-    if (step == 'preparerecalibration') {
-        if (params.skip_markduplicates) bam_markduplicates = bam_mapped
-        else {
-            MARKDUPLICATES(bam_mapped, step)
+    if (params.step == 'mapping') {
+        FASTQC_TRIMGALORE(
+            input_sample,
+            !(params.trim_fastq))
+
+        reads_input = FASTQC_TRIMGALORE.out.reads
+
+        qc_reports = qc_reports.mix(
+            FASTQC_TRIMGALORE.out.fastqc_html,
+            FASTQC_TRIMGALORE.out.fastqc_zip,
+            FASTQC_TRIMGALORE.out.trim_html,
+            FASTQC_TRIMGALORE.out.trim_log,
+            FASTQC_TRIMGALORE.out.trim_zip)
+
+        // STEP 1: MAPPING READS TO REFERENCE GENOME
+        MAPPING(
+            'bamqc' in params.skip_qc,
+            'samtools' in params.skip_qc,
+            params.aligner,
+            bwa,
+            fai,
+            fasta,
+            reads_input,
+            target_bed)
+
+        bam_mapped = MAPPING.out.bam
+        bam_mapped_qc = MAPPING.out.qc
+
+        qc_reports = qc_reports.mix(bam_mapped_qc)
+
+        // Create CSV to restart from this step
+        MAPPING_CSV(bam_mapped, save_bam_mapped, params.skip_markduplicates)
+
+        if (params.skip_markduplicates) {
+            bam_markduplicates = bam_mapped
+        } else {
+            // STEP 2: MARKING DUPLICATES
+            MARKDUPLICATES(bam_mapped, params.use_gatk_spark, !('markduplicates' in params.skip_qc))
             bam_markduplicates = MARKDUPLICATES.out.bam
+
+            // Create CSV to restart from this step
+            MARKDUPLICATES_CSV(bam_markduplicates)
         }
     }

-    if (step == 'preparerecalibration') bam_markduplicates = input_sample
-
-    // STEP 3: CREATING RECALIBRATION TABLES
+    if (params.step.toLowerCase() == 'prepare_recalibration') bam_markduplicates = input_sample
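The new gating makes each phase conditional on where the run starts: a phase executes only when `params.step` names it or an earlier entry point, and each later entry point feeds `input_sample` (parsed from the restart CSV) in place of the channel the skipped phase would have produced. A condensed sketch of the control flow, with hypothetical stand-ins for the subworkflow calls:

    def step = params.step.toLowerCase()

    if (step == 'mapping')                bam = MAP()              // hypothetical call
    if (step == 'prepare_recalibration')  bam = input_sample       // restart point
    if (step in ['mapping', 'prepare_recalibration']) PREPARE(bam) // hypothetical call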

-    PREPARE_RECALIBRATION(
-        bam_markduplicates,
-        dbsnp,
-        dbsnp_tbi,
-        dict,
-        fai,
-        fasta,
-        intervals,
-        known_indels,
-        known_indels_tbi,
-        step)
-
-    table_bqsr = PREPARE_RECALIBRATION.out.table_bqsr
-
-    // STEP 4: RECALIBRATING
-    bam_applybqsr = bam_markduplicates.join(table_bqsr)
-
-    if (step == 'recalibrate') bam_applybqsr = input_sample
-
-    RECALIBRATE(
-        ('bamqc' in skip_qc),
-        ('samtools' in skip_qc),
-        bam_applybqsr,
-        dict,
-        fai,
-        fasta,
-        intervals,
-        step,
-        target_bed)
+    if (params.step.toLowerCase() in ['mapping', 'prepare_recalibration']) {
+        // STEP 3: CREATING RECALIBRATION TABLES
+        PREPARE_RECALIBRATION(
+            bam_markduplicates,
+            dict,
+            fai,
+            fasta,
+            intervals,
+            known_sites,
+            known_sites_tbi,
+            params.no_intervals)

-    bam_recalibrated = RECALIBRATE.out.bam
-    bam_recalibrated_qc = RECALIBRATE.out.qc
+        table_bqsr = PREPARE_RECALIBRATION.out.table_bqsr
+        PREPARE_RECALIBRATION_CSV(table_bqsr)

-    qc_reports = qc_reports.mix(bam_recalibrated_qc)
+        bam_applybqsr = bam_markduplicates.join(table_bqsr)
+    }

-    bam_variant_calling = bam_recalibrated
+    if (params.step.toLowerCase() == 'recalibrate') bam_applybqsr = input_sample

-    if (step == 'variantcalling') bam_variant_calling = input_sample
+    if (params.step.toLowerCase() in ['mapping', 'prepare_recalibration', 'recalibrate']) {
+        // STEP 4: RECALIBRATING
+        RECALIBRATE(
+            ('bamqc' in params.skip_qc),
+            ('samtools' in params.skip_qc),
+            bam_applybqsr,
+            dict,
+            fai,
+            fasta,
+            intervals,
+            target_bed)

-    ////////////////////////////////////////////////////
-    /* -- GERMLINE VARIANT CALLING -- */
-    ////////////////////////////////////////////////////
+        bam_recalibrated = RECALIBRATE.out.bam
+        bam_recalibrated_qc = RECALIBRATE.out.qc

-    GERMLINE_VARIANT_CALLING(
-        bam_variant_calling,
-        dbsnp,
-        dbsnp_tbi,
-        dict,
-        fai,
-        fasta,
-        intervals,
-        target_bed,
-        target_bed_gz_tbi,
-        tools)
+        RECALIBRATE_CSV(bam_recalibrated)

-    ////////////////////////////////////////////////////
-    /* -- SOMATIC VARIANT CALLING -- */
-    ////////////////////////////////////////////////////
+        qc_reports = qc_reports.mix(bam_recalibrated_qc)

-    // TUMOR_VARIANT_CALLING(
-    //     bam_variant_calling,
-    //     dbsnp,
-    //     dbsnp_tbi,
-    //     dict,
-    //     fai,
-    //     fasta,
-    //     intervals,
-    //     target_bed,
-    //     target_bed_gz_tbi,
-    //     tools)
-
-    PAIR_VARIANT_CALLING(
-        bam_variant_calling,
-        dbsnp,
-        dbsnp_tbi,
-        dict,
-        fai,
-        fasta,
-        intervals,
-        msisensor_scan,
-        target_bed,
-        target_bed_gz_tbi,
-        tools)
+        bam_variant_calling = bam_recalibrated
+    }

-    ////////////////////////////////////////////////////
-    /* -- ANNOTATION -- */
-    ////////////////////////////////////////////////////
+    if (params.step.toLowerCase() == 'variant_calling') bam_variant_calling = input_sample
+
+    if (params.tools != null) {
+
+        ////////////////////////////////////////////////////
+        /* -- GERMLINE VARIANT CALLING -- */
+        ////////////////////////////////////////////////////
+
+        GERMLINE_VARIANT_CALLING(
+            bam_variant_calling,
+            dbsnp,
+            dbsnp_tbi,
+            dict,
+            fai,
+            fasta,
+            intervals,
+            target_bed,
+            target_bed_gz_tbi)
+
+        ////////////////////////////////////////////////////
+        /* -- SOMATIC VARIANT CALLING -- */
+        ////////////////////////////////////////////////////
+
+        // TUMOR_VARIANT_CALLING(
+        //     bam_variant_calling,
+        //     dbsnp,
+        //     dbsnp_tbi,
+        //     dict,
+        //     fai,
+        //     fasta,
+        //     intervals,
+        //     target_bed,
+        //     target_bed_gz_tbi)
+
+        PAIR_VARIANT_CALLING(
+            bam_variant_calling,
+            dbsnp,
+            dbsnp_tbi,
+            dict,
+            fai,
+            fasta,
+            intervals,
+            msisensorpro_scan,
+            target_bed,
+            target_bed_gz_tbi)
+
+        ////////////////////////////////////////////////////
+        /* -- ANNOTATION -- */
+        ////////////////////////////////////////////////////
+    }
+}
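Within that flow, `bam_markduplicates.join(table_bqsr)` is what pairs each duplicate-marked BAM with its recalibration table: `join` matches tuples by their first element, here the `meta` map, so both channels must carry identical keys. A standalone sketch (sample names illustrative):

    bams   = Channel.of([[patient:'p1', sample:'s1'], 'md.bam'])
    tables = Channel.of([[patient:'p1', sample:'s1'], 'recal.table'])

    bams.join(tables).view()   // [[patient:p1, sample:s1], md.bam, recal.table]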
+def extract_csv(csv_file) {
+    Channel.from(csv_file).splitCsv(header: true).map{ row ->
+        def meta = [:]
+
+        meta.patient = row.patient.toString()
+        meta.sample = row.sample.toString()
+
+        // If no gender specified, gender is not considered (only used for somatic CNV)
+        if (row.gender == null) {
+            meta.gender = "NA"
+        } else meta.gender = row.gender.toString()
+
+        // If no status specified, sample is considered normal
+        if (row.status == null) {
+            meta.status = 0
+        } else meta.status = row.status.toInteger()
+
+        if (row.lane != null) {
+            // mapping with fastq
+            meta.id = "${row.sample}-${row.lane}".toString()
+            def read1 = file(row.fastq1, checkIfExists: true)
+            def read2 = file(row.fastq2, checkIfExists: true)
+            def CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : ''
+            def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:ILLUMINA\""
+            meta.read_group = read_group.toString()
+            return [meta, [read1, read2]]
+        } else if (row.table != null) {
+            // recalibration
+            meta.id = meta.sample
+            def bam = file(row.bam, checkIfExists: true)
+            def bai = file(row.bai, checkIfExists: true)
+            def table = file(row.table, checkIfExists: true)
+            return [meta, bam, bai, table]
+        } else {
+            // prepare_recalibration or variant_calling
+            meta.id = meta.sample
+            def bam = file(row.bam, checkIfExists: true)
+            def bai = file(row.bai, checkIfExists: true)
+            return [meta, bam, bai]
+        }
+    }
+}
\ No newline at end of file
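For orientation, here is what `extract_csv` would emit for a minimal mapping-step CSV (contents illustrative only):

    patient,sample,lane,fastq1,fastq2
    patient1,sample1,1,sample1_L001_R1.fastq.gz,sample1_L001_R2.fastq.gz

Each row becomes one channel element of the form

    [[patient:'patient1', sample:'sample1', gender:'NA', status:0, id:'sample1-1', read_group:'...'],
     [sample1_L001_R1.fastq.gz, sample1_L001_R2.fastq.gz]]

with `gender` and `status` falling back to 'NA' and 0 because those columns are absent, while the BAM/BAI (plus table) branches cover the CSVs written for the prepare_recalibration, recalibrate and variant_calling restart points.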