Adding meta to multivcfanalyzer, updating test to nf-test (#5773)

* Adding meta to multivcfanalyzer, updating test to nf-test
* Apply suggestions from code review

  Co-authored-by: James A. Fellows Yates <[email protected]>
* fix meta.yml
* Add sort to vcfs
* Add tab to tags
* add checks for files due to failing md5 sums across docker,conda,singularity

---------

Co-authored-by: James A. Fellows Yates <[email protected]>
Co-authored-by: Simon Pearce <[email protected]>
nf-core · Jun 21, 2024 · edc4ccb · edc4ccb
1 parent 73a6d7e
commit edc4ccb
Show file tree

Hide file tree

Showing 9 changed files with 410 additions and 99 deletions.
diff --git a/modules/nf-core/multivcfanalyzer/main.nf b/modules/nf-core/multivcfanalyzer/main.nf
@@ -8,30 +8,30 @@ process MULTIVCFANALYZER {
         'biocontainers/multivcfanalyzer:0.85.2--hdfd78af_1' }"
 
     input:
-    path vcfs
-    path fasta
-    path snpeff_results
-    path gff
+    tuple val(meta), path(vcfs)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(snpeff_results)
+    tuple val(meta4), path(gff)
     val allele_freqs
     val genotype_quality
     val coverage
     val homozygous_freq
     val heterozygous_freq
-    path gff_exclude
+    tuple val(meta5), path(gff_exclude)
 
 
     output:
-    path('fullAlignment.fasta.gz')                       , emit: full_alignment
-    path('info.txt')                                     , emit: info_txt
-    path('snpAlignment.fasta.gz')                        , emit: snp_alignment
-    path('snpAlignmentIncludingRefGenome.fasta.gz')      , emit: snp_genome_alignment
-    path('snpStatistics.tsv')                            , emit: snpstatistics
-    path('snpTable.tsv')                                 , emit: snptable
-    path('snpTableForSnpEff.tsv')                        , emit: snptable_snpeff
-    path('snpTableWithUncertaintyCalls.tsv')             , emit: snptable_uncertainty
-    path('structureGenotypes.tsv')                       , emit: structure_genotypes
-    path('structureGenotypes_noMissingData-Columns.tsv') , emit: structure_genotypes_nomissing
-    path('MultiVCFAnalyzer.json')                        , emit: json
+    tuple val(meta), path('fullAlignment.fasta.gz')                       , emit: full_alignment
+    tuple val(meta), path('info.txt')                                     , emit: info_txt
+    tuple val(meta), path('snpAlignment.fasta.gz')                        , emit: snp_alignment
+    tuple val(meta), path('snpAlignmentIncludingRefGenome.fasta.gz')      , emit: snp_genome_alignment
+    tuple val(meta), path('snpStatistics.tsv')                            , emit: snpstatistics
+    tuple val(meta), path('snpTable.tsv')                                 , emit: snptable
+    tuple val(meta), path('snpTableForSnpEff.tsv')                        , emit: snptable_snpeff
+    tuple val(meta), path('snpTableWithUncertaintyCalls.tsv')             , emit: snptable_uncertainty
+    tuple val(meta), path('structureGenotypes.tsv')                       , emit: structure_genotypes
+    tuple val(meta), path('structureGenotypes_noMissingData-Columns.tsv') , emit: structure_genotypes_nomissing
+    tuple val(meta), path('MultiVCFAnalyzer.json')                        , emit: json
     path "versions.yml"                                  , emit: versions
 
     when:
@@ -58,7 +58,7 @@ process MULTIVCFANALYZER {
         ${homozygous_freq}  \\
         ${heterozygous_freq}  \\
         ${cmd_gff_exclude}  \\
-        ${vcfs.join(" ")}
+        ${vcfs.sort().join(" ")}
 
     gzip \\
         $args2 \\
@@ -69,4 +69,32 @@ process MULTIVCFANALYZER {
         multivcfanalyzer: \$(echo \$(multivcfanalyzer --help | head -n 1) | cut -f 3 -d ' ' )
     END_VERSIONS
     """
+    stub:
+    // Stub run: writes a placeholder file for each declared output plus versions.yml; no analysis is run.
+    def args2 = task.ext.args2 ?: ''
+    // None of the variables defined in this section are interpolated by the stub script below.
+    def cmd_snpeff_results = snpeff_results ? "${snpeff_results}" : "NA"
+    def cmd_gff            = gff ? "${gff}" : "NA"
+    def cmd_allele_freqs   = allele_freqs ? "T" : "F"
+    def cmd_gff_exclude    = gff_exclude ? "${gff_exclude}" : "NA"
+
+    """
+    echo "" | gzip > fullAlignment.fasta.gz
+    touch info.txt
+    echo "" | gzip > snpAlignment.fasta.gz
+    echo "" | gzip > snpAlignmentIncludingRefGenome.fasta.gz
+    touch snpStatistics.tsv
+    touch snpTable.tsv
+    touch snpTableForSnpEff.tsv
+    touch snpTableWithUncertaintyCalls.tsv
+    touch structureGenotypes.tsv
+    touch structureGenotypes_noMissingData-Columns.tsv
+    touch MultiVCFAnalyzer.json
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        multivcfanalyzer: \$(echo \$(multivcfanalyzer --help | head -n 1) | cut -f 3 -d ' ' )
+    END_VERSIONS
+
+    """
 }
diff --git a/modules/nf-core/multivcfanalyzer/meta.yml b/modules/nf-core/multivcfanalyzer/meta.yml
@@ -14,7 +14,7 @@ tools:
       documentation: "https://github.com/alexherbig/MultiVCFAnalyzer"
       tool_dev_url: "https://github.com/alexherbig/MultiVCFAnalyzer"
       doi: "10.1038/nature13591"
-      licence: "['GPL >=3']"
+      licence: ["GPL >=3"]
 input:
   - vcfs:
       type: file
@@ -48,10 +48,10 @@ input:
         Minimum number of a reads that a position must be covered by to be
         reported
   - homozygous_freq:
-      type: number
+      type: float # fractional threshold (e.g. 0.8), not a whole number
       description: Fraction of reads a base must have to be called 'homozygous'
   - heterozygous_freq:
-      type: mumber
+      type: float # fractional threshold (e.g. 0.2), not a whole number
       description: |
         Fraction of which whereby if a call falls above this value, and lower
         than the homozygous threshold, a base will be called 'heterozygous'.

diff --git a/modules/nf-core/multivcfanalyzer/tests/main.nf.test b/modules/nf-core/multivcfanalyzer/tests/main.nf.test
@@ -0,0 +1,128 @@
+nextflow_process {
+    // nf-test suite for the MULTIVCFANALYZER process: one real run and one stub run on sarscov2 test data.
+    name "Test Process MULTIVCFANALYZER"
+    script "../main.nf"
+    process "MULTIVCFANALYZER"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "multivcfanalyzer"
+    tag "gunzip"
+    tag "gatk/unifiedgenotyper"
+    // Setup shared by both tests: run GATK_UNIFIEDGENOTYPER on two BAMs, then feed its VCF output to GUNZIP.
+    setup {
+            run("GATK_UNIFIEDGENOTYPER") {
+                script "../../gatk/unifiedgenotyper/main.nf"
+                process{
+                    """
+                    input[0] = Channel.of([ [ id:'test' ], // meta map
+                           file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                           file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
+                         ],
+                         [ [ id:'test2' ], // meta map
+                           file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true),
+                           file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true)
+                         ]
+                         )
+                    input[1] = [ [],
+                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                        ]
+                    input[2] = [ [],
+                        file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+                        ]
+                    input[3] = [ [],
+                        file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
+                        ]
+                    input[4] = [[],[]]
+                    input[5] = [[],[]]
+                    input[6] = [[],[]]
+                    input[7] = [[],[]]
+                    """
+                }
+            }
+            run("GUNZIP") {
+                script "../../gunzip/main.nf"
+                process{
+                    """
+                    input[0] = GATK_UNIFIEDGENOTYPER.out.vcf
+                    """
+                }
+            }
+        }
+    // Real run: all GUNZIP outputs are collected into a single [meta, vcfs] tuple with id 'testVCF'.
+    test("sarscov2 - vcf") {
+
+        when {
+            process {
+                """
+                input[0] = GUNZIP.out.gunzip.collect{ meta, vcf -> vcf }.map{ vcf -> [[ id: 'testVCF'], vcf]}
+                input[1] =  [ [] ,
+                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                    ]
+                input[2] = [[],[]]
+                input[3] = [[],[]]
+                input[4] = true
+                input[5] = 30
+                input[6] = 5
+                input[7] = 0.8
+                input[8] = 0.2
+                input[9] = [[],[]]
+                """
+            }
+        }
+        // NOTE(review): the three alignment outputs are *.fasta.gz but are read with readLines() below; confirm the matches are meaningful.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                                  process.out.snptable_snpeff,
+                                  process.out.json,
+                                  process.out.versions,
+                                  file(process.out.info_txt[0][1]).readLines().any { it.contains('Run finished:') },
+                                  file(process.out.full_alignment[0][1]).readLines().any { it.contains('>') },
+                                  file(process.out.snp_alignment[0][1]).readLines().any { it.contains('>') },
+                                  file(process.out.snp_genome_alignment[0][1]).readLines().any { it.contains('>Reference') },
+                                  file(process.out.snpstatistics[0][1]).readLines().any { it.contains('SNP statistics for') },
+                                  file(process.out.snptable[0][1]).readLines().any { it.contains('Position') },
+                                  file(process.out.snptable_uncertainty[0][1]).readLines().any { it.contains('Position') },
+                                  file(process.out.structure_genotypes[0][1]).readLines().any { it.contains('-1') },
+                                  file(process.out.structure_genotypes_nomissing[0][1]).readLines().any { it.contains('-1') }).match()
+                                }
+            )
+        }
+
+    }
+    // Stub run: same inputs as the real run, executed with the -stub option; the whole process.out is snapshotted.
+    test("sarscov2 - vcf - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = GUNZIP.out.gunzip.collect{ meta, vcf -> vcf }.map{ vcf -> [[ id: 'testVCF'], vcf]}
+                input[1] =  [ [] ,
+                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                    ]
+                input[2] = [[],[]]
+                input[3] = [[],[]]
+                input[4] = true
+                input[5] = 30
+                input[6] = 5
+                input[7] = 0.8
+                input[8] = 0.2
+                input[9] = [[],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}