Skip to content

Commit

Permalink
Adding meta to multivcfanalyzer, updating test to nf-test (#5773)
Browse files Browse the repository at this point in the history
* Adding meta to multivcfanalyzer, updating test to nf-test

* Apply suggestions from code review

Co-authored-by: James A. Fellows Yates <[email protected]>

* fix meta.yml

* Add sort to vcfs

* Add tab to tags

* add checks for files due to failing md5 sums across docker,conda,singularity

---------

Co-authored-by: James A. Fellows Yates <[email protected]>
Co-authored-by: Simon Pearce <[email protected]>
  • Loading branch information
3 people authored Jun 21, 2024
1 parent 73a6d7e commit edc4ccb
Show file tree
Hide file tree
Showing 9 changed files with 410 additions and 99 deletions.
62 changes: 45 additions & 17 deletions modules/nf-core/multivcfanalyzer/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,30 @@ process MULTIVCFANALYZER {
'biocontainers/multivcfanalyzer:0.85.2--hdfd78af_1' }"

input:
path vcfs
path fasta
path snpeff_results
path gff
tuple val(meta), path(vcfs)
tuple val(meta2), path(fasta)
tuple val(meta3), path(snpeff_results)
tuple val(meta4), path(gff)
val allele_freqs
val genotype_quality
val coverage
val homozygous_freq
val heterozygous_freq
path gff_exclude
tuple val(meta5), path(gff_exclude)


output:
path('fullAlignment.fasta.gz') , emit: full_alignment
path('info.txt') , emit: info_txt
path('snpAlignment.fasta.gz') , emit: snp_alignment
path('snpAlignmentIncludingRefGenome.fasta.gz') , emit: snp_genome_alignment
path('snpStatistics.tsv') , emit: snpstatistics
path('snpTable.tsv') , emit: snptable
path('snpTableForSnpEff.tsv') , emit: snptable_snpeff
path('snpTableWithUncertaintyCalls.tsv') , emit: snptable_uncertainty
path('structureGenotypes.tsv') , emit: structure_genotypes
path('structureGenotypes_noMissingData-Columns.tsv') , emit: structure_genotypes_nomissing
path('MultiVCFAnalyzer.json') , emit: json
tuple val(meta), path('fullAlignment.fasta.gz') , emit: full_alignment
tuple val(meta), path('info.txt') , emit: info_txt
tuple val(meta), path('snpAlignment.fasta.gz') , emit: snp_alignment
tuple val(meta), path('snpAlignmentIncludingRefGenome.fasta.gz') , emit: snp_genome_alignment
tuple val(meta), path('snpStatistics.tsv') , emit: snpstatistics
tuple val(meta), path('snpTable.tsv') , emit: snptable
tuple val(meta), path('snpTableForSnpEff.tsv') , emit: snptable_snpeff
tuple val(meta), path('snpTableWithUncertaintyCalls.tsv') , emit: snptable_uncertainty
tuple val(meta), path('structureGenotypes.tsv') , emit: structure_genotypes
tuple val(meta), path('structureGenotypes_noMissingData-Columns.tsv') , emit: structure_genotypes_nomissing
tuple val(meta), path('MultiVCFAnalyzer.json') , emit: json
path "versions.yml" , emit: versions

when:
Expand All @@ -58,7 +58,7 @@ process MULTIVCFANALYZER {
${homozygous_freq} \\
${heterozygous_freq} \\
${cmd_gff_exclude} \\
${vcfs.join(" ")}
${vcfs.sort().join(" ")}
gzip \\
$args2 \\
Expand All @@ -69,4 +69,32 @@ process MULTIVCFANALYZER {
multivcfanalyzer: \$(echo \$(multivcfanalyzer --help | head -n 1) | cut -f 3 -d ' ' )
END_VERSIONS
"""
stub:
    // Stub run: create an empty placeholder for every declared output so that
    // downstream wiring can be tested without executing MultiVCFAnalyzer.
    // No task.ext arguments or optional-input handling is needed here, because
    // none of those values are interpolated into the stub script below.
    // NOTE(review): confirm that the script: section (outside this view) builds
    // the gff_exclude argument from `gff_exclude` and not from `gff`.
    """
    echo "" | gzip > fullAlignment.fasta.gz
    touch info.txt
    echo "" | gzip > snpAlignment.fasta.gz
    echo "" | gzip > snpAlignmentIncludingRefGenome.fasta.gz
    touch snpStatistics.tsv
    touch snpTable.tsv
    touch snpTableForSnpEff.tsv
    touch snpTableWithUncertaintyCalls.tsv
    touch structureGenotypes.tsv
    touch structureGenotypes_noMissingData-Columns.tsv
    touch MultiVCFAnalyzer.json

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        multivcfanalyzer: \$(echo \$(multivcfanalyzer --help | head -n 1) | cut -f 3 -d ' ' )
    END_VERSIONS
    """
}
6 changes: 3 additions & 3 deletions modules/nf-core/multivcfanalyzer/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ tools:
documentation: "https://github.com/alexherbig/MultiVCFAnalyzer"
tool_dev_url: "https://github.com/alexherbig/MultiVCFAnalyzer"
doi: "10.1038/nature13591"
licence: "['GPL >=3']"
licence: ["GPL >=3"]
input:
- vcfs:
type: file
Expand Down Expand Up @@ -48,10 +48,10 @@ input:
Minimum number of reads that a position must be covered by to be
reported
- homozygous_freq:
type: number
type: float
description: Fraction of reads a base must have to be called 'homozygous'
- heterozygous_freq:
type: mumber
type: float
description: |
Fraction of reads a call must exceed, while remaining below the
homozygous threshold, for a base to be called 'heterozygous'.
Expand Down
128 changes: 128 additions & 0 deletions modules/nf-core/multivcfanalyzer/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// nf-test specification for the MULTIVCFANALYZER process defined in ../main.nf.
// VCF inputs are generated on the fly in the setup block (GATK UnifiedGenotyper
// followed by GUNZIP) rather than read from pre-built test data.
nextflow_process {

name "Test Process MULTIVCFANALYZER"
script "../main.nf"
process "MULTIVCFANALYZER"

// Tags include the two helper modules that the setup block runs.
tag "modules"
tag "modules_nfcore"
tag "multivcfanalyzer"
tag "gunzip"
tag "gatk/unifiedgenotyper"

// Setup: call variants for two sarscov2 BAMs (ids 'test' and 'test2') and
// decompress the resulting VCFs; the tests consume GUNZIP.out.gunzip.
setup {
// Inputs 0-3 are the BAM/BAI pairs, reference fasta, fai and dict; inputs 4-7
// are passed as empty [meta, file] tuples.
run("GATK_UNIFIEDGENOTYPER") {
script "../../gatk/unifiedgenotyper/main.nf"
process{
"""
input[0] = Channel.of([ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
],
[ [ id:'test2' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true)
]
)
input[1] = [ [],
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
]
input[2] = [ [],
file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
]
input[3] = [ [],
file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
]
input[4] = [[],[]]
input[5] = [[],[]]
input[6] = [[],[]]
input[7] = [[],[]]
"""
}
}
// Decompress the VCFs emitted by the genotyper.
run("GUNZIP") {
script "../../gunzip/main.nf"
process{
"""
input[0] = GATK_UNIFIEDGENOTYPER.out.vcf
"""
}
}
}

// Real run: all gunzipped VCFs are collected into one list under a single
// meta map (id 'testVCF') and analysed against the sarscov2 reference.
test("sarscov2 - vcf") {

when {
// Positional inputs follow the process input order: 0 vcfs, 1 fasta,
// 2 snpeff_results, 3 gff, 4 allele_freqs, 5 genotype_quality, 6 coverage,
// 7 homozygous_freq, 8 heterozygous_freq, 9 gff_exclude.
// Optional file inputs (2, 3, 9) are supplied as empty tuples.
process {
"""
input[0] = GUNZIP.out.gunzip.collect{ meta, vcf -> vcf }.map{ vcf -> [[ id: 'testVCF'], vcf]}
input[1] = [ [] ,
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
]
input[2] = [[],[]]
input[3] = [[],[]]
input[4] = true
input[5] = 30
input[6] = 5
input[7] = 0.8
input[8] = 0.2
input[9] = [[],[]]
"""
}
}

then {
assertAll(
{ assert process.success },
// Only snptable_snpeff, json and versions are snapshotted as-is. Every other
// output is reduced to a boolean "contains expected marker" check, so the
// snapshot stores true/false rather than a file checksum for those outputs.
// The order of the arguments below is part of the stored snapshot.
// NOTE(review): full_alignment, snp_alignment and snp_genome_alignment are
// declared as .fasta.gz outputs but are read here with readLines() as if they
// were plain text; confirm whether a gzip-aware read was intended.
{ assert snapshot(
process.out.snptable_snpeff,
process.out.json,
process.out.versions,
file(process.out.info_txt[0][1]).readLines().any { it.contains('Run finished:') },
file(process.out.full_alignment[0][1]).readLines().any { it.contains('>') },
file(process.out.snp_alignment[0][1]).readLines().any { it.contains('>') },
file(process.out.snp_genome_alignment[0][1]).readLines().any { it.contains('>Reference') },
file(process.out.snpstatistics[0][1]).readLines().any { it.contains('SNP statistics for') },
file(process.out.snptable[0][1]).readLines().any { it.contains('Position') },
file(process.out.snptable_uncertainty[0][1]).readLines().any { it.contains('Position') },
file(process.out.structure_genotypes[0][1]).readLines().any { it.contains('-1') },
file(process.out.structure_genotypes_nomissing[0][1]).readLines().any { it.contains('-1') }).match()
}
)
}

}

// Stub run: same inputs as the real test, executed with -stub; the whole
// process.out is snapshotted.
test("sarscov2 - vcf - stub") {

options "-stub"

when {
// Same positional inputs as in the "sarscov2 - vcf" test.
process {
"""
input[0] = GUNZIP.out.gunzip.collect{ meta, vcf -> vcf }.map{ vcf -> [[ id: 'testVCF'], vcf]}
input[1] = [ [] ,
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
]
input[2] = [[],[]]
input[3] = [[],[]]
input[4] = true
input[5] = 30
input[6] = 5
input[7] = 0.8
input[8] = 0.2
input[9] = [[],[]]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
Loading

0 comments on commit edc4ccb

Please sign in to comment.