Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add VEP module #547

Merged
merged 13 commits into from
Jul 6, 2021
30 changes: 30 additions & 0 deletions software/ensemblvep/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Image that bundles Ensembl VEP (installed through conda) with a
# pre-downloaded annotation cache stored in /.vep.
FROM nfcore/base:1.14
LABEL \
    author="Maxime Garcia" \
    description="VEP image for nf-core pipelines" \
    maintainer="[email protected]"

# Install the conda environment
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a

# Add conda installation dir to PATH (instead of doing 'conda activate')
ENV PATH=/opt/conda/envs/nf-core-vep-104.3/bin:$PATH

# Build-time defaults, each overridable with `docker build --build-arg`.
# The cache version default follows the major version of the installed
# ensembl-vep package (104.3) so that a plain build gets a matching cache.
ARG GENOME=GRCh38
ARG SPECIES=homo_sapiens
ARG VEP_VERSION=104

# Download the cache for the requested species/assembly.
# The cache directory is given as an absolute path so that it does not depend
# on the working directory of the base image; the module reads it from /.vep.
RUN vep_install \
    -a c \
    -c /.vep \
    -s ${SPECIES} \
    -y ${GENOME} \
    --CACHE_VERSION ${VEP_VERSION} \
    --CONVERT \
    --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE

# Dump the details of the installed packages to a file for posterity
RUN conda env export --name nf-core-vep-104.3 > nf-core-vep-104.3.yml
27 changes: 27 additions & 0 deletions software/ensemblvep/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -euo pipefail

# Build and push all containers

# Build the VEP image for one genome and push it to the registry.
#
# Arguments:
#   $1  genome assembly, passed as the GENOME build arg and used in the image tag
#   $2  species name, passed as the SPECIES build arg
#   $3  cache version, passed as the VEP_VERSION build arg
#   $4  VEP tag, used as the first part of the image tag
#
# The build context is given relative to the repository root, so the script
# has to be launched from there.
build_push() {
    local genome=$1
    local species=$2
    local vep_version=$3
    local vep_tag=$4
    local image="nfcore/vep:${vep_tag}.${genome}"

    # The Dockerfile and environment.yml live in software/ensemblvep
    docker build \
        -t "${image}" \
        software/ensemblvep/. \
        --build-arg GENOME="${genome}" \
        --build-arg SPECIES="${species}" \
        --build-arg VEP_VERSION="${vep_version}"

    docker push "${image}"
}

# One entry per image to publish: "<genome> <species> <cache version> <VEP tag>"
vep_images=(
    "GRCh37 homo_sapiens 104 104.3"
    "GRCh38 homo_sapiens 104 104.3"
    "GRCm38 mus_musculus 102 104.3"
    "GRCm39 mus_musculus 104 104.3"
    "CanFam3.1 canis_lupus_familiaris 104 104.3"
    "WBcel235 caenorhabditis_elegans 104 104.3"
)

# Build and push the images in the order listed above
for image_spec in "${vep_images[@]}"; do
    read -r image_genome image_species image_cache image_tag <<< "${image_spec}"
    build_push "${image_genome}" "${image_species}" "${image_cache}" "${image_tag}"
done
10 changes: 10 additions & 0 deletions software/ensemblvep/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# You can use this file to create a conda environment for this module:
# conda env create -f environment.yml
# The environment name below is also hard-coded in the module's Dockerfile
# (PATH entry and `conda env export`), so keep the two in sync.
name: nf-core-vep-104.3
# Channels searched for packages
channels:
- conda-forge
- bioconda
- defaults

# Single pinned package, requested explicitly from the bioconda channel
dependencies:
- bioconda::ensembl-vep=104.3
68 changes: 68 additions & 0 deletions software/ensemblvep/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//
// Utility functions used in nf-core DSL2 module files
//

//
// Derive the lower-case tool name from a process name (e.g. $task.process):
// keep what follows the last ':' and, within that, what precedes the first '_'
//
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]
    def tool_name    = process_name.tokenize('_')[0]
    return tool_name.toLowerCase()
}

//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
// args: Map of user supplied options; may be null or empty, in which case only defaults are returned
// Returns a new Map with the keys args, args2, args3, publish_by_meta, publish_dir, publish_files and suffix
//
def initOptions(Map args) {
    // Treat a missing options map like an empty one instead of failing on null
    def provided = args ?: [:]
    return [
        args            : provided.args ?: '',
        args2           : provided.args2 ?: '',
        args3           : provided.args3 ?: '',
        publish_by_meta : provided.publish_by_meta ?: [],
        publish_dir     : provided.publish_dir ?: '',
        // Passed through untouched: saveFiles distinguishes null, a Map and any other value
        publish_files   : provided.publish_files,
        suffix          : provided.suffix ?: ''
    ]
}

//
// Tidy up and join elements of a list to return a path string
//
// path_list: list of path fragments; null, empty and whitespace-only entries are ignored
// Returns the remaining fragments joined with '/', without leading or trailing slashes on any fragment
//
def getPathFromList(path_list) {
    // Groovy truth drops both null entries and strings that are empty once trimmed
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim whitespace, then strip leading and trailing slashes
    paths = paths.collect { it.trim().replaceAll('^/+|/+$', '') }
    return paths.join('/')
}

//
// Work out the publish path of a module output file, or null when it must not be published
//
// args.filename        : name of the file being published
// args.options         : module options, completed with defaults through initOptions
// args.publish_dir     : directory used when the options do not set publish_dir
// args.meta            : sample meta map used to resolve publish_by_meta keys
// args.publish_by_meta : keys used when options.publish_by_meta is set but is not a List
//
def saveFiles(Map args) {
    // Version files are never published
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts     = initOptions(args.options)
    def segments = [ opts.publish_dir ?: args.publish_dir ]

    // Optionally add one sub-directory per requested meta key
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (meta_key in meta_keys) {
            if (!(args.meta && meta_key instanceof String)) {
                continue
            }
            // Falls back to the key itself when the meta map does not contain it
            def segment = meta_key
            if (args.meta.containsKey(meta_key)) {
                def meta_value = args.meta[meta_key]
                segment = meta_value instanceof Boolean ? "${meta_key}_${meta_value}".toString() : meta_value
            }
            // Anything that is not a String ends up as an empty segment
            segments.add(segment instanceof String ? segment : '')
        }
    }

    // No file filter: publish everything under the computed directory
    if (opts.publish_files == null) {
        return "${getPathFromList(segments)}/$args.filename"
    }

    // Map filter: the first matching suffix decides the extra sub-directory
    if (opts.publish_files instanceof Map) {
        for (entry in opts.publish_files) {
            if (args.filename.endsWith(entry.key)) {
                return "${getPathFromList(segments + [entry.value])}/$args.filename"
            }
        }
    }

    // Any other filter value, or no matching suffix: do not publish
    return null
}
}
62 changes: 62 additions & 0 deletions software/ensemblvep/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'

// Module options supplied by the including workflow; defaults to an empty map
params.options = [:]
// Same options completed with default values by initOptions
options = initOptions(params.options)
// true : biocontainers image, VEP reads the cache staged through the `cache` input
// false: nfcore/vep image selected by params.vep_tag, VEP reads the cache from /.vep
params.use_cache = false
// Tag appended to "nfcore/vep:" to pick the container when use_cache is false
params.vep_tag = ""

//
// Annotate a VCF file with Ensembl VEP
//
// Inputs : [ meta, vcf ], genome assembly, species, cache version, cache directory
//          (the cache directory is only used when params.use_cache is true)
// Outputs: annotated VCF (*.ann.vcf), HTML summary (*.summary.html), version file (*.version.txt)
//
process ENSEMBLVEP {
    label 'process_medium'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    conda (params.enable_conda ? "bioconda::ensembl-vep=104.3" : null)
    // With use_cache the stock biocontainers image is combined with the staged cache;
    // otherwise the nfcore/vep image chosen through params.vep_tag is used
    if (params.use_cache) {
        if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
            container "https://depot.galaxyproject.org/singularity/ensembl-vep:104.3--pl5262h4a94de4_0"
        } else {
            container "quay.io/biocontainers/ensembl-vep:104.3--pl5262h4a94de4_0"
        }
    } else {
        container "nfcore/vep:${params.vep_tag}"
    }

    input:
    tuple val(meta), path(vcf)
    val   genome
    val   species
    val   cache_version
    path  cache

    output:
    tuple val(meta), path("*.ann.vcf"), emit: vcf
    path "*.summary.html"             , emit: report
    path "*.version.txt"              , emit: version

    script:
    def software  = getSoftwareName(task.process)
    def prefix    = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    // Declared with `def` so the value stays local to each task instead of
    // living in the script-wide binding shared by concurrently evaluated tasks
    def dir_cache = params.use_cache ? "\${PWD}/${cache}" : "/.vep"
    """
    mkdir $prefix

    vep \\
        -i $vcf \\
        -o ${prefix}.ann.vcf \\
        $options.args \\
        --assembly $genome \\
        --species $species \\
        --cache \\
        --cache_version $cache_version \\
        --dir_cache $dir_cache \\
        --fork $task.cpus \\
        --format vcf \\
        --stats_file ${prefix}.summary.html

    rm -rf $prefix

    echo \$(vep --help 2>&1) > ${software}.version.txt
    """
}
64 changes: 64 additions & 0 deletions software/ensemblvep/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Module description; input and output names mirror the declarations in main.nf
name: ENSEMBLVEP
description: Ensembl Variant Effect Predictor (VEP)
keywords:
  - annotation
tools:
  - ensemblvep:
      description: |
        VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs
        or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions.
      homepage: https://www.ensembl.org/info/docs/tools/vep/index.html
      documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html
params:
  - use_cache:
      type: boolean
      description: |
        Use the cache directory supplied through the `cache` input together with
        the plain biocontainers image.
        When false, the nfcore/vep container selected with `vep_tag` and the cache
        it ships in /.vep are used instead; that mode does not work with conda
  - vep_tag:
      type: value
      description: |
        Specify the tag for the container
        https://hub.docker.com/r/nfcore/vep/tags
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: |
        vcf to annotate
  - genome:
      type: value
      description: |
        which genome to annotate with
  - species:
      type: value
      description: |
        which species to annotate with
  - cache_version:
      type: value
      description: |
        which version of the cache to annotate with
  - cache:
      type: file
      description: |
        path to VEP cache (optional)
output:
  - vcf:
      type: file
      description: |
        annotated vcf
      pattern: "*.ann.vcf"
  - report:
      type: file
      description: VEP report file
      pattern: "*.html"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
authors:
  - "@maxulysse"
4 changes: 4 additions & 0 deletions tests/config/pytest_software.yml
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,10 @@ dshbio/splitgff3:
- software/dshbio/splitgff3/**
- tests/software/dshbio/splitgff3/**

ensemblvep:
- software/ensemblvep/**
- tests/software/ensemblvep/**

fastp:
- software/fastp/**
- tests/software/fastp/**
Expand Down
12 changes: 12 additions & 0 deletions tests/software/ensemblvep/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { ENSEMBLVEP } from '../../../software/ensemblvep/main.nf' addParams( vep_tag: '104.3.WBcel235', use_cache: false )

// Runs ENSEMBLVEP on the test VCF with the WBcel235 / caenorhabditis_elegans
// cache version 104 and no external cache directory
workflow test_ensemblvep {
    def sample_meta = [ id:'test' ] // meta map
    def sample_vcf  = [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ]

    ENSEMBLVEP ( [ sample_meta, sample_vcf ], "WBcel235", "caenorhabditis_elegans", "104", [] )
}
7 changes: 7 additions & 0 deletions tests/software/ensemblvep/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- name: ensemblvep test_ensemblvep
command: nextflow run tests/software/ensemblvep -entry test_ensemblvep -c tests/config/nextflow.config
tags:
- ensemblvep
files:
- path: output/ensemblvep/test.ann.vcf
- path: output/ensemblvep/test.summary.html