data/config.cfg

## Config file of iseq. Comments lines start with. I suggested you just copy and modify this file ##
[tools_path]
# Paths to the external tools used by iseq.
# NOTE(review): most entries use the /path placeholder, but gatk, star, samtools and
# freebayes carry site-specific paths; adjust every entry for your installation.

# Java Environment, download from http://www.oracle.com/technetwork/java/javase/downloads/
# MuTect needs Java 1.7; GATK (>=3.7) needs Java 1.8
# NOTE(review): java is the executable, while jdk_17/jdk_18 look like JDK install
# directories (no bin/java suffix) - confirm against the consumer
java=/path/jdk1.8.0_101/bin/java
jdk_17=/path/jdk1.7.0_79
jdk_18=/path/jdk1.8.0_101

# Perl Environment, download from https://www.perl.org/get.html
perl=/path/perl

# R Environment, download from https://www.r-project.org/
R=/path/R
Rscript=/path/Rscript

# GATK, Genome Analysis Toolkit,
# download from https://software.broadinstitute.org/gatk/ or
# use BioInstaller::install.bioinfo('gatk', '/path/gatk')
gatk=/home/ljf/opt/caller/gatk/gatk3.7/GenomeAnalysisTK.jar

# Torrent caller, an Ion Torrent platform variant caller,
# download from http://updates.iontorrent.com/tvc_standalone/ or
# use BioInstaller::install.bioinfo('tvc', '/path/tvc')
tvc=/path/tvc-5.0.3-CentOS_7.1.1503_x86_64-binary/bin/variant_caller_pipeline.py

# LoFreq, a fast and sensitive variant caller, download from http://csb5.github.io/lofreq/installation/ or
# use BioInstaller::install.bioinfo('lofreq', '/path/lofreq')
lofreq=/path/lofreq_star-2.1.2/bin/lofreq

# Pindel, an indel and structural variant caller (installation directory),
# download from https://github.com/genome/pindel or
# use BioInstaller::install.bioinfo('pindel', '/path/pindel')
pindel_dir=/path/pindel/

# Bwa, a software package for mapping low-divergent sequences against a large reference genome,
# download from http://bio-bwa.sourceforge.net/ or
# use BioInstaller::install.bioinfo('bwa', '/path/bwa')
bwa=/path/bwa

# STAR, an RNA-seq aligner,
# download from https://github.com/alexdobin/STAR or
# use BioInstaller::install.bioinfo('star', '/path/star')
star=/opt/bin/STAR/bin/Linux_x86_64/STAR

# Bowtie, an ultrafast, memory-efficient short read aligner,
# download from http://bowtie-bio.sourceforge.net/index.shtml or
# conda install bowtie or
# use BioInstaller::install.bioinfo('bowtie', '/path/bowtie')
# (bowtie/bowtie_build are Bowtie 1; bowtie2/bowtie2build are Bowtie 2)
bowtie=/path/bowtie/bowtie1.1.2/bowtie
bowtie_build=/path/bowtie/bowtie1.1.2/bowtie-build
bowtie2=/path/bowtie/bowtie2-2.2.6/bowtie2
bowtie2build=/path/bowtie/bowtie2-2.2.6/bowtie2-build

# Tophat, a fast splice junction mapper for RNA-Seq reads,
# download from http://ccb.jhu.edu/software/tophat/index.shtml or
# conda install tophat or
# use BioInstaller::install.bioinfo('tophat', '/path/tophat')
tophat=/path/tophat-2.1.0.Linux_x86_64/tophat

# TMAP, torrent mapping alignment program, download from https://github.com/iontorrent/TS/tree/master/Analysis/TMAP or
# use BioInstaller::install.bioinfo('tmap', '/path/tmap')
tmap=/path/TMAP/tmap

# samtools, tools (written in C using htslib) for manipulating next-generation sequencing data,
# download from https://github.com/samtools/samtools or
# use BioInstaller::install.bioinfo('samtools', '/path/samtools')
samtools=/opt/bin/samtools/samtools

# Picard, a set of command line tools (in Java) for manipulating
# high-throughput sequencing (HTS) data and formats such as SAM/BAM/CRAM and VCF,
# download from https://github.com/broadinstitute/picard or
# use BioInstaller::install.bioinfo('picard', '/path/picard')
picard=/path/picard/build/libs/picard.jar

# Mutect, a method developed at the Broad Institute for the reliable and
# accurate identification of somatic point mutations in next generation sequencing data of cancer genomes,
# download from http://archive.broadinstitute.org/cancer/cga/mutect or
# use BioInstaller::install.bioinfo('mutect', '/path/Mutect')
mutect=/path/muTect-1.1.7.jar

# VarScan, a tool that detects variants (SNPs and indels) in next-generation sequencing data,
# download from http://massgenomics.org/varscan or
# use BioInstaller::install.bioinfo('varscan2', '/path/VarScan')
varscan=/path/VarScan.v2.4.3.jar

# freebayes, bayesian haplotype-based genetic polymorphism discovery and genotyping,
# download from https://github.com/ekg/freebayes or
# conda install freebayes or
# use BioInstaller::install.bioinfo('freebayes', '/path/freebayes')
freebayes=/opt/bin/freebayes/bin/freebayes

# bcftools, containing all the vcf* commands which previously lived in the htslib repository,
# download from https://github.com/samtools/bcftools or
# conda install bcftools or
# use BioInstaller::install.bioinfo('bcftools', '/path/bcftools')
bcftools=/path/bcftools

# tabix, part of htslib, fast retrieval of sequence features from generic TAB-delimited files,
# conda install htslib or
# use BioInstaller::install.bioinfo('htslib', '/path/htslib')
tabix=/path/htslib/tabix

# ANNOVAR, an efficient software tool to utilize up-to-date information to
# functionally annotate genetic variants detected from diverse genomes (installation directory),
# download from http://annovar.openbioinformatics.org/en/latest/ or
# use BioInstaller::install.bioinfo('annovar', '/path/annovar_dir')
annovar_dir=/path/annovar/

# vcftools, a program package designed for working with VCF files,
# such as those generated by the 1000 Genomes Project,
# download from https://github.com/vcftools/vcftools or
# conda install vcftools or
# use BioInstaller::install.bioinfo('vcftools', '/path/vcftools')
vcftools=/path/vcftools/bin/vcftools

# fastqc, a quality control tool for high throughput sequence data,
# download from http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ or
# use BioInstaller::install.bioinfo('fastqc', '/path/fastqc')
fastqc=/path/fastqc/fastqc

# prinseq, a tool for filtering, trimming and reformatting sequence data (installation directory),
# download from http://edwards.sdsu.edu/cgi-bin/prinseq/prinseq.cgi or
# use BioInstaller::install.bioinfo('prinseq', '/path/prinseq')
prinseq=/path/prinseq-lite-0.20.4/

[extra_files]
# Reference and annotation files used by the analysis.
# NOTE(review): known_transcript, tvc_params_json, lofreq_dbsnp and dbsnp carry
# site-specific paths rather than the /path placeholder; adjust them for your system.

# reference genome, such as hg19.fa, mm10.fa
reffa=/path/hg19.fa

# intervals is a BED format file listing the regions to be analyzed
intervals=/path/cds_final_hg19.bed

# cosmic is a VCF format file that can be downloaded from http://archive.broadinstitute.org/cancer/cga/mutect
cosmic=/path/b37_cosmic_v54_120711_chr.vcf

# known_transcript is a GTF format file indicating known transcripts
known_transcript=/u1/sd/annotation/refseq_hg19_07292013.gtf

# known_sites_vcf lists GATK bundle VCF files; the example value joins two files with ':'
known_sites_vcf=/path/reference/broad/dbsnp_138.hg19.vcf:/path/reference/broad/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf

# tvc_params_json is the tvc JSON format parameters file
tvc_params_json=/opt/bin/tvc-5.0.3-CentOS_7.1.1503_x86_64-binary/share/TVC/pluginMedia/parameter_sets/ocp_somatic_lowstringency_pgm_parameters.json

# lofreq_dbsnp and dbsnp are the common SNP files (lofreq_dbsnp is the gzip-compressed copy)
lofreq_dbsnp=/u6/home/ljf/db/dbsnp_147.hg19.common.vcf.gz
dbsnp=/u6/home/ljf/db/dbsnp_147.hg19.common.vcf

[preprocess]
# genome_indexer controls the behaviour of the genomeindex mode (comma-separated list)
# supported: bwa, star, bowtie, bowtie2, tmap
# NOTE(review): the original note read "only exists in mapper be run"; presumably an
# indexer is run only when it is also listed in mapper - confirm against the consumer
genome_indexer=bwa,star,bowtie,bowtie2

# mapper selects the mapping software used in the analysis
mapper=bwa

[variantcaller]
# caller selects the mutation calling software used in the analysis (comma-separated list)
caller=tvc,varscan,mutect,HaplotypeCaller,UnifiedGenotyper,lofreq

# <caller>_filtration selects the filter function applied to that caller's output
# (presumably an empty value means no filter is applied - TODO confirm)
# NOTE(review): tvc is listed in caller but has no tvc_filtration key - confirm this is intended
unifiedgenotyper_filtration=common_filter
lofreq_filtration=
varscan_filtration=
haplotypecaller_filtration=common_filter
mutect_filtration=

[reffa_class_sets]
# reffa_class_sets controls the methods of the ReffaFile class

# *_extra: additional options for the named step; all are empty in this template
# (presumably appended to the tool's command line - TODO confirm)
reffafile_generate_dict_extra=
reffafile_tmap_index_extra=
reffafile_bowtie2_index_extra=
reffafile_bowtie_index_extra=
reffafile_star_index_pass1_extra=
reffafile_star_index_pass2_extra=
reffafile_bwa_index_extra=

# *_thread: presumably the number of threads used by the named indexing step
# NOTE(review): there is a tophat thread key but no tophat *_extra key and tophat is not
# a supported genome_indexer value - confirm whether reffafile_tophat_index_thread is used
reffafile_bwa_index_thread=30
reffafile_star_index_thread=30
reffafile_bowtie_index_thread=30
reffafile_bowtie2_index_thread=30
reffafile_tophat_index_thread=30

[fastq_class_sets]
# fastq_class_sets controls the methods of the FastqFile class

# *_extra: additional command-line options for the named mapping/QC step; empty means none
fastqfile_bwa_mapping_extra=
fastqfile_star_mapping_extra=
# Bowtie 2 has no -n/-l/-e options (its seed options are -N/-L), so the Bowtie 1
# options that used to sit on this key now live on fastqfile_bowtie_mapping_extra
fastqfile_bowtie2_mapping_extra=
# Bowtie 1 options: --chunkmbs (memory for best-first search, in MB), -n (max seed mismatches),
# -l (seed length), -e (max sum of mismatch qualities)
fastqfile_bowtie_mapping_extra=--chunkmbs 2000 -n 2 -l 28 -e 70
fastqfile_tophat_mapping_extra=
fastqfile_tmap_mapping_extra=-v stage1 map1 map2 map3
fastqfile_fastqc_extra=

# *_thread: presumably the number of threads used by the named mapping step
fastqfile_bwa_mapping_thread=30
fastqfile_star_mapping_thread=30
fastqfile_bowtie2_mapping_thread=30
fastqfile_bowtie_mapping_thread=30
fastqfile_tophat_mapping_thread=30
fastqfile_tmap_mapping_thread=30

[bam_class_sets]
# bam_class_sets controls the methods of the BamFile class

# *_extra: additional command-line options for the named step; empty means none
bamfile_mpileup_extra=-q 1
bamfile_index_extra=
bamfile_sort_extra=
# read group fields: ID, library, platform and platform unit
bamfile_RGID=1
bamfile_RGLB=Jhuanglab
bamfile_RGPL=ILLUMINA
bamfile_RGPU=Hiseq
bamfile_split_ntrim_extra=-RMQF 255 -RMQT 60 -rf ReassignOneMappingQuality -U ALLOW_N_CIGAR_READS
bamfile_contig_reorder_extra=
bamfile_add_read_group_extra=
bamfile_mark_duplicates_extra=
bamfile_recalibration_extra=
bamfile_indel_realigner_extra=
bamfile_realigner_target_creator_extra=
bamfile_print_reads_extra=
# -stand_emit_conf is omitted: GATK 3.7 and newer no longer accept it, and this
# template targets GATK 3.7; only -stand_call_conf is set
bamfile_haplotype_caller_extra_rna=--unsafe -stand_call_conf 20.0 -dontUseSoftClippedBases
bamfile_haplotype_caller_extra_dna=--unsafe
bamfile_unifiedgenotyper_caller_extra=-dcov 1000 --unsafe -glm BOTH
bamfile_mutect_caller_extra=--unsafe
bamfile_varscan_caller_extra_somatic=
bamfile_varscan_caller_extra_germline=--min-var-freq 0.01
bamfile_torrent_caller_extra=
bamfile_lofreq_caller_extra_germline=
bamfile_lofreq_caller_extra_somatic=
bamfile_freebayes_caller_extra=
# *_thread: presumably the number of threads used by the named step
bamfile_index_thread=30
bamfile_sort_thread=30
bamfile_unifiedgenotyper_caller_thread=30
bamfile_realigner_target_creator_thread=30
bamfile_torrent_caller_thread=30
bamfile_lofreq_caller_thread=30
bamfile_pindel_caller_thread=30
# Pindel-specific settings: genome name, genome date and library insert size
bamfile_pindel_genome_name=hg19
bamfile_pindel_genome_date=2012
bamfile_pindel_insertsize=400

[vcf_class_sets]
# vcf_class_sets controls the methods of the VcfFile class
# vcffile_annovar_extra: additional options for the ANNOVAR step
# (-nastring . prints '.' for missing values; -csvout writes comma-separated output)
vcffile_annovar_extra=-nastring . -csvout

[csv_class_sets]
# csv_class_sets controls the methods of the CsvFile class
# csvfile_mpileup_extra: additional options for the mpileup step
# (same value as bamfile_mpileup_extra in [bam_class_sets])
csvfile_mpileup_extra=-q 1

[annovar_sets]
# annovar_sets holds the ANNOVAR annotation settings
# annovar_buildver: genome build of the ANNOVAR databases
annovar_buildver=hg19
# annovar_flag: -protocol lists 15 databases and -operation lists 15 codes
# (g, r or f), one per database in the same order; keep both lists the same length
annovar_flag=-protocol refGene,cytoBand,genomicSuperDups,esp6500siv2_all,1000g2015aug_all,1000g2015aug_afr,1000g2015aug_eas,1000g2015aug_eur,snp138,avsnp142,avsnp144,avsnp147,ljb26_all,cosmic70,cosmic81 -operation g,r,r,f,f,f,f,f,f,f,f,f,f,f,f
# annovar_colnames: comma-separated list of annotation column names
# (presumably the columns kept from the ANNOVAR output - TODO confirm)
annovar_colnames=Chr,Start,End,Ref,Alt,Func.refGene,Gene.refGene,GeneDetail.refGene,ExonicFunc.refGene,AAChange.refGene,cytoBand,snp138,avsnp142,avsnp144,avsnp147,SIFT_score,SIFT_pred,Polyphen2_HDIV_score,Polyphen2_HDIV_pred,cosmic70,cosmic81

[others]
# NOTE(review): both tmp dirs start with '~'; this is only expanded if the consumer
# does so explicitly - confirm, or use absolute paths

# gatk tmp dir
gatk_tmp_dir=~/tmp/gatk_tmp

# tvc tmp dir
tvc_tmp_dir=~/tmp/tvc_tmp

# java_max_mem sets the maximum memory Java may use
java_max_mem=128g

# freq_exon_only controls the mutation statistics step
# 1, only count mutation sites in exon regions
# 0, count all sites
freq_exon_only=1