From ea4f2a2bae51d8546fb2ed466ec469213839553c Mon Sep 17 00:00:00 2001
From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch>
Date: Tue, 10 Jan 2023 11:16:22 +0100
Subject: [PATCH 1/4] feat: add single-strand preprocessing subworkflow

---
 workflow/rules/rcrunch_preprocessing_se.smk | 208 ++++++++++++++++++++
 1 file changed, 208 insertions(+)
 create mode 100644 workflow/rules/rcrunch_preprocessing_se.smk

diff --git a/workflow/rules/rcrunch_preprocessing_se.smk b/workflow/rules/rcrunch_preprocessing_se.smk
new file mode 100644
index 0000000..01504fb
--- /dev/null
+++ b/workflow/rules/rcrunch_preprocessing_se.smk
@@ -0,0 +1,208 @@
+# ________________________________________________________________________________
+# Preprocessing subworkflow for paired-end data. First step of analysis
+# ________________________________________________________________________________
+import os
+rule create_index:
+    """
+        Create an index of the human genome
+    """
+    input:
+        genome = config["genome"],
+        gtf = config["gtf"]
+
+    output:
+        chromosome_info = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"),
+
+        chromosomes_names = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrName.txt")
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        output_dir = os.path.join(
+            config["output_dir"],
+            "STAR_index"),
+        outFileNamePrefix = os.path.join(
+            config["output_dir"],
+            "STAR_index/STAR_"),
+        sjdbOverhang = config["sjdbOverhang"]
+
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "STAR_index.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "STAR_index.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.output_dir}; \
+        chmod -R 777 {params.output_dir}; \
+        STAR \
+        --runMode genomeGenerate \
+        --sjdbOverhang {params.sjdbOverhang} \
+        --genomeDir {params.output_dir} \
+        --genomeFastaFiles {input.genome} \
+        --runThreadN {threads} \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        --sjdbGTFfile {input.gtf}) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule create_csv_from_ensembl_gtf:
+    """
+        Create a csv based on the gtf file of ensembl
+    """
+    input:
+        gtf = config["gtf"]
+
+    output:
+        output = os.path.join(
+            config["output_dir"],
+            "ensembl_csv",
+            "ensembl.csv"),
+
+        flag = os.path.join(
+            config["output_dir"],
+            "ensembl_csv",
+            "done"),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_ensembl__gtf_to_csv.py"
+            ),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "ensembl_csv.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "ensembl_csv.stderr.log"
+            ),
+
+    shell:
+        "(python {params.script} \
+        --gtf_file {input.gtf} \
+        --flag {output.flag} \
+        --out_file {output.output}) 1> {log.stdout} 2> {log.stderr}"
+
+rule cutadapt:
+    """
+        Trim 3' and 5' adapters
+    """
+    input:
+        reads = lambda wildcards:
+            os.path.join(
+                config["input_dir"],
+                config[wildcards.sample]['mate1'] + '.fastq.gz'),
+        mate1_3p = lambda wildcards: config[wildcards.sample]['mate1_3p'],
+        mate1_5p = lambda wildcards: config[wildcards.sample]['mate1_5p'],
+    output:
+        reads = os.path.join(
+            config["output_dir"],
+            "cutadapt_replicates",
+            "{sample}.cutadapt_standard.fastq.gz")
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/cutadapt:1.16"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "cutadapt__{sample}.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "cutadapt__{sample}.stderr.log"
+            )
+
+    shell:
+        "cutadapt \
+        -e 0.1 \
+        -O 1 \
+        -j {threads} \
+        -m 2 \
+        -n 1 \
+        -a file:{input.mate1_3p} \
+        -g file:{input.mate1_5p} \
+        -o {output.reads} \
+        {input.reads}) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule umi_tools_format:
+    """
+        Create a UMI-tools barcode collapse format for any ENCODE samples.
+    """
+    input:
+        reads = os.path.join(
+            config["output_dir"],
+            "cutadapt_replicates2",
+            "{sample}.cutadapt_standard.fastq.gz")
+
+    output:
+        reads = os.path.join(
+            config["output_dir"],
+            "cutadapt_replicates2",
+            "{sample}.cutadapt_encode.fastq.gz")
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_convert_fastq_to_umi_format.py"
+            ),
+        read_format = lambda wildcards:
+            config[wildcards.sample]['format']
+        
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "umi_format__{sample}.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "umi_format__{sample}.stderr.log"
+            ),
+    shell:
+        "(python {params.script} \
+        --infile {input.reads} \
+        --outfile {output.reads} \
+        --read_format {params.read_format};) 1> {log.stdout} 2> {log.stderr}"

From 93cfe62b1189d9fc2d7a86d4d216f533f5281716 Mon Sep 17 00:00:00 2001
From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch>
Date: Fri, 13 Jan 2023 10:37:20 +0100
Subject: [PATCH 2/4] feat: single and paired-end implementation

---
 test/test_singularity_execution/config.yaml  |   18 +-
 workflow/Snakefile                           |   26 +-
 workflow/rules/common.smk                    |   38 +
 workflow/rules/rcrunch_genomic.smk           | 1457 +++++++++
 workflow/rules/rcrunch_genomic_pe.smk        | 1511 +--------
 workflow/rules/rcrunch_genomic_se.smk        |  151 +
 workflow/rules/rcrunch_preprocessing.smk     |  111 +
 workflow/rules/rcrunch_preprocessing_pe.smk  |  136 +-
 workflow/rules/rcrunch_preprocessing_se.smk  |  135 +-
 workflow/rules/rcrunch_transcriptomic.smk    | 2349 ++++++++++++++
 workflow/rules/rcrunch_transcriptomic_pe.smk | 2888 ++----------------
 workflow/rules/rcrunch_transcriptomic_se.smk |  508 +++
 12 files changed, 4966 insertions(+), 4362 deletions(-)
 create mode 100644 workflow/rules/common.smk
 create mode 100644 workflow/rules/rcrunch_genomic.smk
 create mode 100644 workflow/rules/rcrunch_genomic_se.smk
 create mode 100644 workflow/rules/rcrunch_preprocessing.smk
 create mode 100644 workflow/rules/rcrunch_transcriptomic.smk
 create mode 100644 workflow/rules/rcrunch_transcriptomic_se.smk

diff --git a/test/test_singularity_execution/config.yaml b/test/test_singularity_execution/config.yaml
index 0980356..648b1fa 100644
--- a/test/test_singularity_execution/config.yaml
+++ b/test/test_singularity_execution/config.yaml
@@ -29,7 +29,7 @@
 
   # ------------------------------------------
   # Experiment specific info - REQUIRED
-  EXPERIMENT_SET : ["PUM2_K562_ENCSR661ICQ_2"]
+  EXPERIMENT_SET : ["PUM2_K562_ENCSR661ICQ_2_paired_end", "PUM2_K562_ENCSR661ICQ_2_single_end"]
 
   # Experiment specific info - REQUIRED
   # ***Note: an experiment refers to the foreground (replicates) and background (smis) samples
@@ -38,7 +38,7 @@
             # as one sample (This functionality is useful for treating technical replicates as one sample)
             # Make sure that the samples you merge have the same sample features (e.g paired, sense, dup_type)
             # Choose an informative experiment name
-  PUM2_K562_ENCSR661ICQ_2: { 
+  PUM2_K562_ENCSR661ICQ_2_paired_end: { 
                               rbp: "PUM2",
                               replicates: ["ENCFF041KJT_ENCFF462SCV"],
                               smis: ["ENCFF616FCF_ENCFF495ZPY"],
@@ -46,14 +46,23 @@
                               window_b: 300,
                               step_size: 150,
                               background_type: "standard"}
+  PUM2_K562_ENCSR661ICQ_2_single_end: { 
+                              rbp: "PUM2",
+                              replicates: ["ENCFF462SCV"],
+                              smis: ["ENCFF495ZPY"],
+                              window_f: 300,
+                              window_b: 300,
+                              step_size: 150,
+                              background_type: "standard"}
   
   # Sample specific info - REQUIRED
+  ENCFF462SCV: { mate1: "ENCFF462SCV.chr20", paired: 1, sense: 1, format: "encode", dup_type: "umis",  mate1_3p: "../input_files_test/mate2_3p.fasta", mate1_5p: "../input_files_test/mate2_5p.fasta" }
+  ENCFF495ZPY: { mate1: "ENCFF495ZPY.chr20", paired: 1, sense: 1, format: "encode", dup_type: "umis",  mate1_3p: "../input_files_test/mate2_3p.fasta", mate1_5p: "../input_files_test/mate2_5p.fasta" }
   ENCFF041KJT_ENCFF462SCV: { mate1: "ENCFF041KJT.chr20", mate2: "ENCFF462SCV.chr20", paired: 2, sense: 2, format: "encode", dup_type: "umis", mate1_3p: "../input_files_test/mate1_3p.fasta", mate1_5p: "../input_files_test/mate1_5p.fasta", mate2_3p: "../input_files_test/mate2_3p.fasta", mate2_5p: "../input_files_test/mate2_5p.fasta" }
   ENCFF616FCF_ENCFF495ZPY: { mate1: "ENCFF616FCF.chr20", mate2: "ENCFF495ZPY.chr20", paired: 2, sense: 2, format: "encode", dup_type: "umis", mate1_3p: "../input_files_test/mate1_3p.fasta", mate1_5p: "../input_files_test/mate1_5p.fasta", mate2_3p: "../input_files_test/mate2_3p.fasta", mate2_5p: "../input_files_test/mate2_5p.fasta" }
-  
   # ----------------------------------------------------------------------------------------------
   # RCRUNCH specific options - REQUIRED - DEFAULT VALUES SUGGESTED 
-  method_types : ["GN"]         # options: GN, TR  if you want BOTH methods, you can specify ['GN', 'TR']
+  method_types : ["GN", "TR"]         # options: GN, TR  if you want BOTH methods, you can specify ['GN', 'TR']
                                 # This will create both transcriptomic and genomic predictions
   multimappers: 1               # change if you want to accept reads that map to multiple regions 
                                 # (the number indicates the number of regions a read can map to at
@@ -65,7 +74,6 @@
   
 
   ## RCRUNCH specific options - DEFAULTS - Use with care
-  seq_type: "pe"                # paired end - currently only paired-end seq are allowed
   motif_lengths: [6, 8]         # Motif sizes to be considered.
   peak_center : ["crosslink", "peak_center"]   # whether to use  'crosslink' (position within where most reads start)
                                 # or 'peak_center' as the center of the peak
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 7e42f1c..8e61a59 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -46,20 +46,16 @@ rule finish:
             # experiment=config[config["EXPERIMENT_SET"][0]]["replicates"])
 
 
-
-if config['seq_type'] == 'pe':
-    if ('TR' in config['method_types']) & ('GN' in config['method_types']):
-        include: 'rules/rcrunch_preprocessing_pe.smk'
-        include: 'rules/rcrunch_genomic_pe.smk'
-        include: 'rules/rcrunch_transcriptomic_pe.smk'
-        include: 'rules/rcrunch_motif_analysis.smk'
-    elif 'TR' in config['method_types']:
-        include: 'rules/rcrunch_preprocessing_pe.smk'
-        include: 'rules/rcrunch_transcriptomic_pe.smk'
-        include: 'rules/rcrunch_motif_analysis.smk'
-    elif 'GN' in config['method_types']:
-        include: 'rules/rcrunch_preprocessing_pe.smk'
-        include: 'rules/rcrunch_genomic_pe.smk'
-        include: 'rules/rcrunch_motif_analysis.smk'
+include: 'rules/rcrunch_preprocessing_pe.smk'
+include: 'rules/rcrunch_preprocessing_se.smk'
+include: 'rules/rcrunch_preprocessing.smk'
+include: 'rules/rcrunch_genomic_pe.smk'
+include: 'rules/rcrunch_genomic_se.smk'
+include: 'rules/rcrunch_genomic.smk'
+include: 'rules/rcrunch_transcriptomic.smk'
+include: 'rules/rcrunch_transcriptomic_pe.smk'
+include: 'rules/rcrunch_transcriptomic_se.smk'
+include: 'rules/rcrunch_motif_analysis.smk'
+include: 'rules/common.smk'
 
     
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
new file mode 100644
index 0000000..fb1b213
--- /dev/null
+++ b/workflow/rules/common.smk
@@ -0,0 +1,38 @@
+def get_mates(sample):
+    """Get number of mates"""
+    try:
+        if config[sample]['mate2']:
+            return "pe"
+        else:
+            return "se"
+    except KeyError:
+        return "se"
+
+def get_mates_number(sample):
+    """Get library type"""
+    try:
+        if config[sample]['mate2']:
+            return 2
+        else:
+            return 1
+    except KeyError:
+        return 2
+
+def get_library_type(sample, sense):
+    try:
+        if config[sample]['mate2']:
+            if str(int(sense)) == '2':
+                return "ISR"
+            if str(int(sense)) == '1':
+                return "ISF"
+        else:
+            if str(int(sense)) == '2':
+                return "SR"
+            if str(int(sense)) == '1':
+                return "SF"
+    except KeyError:
+        if str(int(sense)) == '2':
+            return "SR"
+        if str(int(sense)) == '1':
+            return "SF"
+        
\ No newline at end of file
diff --git a/workflow/rules/rcrunch_genomic.smk b/workflow/rules/rcrunch_genomic.smk
new file mode 100644
index 0000000..e117c45
--- /dev/null
+++ b/workflow/rules/rcrunch_genomic.smk
@@ -0,0 +1,1457 @@
+# ________________________________________________________________________________
+# Genomic rcrunch for paired end CLIP data
+# ________________________________________________________________________________
+import os
+
+
+rule GN_index_mappings:
+    """
+        Sort and index alignments
+        according to coordinates
+    """
+    input:  
+        bam = lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "GN",
+                "alignment",
+                "{sample}",
+                "{sample}.{mates}.bam"),
+            sample=wildcards.sample,
+            mates=get_mates(wildcards.sample)),
+
+        logfile = lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "GN",
+                "alignment",
+                "{sample}",
+                "{sample}.{mates}.Log.final.out"),
+            sample=wildcards.sample,
+            mates=get_mates(wildcards.sample)),
+
+    output:
+        bam = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "alignment",
+            "{sample}_sorted.bam"
+            )),
+
+        bai = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "alignment",
+            "{sample}_sorted.bam.bai"
+            )),
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "GN",
+            "alignment",
+            "{sample}_temp"
+            ),
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "sort_mappings.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "sort_mappings.stderr.log"
+            ),
+
+    shell:
+        "(samtools sort \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        samtools index \
+        {output.bam} {output.bai} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_flag_ncRNA_reads:
+    """
+        Remove reads mapping to specific
+        nc categories specified in the config
+        - identification step
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "alignment",
+            "{sample}_sorted.bam"
+            ),
+        bai = os.path.join(
+            config["output_dir"],
+            "GN",
+            "alignment",
+            "{sample}_sorted.bam.bai"
+            ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_filter_ncRNAs.py"
+            ),
+        ncRNAs = config['ncRNAs'],
+        ncRNA_biotypes = expand(config['ncRNA_biotypes']),
+        paired = lambda wildcards:
+            config[wildcards.sample]['paired'],
+        sense = lambda wildcards:
+            config[wildcards.sample]['sense'],
+        
+    output:
+        outfile = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.ncrna.txt")),
+        flag = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}_done"
+            )),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "flag_ncRNA_reads.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "flag_ncRNA_reads.stderr.log"
+            ),
+
+    shell:
+        "(python {params.script} \
+        --bamfile {input.bam} \
+        --RNA_central {params.ncRNAs} \
+        --outfile {output.outfile} \
+        --paired {params.paired} \
+        --sense {params.sense} \
+        --ncRNAs {params.ncRNA_biotypes} \
+        --flag {output.flag} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_remove_ncRNA_reads:
+    """
+        Removal of the ncreads 
+        - removal step
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "alignment",
+            "{sample}_sorted.bam"
+            ),
+        read_names = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.ncrna.txt"
+            ),
+
+    output:
+        bam = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.bam"
+            )),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/picard:2.18.9"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "remove_ncRNA_reads.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "remove_ncRNA_reads.stderr.log"
+            ),
+
+    shell:
+        "(java -jar /usr/local/bin/picard.jar FilterSamReads \
+        I={input.bam} \
+        O={output.bam} \
+        READ_LIST_FILE={input.read_names} \
+        FILTER=excludeReadList \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_sort_ncRNA_rm:
+   """
+        Sort and index alignments
+        according to coordinates
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.bam"
+            ),
+
+    output:
+        bam = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.sorted.bam"
+            )),
+        bai = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.sorted.bam.bai"
+            )),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}_temp"
+            ),
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "sort_ncrna_rm.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "sort_ncrna_rm.stderr.log"
+            ),
+
+    shell:
+        "(samtools sort \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        samtools index \
+        {output.bam} {output.bai} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_flag_duplicates:
+    """
+        Duplicate removal in the absence of UMIs
+        - detection step
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.sorted.bam"
+            ),
+        bai = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.sorted.bam.bai"
+            ),
+
+    output:
+        bam = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "flag_duplicates",
+            "{sample}.duplicates.Processed.out.bam"))
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        output_dir = os.path.join(
+            config["output_dir"],
+            "GN",
+            "flag_duplicates"
+            ),
+        outFileNamePrefix = os.path.join(
+            config["output_dir"],
+            "GN",
+            "flag_duplicates",
+            "{sample}.duplicates."
+            ),
+
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "flag_duplicates.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "flag_duplicates.stderr.log"
+            ),
+
+    shell:
+        "(STAR \
+        --inputBAMfile {input.bam} \
+        --bamRemoveDuplicatesType UniqueIdenticalNotMulti \
+        --runMode inputAlignmentsFromBAM \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_remove_duplicates:
+    """
+        Duplicate removal in the absence of UMIs
+        - removal step
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "flag_duplicates",
+            "{sample}.duplicates.Processed.out.bam"
+            ),
+
+    output:
+        bam = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_duplicates",
+            "{sample}.duplicates.bam"
+            )),
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_filter_duplicates.py"
+            ),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "remove_duplicates.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "remove_duplicates.stderr.log"
+            ),
+
+    shell:
+        "(python {params.script} \
+        --bamfile {input.bam} \
+        --outfile {output.bam} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_no_duplicate_removal:
+    """ 
+        No duplicate removal
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.sorted.bam"
+            ),
+        bai = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.sorted.bam.bai"
+            ),
+
+    output:
+        bam = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_duplicates",
+            "{sample}.with_duplicates.bam"
+            )),
+        bai = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_duplicates",
+            "{sample}.with_duplicates.bam.bai"
+            )),
+
+    singularity:
+        "docker://bash:5.0.16"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "no_duplicate_collapse.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "no_duplicate_collapse.stderr.log"
+            ),
+
+    shell:
+        "(cp {input.bam} {output.bam}; \
+          cp {input.bai} {output.bai}; \
+          ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_merge:
+    input:
+        bam = lambda wildcards:
+            expand(
+                os.path.join(
+                    config["output_dir"],
+                    "GN",
+                    "remove_duplicates",
+                    "{sample}.{dup_type}.{mates}.bam"),
+                sample=config[wildcards.experiment][wildcards.name],
+                dup_type=config[config[wildcards.experiment][wildcards.name][0]]['dup_type'],
+                mates=get_mates(config[wildcards.experiment][wildcards.name][0]),
+                ),
+    output:
+        bam = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.bam"
+            )),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "merge_samples__{name}.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "merge_samples__{name}.stderr.log"
+            ),
+
+    shell:
+        "(samtools merge \
+        {output.bam} {input.bam}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_sort_merged:
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.bam"
+            ),
+
+    output:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sorted.bam"
+            ),
+        bai = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sorted.bam.bai"
+            ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples_temp")
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "merge_samples__{name}.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "merge_samples__{name}.stderr.log"
+            ),
+
+    shell:
+        "(samtools sort \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        samtools index \
+        {output.bam} {output.bai} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_read_frequencies:
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sorted.bam"
+            ),
+
+    output:
+        frequencies = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_{name}.frequencies.csv"
+            )),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        paired = lambda wildcards:
+            config[config[wildcards.experiment][wildcards.name][0]]['paired'],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_bam_get_read_frequencies.py"
+            ),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "read_frequencies__{name}.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "read_frequencies__{name}.stderr.log"
+            ),
+
+    shell:
+        "(python {params.script} \
+        --bamfile {input.bam} \
+        --paired {params.paired} \
+        --outfile {output.frequencies}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_bam_to_bed:
+    """
+        Convert bamfile to bedfile using bedtools
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sorted.bam"
+            ),
+
+    output:
+        bed = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.bed"
+            )),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/bedtools:2.27.0"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "bam_to_bed___{name}.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "bam_to_bed__{name}.stderr.log"
+            ),
+
+    shell:
+        "(bedtools bamtobed \
+        -i {input.bam} > {output.bed}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_genome_coverage:
+    """
+        Obtain windows of read coverage genome
+    """
+    input:
+        chromosome_info = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+        fg_bed = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_replicates_merge_samples.bed"
+            ),
+        bg_bed = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_smis_merge_samples.bed"
+            ),
+        fg_frequencies = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_replicates.frequencies.csv"
+            ),
+        bg_frequencies = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_smis.frequencies.csv"
+            ),
+
+    output:
+        coverage = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}.gn_coverage.bed"
+            )),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "sliding_windows.py"
+            ),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}"),
+        window_f = lambda wildcards:
+            config[wildcards.experiment]["window_f"],
+        window_b = lambda wildcards:
+            config[wildcards.experiment]["window_b"],
+        step_size = lambda wildcards:
+            config[wildcards.experiment]["step_size"],
+        sense_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['sense'],
+        sense_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['sense'],
+        paired_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+        paired_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['paired'],
+        prefix = "{experiment}.gn_coverage",
+        background_type = lambda wildcards:
+            config[wildcards.experiment]["background_type"],
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "genome_coverage.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "genome_coverage.stderr.log"
+            ),
+
+    shell:
+        "(python {params.script} \
+        --bed_foreground {input.fg_bed} \
+        --bed_background {input.bg_bed} \
+        --foreground_frequencies {input.fg_frequencies} \
+        --background_frequencies {input.bg_frequencies} \
+        --out {params.output_dir} \
+        --prefix {params.prefix} \
+        --chromosomes {input.chromosome_info} \
+        --sense_f {params.sense_f} \
+        --sense_b {params.sense_b} \
+        --paired_f {params.paired_f} \
+        --paired_b {params.paired_b} \
+        --window_f {params.window_f} \
+        --window_b {params.window_b} \
+        --step_size {params.step_size} \
+        --data_type genome \
+        --background {params.background_type} \
+        --cutoff 1 \
+        --threads {threads}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_enriched_regions:
+    """
+        Obtain enriched regions for binding based on EM using
+        bg for noise estimation. Modified and adapted from:
+        Berger, Severin M., et al. "Crunch: Integrated processing and modeling of
+        ChIP-seq data in terms of regulatory motifs."
+        Genome research (2019): gr-239319
+        Custom script.
+   """
+    input:
+        reads = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}.gn_coverage.bed"
+            ),
+
+    output:
+        reads = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_enriched_regions.csv"
+            ),
+        parameters = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_parameters.csv"
+            ),
+        all_regions = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_regions.csv"
+            ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        plot_dir = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "enriched_regions_plots"),
+        fdr_cutoff = config['FDR'],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_find_enriched_regions.py"
+            ),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "enriched_regions.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "enriched_regions.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.plot_dir}; \
+        python {params.script} \
+        --windows_table {input.reads} \
+        --out {output.reads} \
+        --out_parameters {output.parameters} \
+        --all_regions {output.all_regions} \
+        --out_folder {params.plot_dir} \
+        --fdr_cutoff {params.fdr_cutoff} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_fit_peaks:
+    """
+        Obtain peaks
+    """
+    input:
+        fg_bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_replicates_merge_samples.sorted.bam"
+            ),
+        bg_bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_smis_merge_samples.sorted.bam"
+            ),
+        fg_frequencies = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_replicates.frequencies.csv"
+            ),
+        bg_frequencies = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_smis.frequencies.csv"
+            ),
+        reads = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_enriched_regions.csv"
+            ),
+        parameters = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_parameters.csv"
+            ),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+
+    output:
+        peaks = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"
+            ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_define_peaks.py"
+            ),
+        plot_dir = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "fit_peaks_plots"),
+        fragment_size = config["fragment_size"],
+        window_size = lambda wildcards:
+            config[wildcards.experiment]['window_f'],
+        paired_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+        paired_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['paired'],
+        sense_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['sense'],
+        sense_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['sense'],
+        background_type = lambda wildcards:
+            config[wildcards.experiment]["background_type"],        
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "fit_peaks.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "fit_peaks.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.plot_dir}; \
+        python {params.script} \
+        --enriched_regions {input.reads} \
+        --parameters {input.parameters} \
+        --fragment_size {params.fragment_size} \
+        --window_size {params.window_size} \
+        --chromosomes_g {input.chromosomes_g} \
+        --bam_foreground_g {input.fg_bam} \
+        --bam_background_g {input.bg_bam} \
+        --bam_fg_fq {input.fg_frequencies} \
+        --bam_bg_fq {input.bg_frequencies} \
+        --paired_f {params.paired_f} \
+        --paired_b {params.paired_b} \
+        --sense_f {params.sense_f} \
+        --sense_b {params.sense_b} \
+        --background {params.background_type} \
+        --out {output.peaks} \
+        --out_folder {params.plot_dir} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_peaks_split_sets_crosslink:
+    """
+        Split peaks into training and test set
+    """
+    input:
+        genome_fasta = config["genome"],
+        all_regions = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_regions.csv"
+            ),
+        reads = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"
+            ),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+
+    output:
+        training = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "crosslink",
+            "training.fasta"
+            )),
+        test = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "crosslink",
+            "test.fasta"
+            )),
+        training_bg = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "crosslink",
+            "training_bg.fasta"
+            )),
+        test_bg = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "crosslink",
+            "test_bg.fasta"
+            )),
+        dirtemp = temp(
+            directory(
+                os.path.join(
+                config["output_dir"],
+                "GN",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "crosslink",
+                "tmpdir")
+            )
+        ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_peaks_split_sets.py"
+            ),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "crosslink"
+            ),
+        genome_tag = config["genome_tag"],
+        peak_size = 50,
+        
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "peaks_split_sets__{run}_crosslink.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "peaks_split_sets__{run}_crosslink.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.output_dir}; \
+        mkdir -p {output.dirtemp}; \
+        python {params.script} \
+        --peaks_file {input.reads} \
+        --genome_fasta {input.genome_fasta} \
+        --all_regions {input.all_regions} \
+        --genome_tag {params.genome_tag} \
+        --tempdir {output.dirtemp} \
+        --chromosomes_g {input.chromosomes_g} \
+        --peak_size {params.peak_size} \
+        --out_folder {params.output_dir} \
+        --crosslink_type 'crosslink' \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_peaks_split_sets:
+    """
+        Split peaks into training and test set
+    """
+    input:
+        genome_fasta = config["genome"],
+        all_regions = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_regions.csv"
+            ),
+        reads = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"
+            ),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+
+    output:
+        training = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "peak_center",
+            "training.fasta"
+            )),
+        test = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "peak_center",
+            "test.fasta"
+            )),
+        training_bg = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "peak_center",
+            "training_bg.fasta"
+            )),
+        test_bg = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "peak_center",
+            "test_bg.fasta"
+            )),
+        dirtemp = temp(
+            directory(
+                os.path.join(
+                config["output_dir"],
+                "GN",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "peak_center",
+                "tmpdir")
+            )
+        ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_peaks_split_sets.py"
+            ),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "peak_center"
+            ),
+        genome_tag = config["genome_tag"],
+        peak_size = 50
+        
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "peaks_split_sets__{run}_peak_center.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "peaks_split_sets__{run}_peak_center.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.output_dir}; \
+        mkdir -p {output.dirtemp}; \
+        python {params.script} \
+        --peaks_file {input.reads} \
+        --genome_fasta {input.genome_fasta} \
+        --all_regions {input.all_regions} \
+        --genome_tag {params.genome_tag} \
+        --tempdir {output.dirtemp} \
+        --chromosomes_g {input.chromosomes_g} \
+        --peak_size {params.peak_size} \
+        --out_folder {params.output_dir} \
+        --crosslink_type 'peak_center' \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+rule GN_peaks_split_sets_crosslink_all_motifs:
+    """
+        Get all peaks crosslink
+    """
+    input:
+        genome_fasta = config["genome"],
+        all_regions = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_regions.csv"
+            ),
+        reads = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"
+            ),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+
+    output:
+        test = temp(
+            os.path.join(
+                config["output_dir"],
+                "GN",
+                "{experiment}",
+                "motif_analysis_final",
+                "crosslink",
+                "test.fasta")
+            ),
+        test_bg = temp(
+            os.path.join(
+                config["output_dir"],
+                "GN",
+                "{experiment}",
+                "motif_analysis_final",
+                "crosslink",
+                "test_bg.fasta")
+            ),
+        dirtemp = temp(
+            directory(
+                os.path.join(
+                config["output_dir"],
+                "GN",
+                "{experiment}",
+                "motif_analysis_final",
+                "crosslink",
+                "tmpdir")
+            )
+        ),
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "total_enrichment_peakstofasta.py"
+            ),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_final",
+            "crosslink",
+            ),
+        genome_tag = config["genome_tag"],
+        peak_size = 50,
+        
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_final",
+            "peaks_split_sets__all_motifs_crosslink.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_final",
+            "peaks_split_sets__all_motifs_crosslink.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.output_dir}; \
+        mkdir -p {output.dirtemp}; \
+        python {params.script} \
+        --peaks_file {input.reads} \
+        --genome_fasta {input.genome_fasta} \
+        --all_regions {input.all_regions} \
+        --genome_tag {params.genome_tag} \
+        --tempdir {output.dirtemp} \
+        --chromosomes_g {input.chromosomes_g} \
+        --peak_size {params.peak_size} \
+        --out_folder {params.output_dir} \
+        --crosslink_type 'crosslink' \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_peaks_split_sets_all_motifs:
+    """
+        get all peaks crosslink
+    """
+    input:
+        genome_fasta = config["genome"],
+        all_regions = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_regions.csv"
+            ),
+        reads = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"
+            ),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+
+    output:
+        test = temp(
+            os.path.join(
+                config["output_dir"],
+                "GN",
+                "{experiment}",
+                "motif_analysis_final",
+                "peak_center",
+                "test.fasta")
+            ),
+        test_bg = temp(
+            os.path.join(
+                config["output_dir"],
+                "GN",
+                "{experiment}",
+                "motif_analysis_final",
+                "peak_center",
+                "test_bg.fasta")
+            ),
+        dirtemp = temp(
+            directory(
+                os.path.join(
+                config["output_dir"],
+                "GN",
+                "{experiment}",
+                "motif_analysis_final",
+                "peak_center",
+                "tmpdir")
+            )
+        ),
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "total_enrichment_peakstofasta.py"
+            ),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_final",
+            "peak_center",
+            ),
+        genome_tag = config["genome_tag"],
+        peak_size = 50
+        
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_final",
+            "peaks_split_sets__all_motifs_standard.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{experiment}",
+            "motif_analysis_final",
+            "peaks_split_sets__all_motifs_standard.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.output_dir}; \
+        mkdir -p {output.dirtemp}; \
+        python {params.script} \
+        --peaks_file {input.reads} \
+        --genome_fasta {input.genome_fasta} \
+        --all_regions {input.all_regions} \
+        --genome_tag {params.genome_tag} \
+        --tempdir {output.dirtemp} \
+        --chromosomes_g {input.chromosomes_g} \
+        --peak_size {params.peak_size} \
+        --out_folder {params.output_dir} \
+        --crosslink_type 'peak_center' \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
diff --git a/workflow/rules/rcrunch_genomic_pe.smk b/workflow/rules/rcrunch_genomic_pe.smk
index 97d5040..c9e9985 100644
--- a/workflow/rules/rcrunch_genomic_pe.smk
+++ b/workflow/rules/rcrunch_genomic_pe.smk
@@ -3,7 +3,7 @@
 # ________________________________________________________________________________
 import os
 
-rule GN_map_STAR:
+rule GN_map_STAR_pe:
     """ map reads to genome"""
     input:
         genome = os.path.join(
@@ -16,7 +16,7 @@ rule GN_map_STAR:
                 os.path.join(
                     config["output_dir"],
                     "cutadapt",
-                    "{sample}_mate1.cutadapt_{format}.fastq.gz"),
+                    "{sample}_mate1.pe.cutadapt_{format}.fastq.gz"),
                 sample=wildcards.sample,
                 format=config[wildcards.sample]['format']
                 ),
@@ -25,36 +25,25 @@ rule GN_map_STAR:
                 os.path.join(
                     config["output_dir"],
                     "cutadapt",
-                    "{sample}_mate2.cutadapt_{format}.fastq.gz"),
+                    "{sample}_mate2.pe.cutadapt_{format}.fastq.gz"),
                 sample=wildcards.sample,
                 format=config[wildcards.sample]['format']
                 ),
     
     output:
-        bamfile = temp(os.path.join(
+        bamfile = os.path.join(
             config["output_dir"],
             "GN",
             "alignment",
             "{sample}",
-            "{sample}.bam"
-            )),
-        logfile = temp(os.path.join(
+            "{sample}.pe.bam"
+            ),
+        logfile = os.path.join(
             config["output_dir"],
             "GN",
             "alignment",
             "{sample}",
-            "{sample}Log.final.out"
-            )),
-        outdir = temp(
-            directory(
-                os.path.join(
-                    config["output_dir"],
-                    "GN",
-                    "alignment",
-                    "{sample}")
-                    )
-                ),
-
+            "{sample}.pe.Log.final.out"),
     params:
         cluster_log_path = config["cluster_log"],
         sample_id = "{sample}",
@@ -67,13 +56,15 @@ rule GN_map_STAR:
             "GN",
             "alignment",
             "{sample}",
-            "{sample}",
+            "{sample}.pe.",
             ),
         multimappers = config['multimappers']
     
     singularity:
         "docker://zavolab/star:2.6.0a"
 
+    shadow: "full"
+    
     threads: 12
 
     log:
@@ -81,13 +72,13 @@ rule GN_map_STAR:
             config["local_log"],
             "GN",
             "{sample}",
-            "map_STAR.stdout.log"
+            "map_STAR.pe.stdout.log"
             ),
         stderr = os.path.join(
             config["local_log"],
             "GN",
             "{sample}",
-            "map_STAR.stderr.log"
+            "map_STAR.pe.stderr.log"
             ),
 
     shell:
@@ -114,281 +105,9 @@ rule GN_map_STAR:
         ) 1> {log.stdout} 2> {log.stderr}"
 
 
-rule GN_index_mappings:
-    """
-        Sort and index alignments
-        according to coordinates
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "alignment",
-            "{sample}",
-            "{sample}.bam"
-            ),
-        indir = os.path.join(
-            config["output_dir"],
-            "GN",
-            "alignment",
-            "{sample}"
-            ),
-
-    output:
-        bam = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "alignment",
-            "{sample}_sorted.bam"
-            )),
-
-        bai = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "alignment",
-            "{sample}_sorted.bam.bai"
-            )),
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
-            config["output_dir"],
-            "GN",
-            "alignment",
-            "{sample}_temp"
-            ),
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "sort_mappings.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "sort_mappings.stderr.log"
-            ),
-
-    shell:
-        "(samtools sort \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        samtools index \
-        {output.bam} {output.bai} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_flag_ncRNA_reads:
-    """
-        Remove reads mapping to specific
-        nc categories specified in the config
-        - identification step
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "alignment",
-            "{sample}_sorted.bam"
-            ),
-        bai = os.path.join(
-            config["output_dir"],
-            "GN",
-            "alignment",
-            "{sample}_sorted.bam.bai"
-            ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_filter_ncRNAs.py"
-            ),
-        ncRNAs = config['ncRNAs'],
-        ncRNA_biotypes = expand(config['ncRNA_biotypes']),
-        paired = lambda wildcards:
-            config[wildcards.sample]['paired'],
-        sense = lambda wildcards:
-            config[wildcards.sample]['sense'],
-        
-    output:
-        outfile = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.ncrna.txt")),
-        flag = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}_done"
-            )),
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "flag_ncRNA_reads.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "flag_ncRNA_reads.stderr.log"
-            ),
-
-    shell:
-        "(python {params.script} \
-        --bamfile {input.bam} \
-        --RNA_central {params.ncRNAs} \
-        --outfile {output.outfile} \
-        --paired {params.paired} \
-        --sense {params.sense} \
-        --ncRNAs {params.ncRNA_biotypes} \
-        --flag {output.flag} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_remove_ncRNA_reads:
-    """
-        Removal of the ncreads 
-        - removal step
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "alignment",
-            "{sample}_sorted.bam"
-            ),
-        read_names = os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.ncrna.txt"
-            ),
-
-    output:
-        bam = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.filtered.bam"
-            )),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://zavolab/picard:2.18.9"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "remove_ncRNA_reads.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "remove_ncRNA_reads.stderr.log"
-            ),
-
-    shell:
-        "(java -jar /usr/local/bin/picard.jar FilterSamReads \
-        I={input.bam} \
-        O={output.bam} \
-        READ_LIST_FILE={input.read_names} \
-        FILTER=excludeReadList \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_sort_ncRNA_rm:
-   """
-        Sort and index alignments
-        according to coordinates
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.filtered.bam"
-            ),
-
-    output:
-        bam = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.filtered.sorted.bam"
-            )),
-        bai = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.filtered.sorted.bam.bai"
-            )),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}_temp"
-            ),
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "sort_ncrna_rm.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "sort_ncrna_rm.stderr.log"
-            ),
-
-    shell:
-        "(samtools sort \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        samtools index \
-        {output.bam} {output.bai} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_flag_duplicates:
+rule GN_umi_collapse_pe:
     """
-        Duplicate removal in the absence of UMIs
-        - detection step
+        Duplicate removal when there are UMIs
     """
     input:
         bam = os.path.join(
@@ -408,1214 +127,40 @@ rule GN_flag_duplicates:
         bam = temp(os.path.join(
             config["output_dir"],
             "GN",
-            "flag_duplicates",
-            "{sample}.duplicates.Processed.out.bam"))
+            "remove_duplicates",
+            "{sample}.umis.pe.bam"
+            )),
 
     params:
         cluster_log_path = config["cluster_log"],
-        output_dir = os.path.join(
-            config["output_dir"],
-            "GN",
-            "flag_duplicates"
-            ),
-        outFileNamePrefix = os.path.join(
-            config["output_dir"],
-            "GN",
-            "flag_duplicates",
-            "{sample}.duplicates."
-            ),
-
-    singularity:
-        "docker://zavolab/star:2.6.0a"
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "flag_duplicates.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "flag_duplicates.stderr.log"
-            ),
-
-    shell:
-        "(STAR \
-        --inputBAMfile {input.bam} \
-        --bamRemoveDuplicatesType UniqueIdenticalNotMulti \
-        --runMode inputAlignmentsFromBAM \
-        --outFileNamePrefix {params.outFileNamePrefix} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_remove_duplicates:
-    """
-        Duplicate removal in the absence of UMIs
-        - removal step
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "flag_duplicates",
-            "{sample}.duplicates.Processed.out.bam"
-            ),
-
-    output:
-        bam = temp(os.path.join(
+        metrics_file = os.path.join(
             config["output_dir"],
             "GN",
             "remove_duplicates",
-            "{sample}.duplicates.bam"
-            )),
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_filter_duplicates.py"
+            "{sample}.umis.pe.metrics"
             ),
 
     singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
+        "docker://zavolab/umi-tools:0.5.4"
 
     log:
         stdout = os.path.join(
             config["local_log"],
             "GN",
             "{sample}",
-            "remove_duplicates.stdout.log"
+            "umi_collapse.pe.stdout.log"
             ),
         stderr = os.path.join(
             config["local_log"],
             "GN",
             "{sample}",
-            "remove_duplicates.stderr.log"
+            "umi_collapse.pe.stderr.log"
             ),
 
     shell:
-        "(python {params.script} \
-        --bamfile {input.bam} \
-        --outfile {output.bam} \
+        "(umi_tools \
+        dedup \
+        -I {input.bam} \
+        --paired \
+        -S {output.bam} \
         ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_no_duplicate_removal:
-    """ 
-        No duplicate removal
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.filtered.sorted.bam"
-            ),
-        bai = os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.filtered.sorted.bam.bai"
-            ),
-
-    output:
-        bam = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_duplicates",
-            "{sample}.with_duplicates.bam"
-            )),
-        bai = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_duplicates",
-            "{sample}.with_duplicates.bam.bai"
-            )),
-
-    singularity:
-        "docker://bash:5.0.16"
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "no_duplicate_collapse.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "no_duplicate_collapse.stderr.log"
-            ),
-
-    shell:
-        "(cp {input.bam} {output.bam}; \
-          cp {input.bai} {output.bai}; \
-          ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_umi_collapse:
-    """
-        Duplicate removal when there are UMIs
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.filtered.sorted.bam"
-            ),
-        bai = os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_ncRNAs",
-            "{sample}.filtered.sorted.bam.bai"
-            ),
-
-    output:
-        bam = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_duplicates",
-            "{sample}.umis.bam"
-            )),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        metrics_file = os.path.join(
-            config["output_dir"],
-            "GN",
-            "remove_duplicates",
-            "{sample}.umis.metrics"
-            ),
-
-    singularity:
-        "docker://zavolab/umi-tools:0.5.4"
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "umi_collapse.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{sample}",
-            "umi_collapse.stderr.log"
-            ),
-
-    shell:
-        "(umi_tools \
-        dedup \
-        -I {input.bam} \
-        --paired \
-        -S {output.bam} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_merge:
-    input:
-        bam = lambda wildcards:
-            expand(
-                os.path.join(
-                    config["output_dir"],
-                    "GN",
-                    "remove_duplicates",
-                    "{sample}.{dup_type}.bam"),
-                sample=config[wildcards.experiment][wildcards.name],
-                dup_type=config[config[wildcards.experiment][wildcards.name][0]]['dup_type'],
-                ),
-    output:
-        bam = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.bam"
-            )),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "merge_samples__{name}.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "merge_samples__{name}.stderr.log"
-            ),
-
-    shell:
-        "(samtools merge \
-        {output.bam} {input.bam}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_sort_merged:
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.bam"
-            ),
-
-    output:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.sorted.bam"
-            ),
-        bai = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.sorted.bam.bai"
-            ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples_temp")
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "merge_samples__{name}.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "merge_samples__{name}.stderr.log"
-            ),
-
-    shell:
-        "(samtools sort \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        samtools index \
-        {output.bam} {output.bai} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_read_frequencies:
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.sorted.bam"
-            ),
-
-    output:
-        frequencies = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_{name}.frequencies.csv"
-            )),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        paired = lambda wildcards:
-            config[config[wildcards.experiment][wildcards.name][0]]['paired'],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_bam_get_read_frequencies.py"
-            ),
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "read_frequencies__{name}.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "read_frequencies__{name}.stderr.log"
-            ),
-
-    shell:
-        "(python {params.script} \
-        --bamfile {input.bam} \
-        --paired {params.paired} \
-        --outfile {output.frequencies}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_bam_to_bed:
-    """
-        Convert bamfile to bedfile using bedtools
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.sorted.bam"
-            ),
-
-    output:
-        bed = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.bed"
-            )),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://zavolab/bedtools:2.27.0"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "bam_to_bed___{name}.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "bam_to_bed__{name}.stderr.log"
-            ),
-
-    shell:
-        "(bedtools bamtobed \
-        -i {input.bam} > {output.bed}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_genome_coverage:
-    """
-        Obtain windows of read coverage genome
-    """
-    input:
-        chromosome_info = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"
-            ),
-        fg_bed = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_replicates_merge_samples.bed"
-            ),
-        bg_bed = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_smis_merge_samples.bed"
-            ),
-        fg_frequencies = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_replicates.frequencies.csv"
-            ),
-        bg_frequencies = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_smis.frequencies.csv"
-            ),
-
-    output:
-        coverage = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}.gn_coverage.bed"
-            )),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "sliding_windows.py"
-            ),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}"),
-        window_f = lambda wildcards:
-            config[wildcards.experiment]["window_f"],
-        window_b = lambda wildcards:
-            config[wildcards.experiment]["window_b"],
-        step_size = lambda wildcards:
-            config[wildcards.experiment]["step_size"],
-        sense_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['sense'],
-        sense_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['sense'],
-        paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
-        paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
-        prefix = "{experiment}.gn_coverage",
-        background_type = lambda wildcards:
-            config[wildcards.experiment]["background_type"],
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 12
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "genome_coverage.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "genome_coverage.stderr.log"
-            ),
-
-    shell:
-        "(python {params.script} \
-        --bed_foreground {input.fg_bed} \
-        --bed_background {input.bg_bed} \
-        --foreground_frequencies {input.fg_frequencies} \
-        --background_frequencies {input.bg_frequencies} \
-        --out {params.output_dir} \
-        --prefix {params.prefix} \
-        --chromosomes {input.chromosome_info} \
-        --sense_f {params.sense_f} \
-        --sense_b {params.sense_b} \
-        --paired_f {params.paired_f} \
-        --paired_b {params.paired_b} \
-        --window_f {params.window_f} \
-        --window_b {params.window_b} \
-        --step_size {params.step_size} \
-        --data_type genome \
-        --background {params.background_type} \
-        --cutoff 1 \
-        --threads {threads}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_enriched_regions:
-    """
-        Obtain enriched regions for binding based on EM using
-        bg for noise estimation. Modified and adapted from:
-        Berger, Severin M., et al. "Crunch: Integrated processing and modeling of
-        ChIP-seq data in terms of regulatory motifs."
-        Genome research (2019): gr-239319
-        Custom script.
-   """
-    input:
-        reads = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}.gn_coverage.bed"
-            ),
-
-    output:
-        reads = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_enriched_regions.csv"
-            ),
-        parameters = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_parameters.csv"
-            ),
-        all_regions = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_regions.csv"
-            ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        plot_dir = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "enriched_regions_plots"),
-        fdr_cutoff = config['FDR'],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_find_enriched_regions.py"
-            ),
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "enriched_regions.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "enriched_regions.stderr.log"
-            ),
-
-    shell:
-        "(mkdir -p {params.plot_dir}; \
-        python {params.script} \
-        --windows_table {input.reads} \
-        --out {output.reads} \
-        --out_parameters {output.parameters} \
-        --all_regions {output.all_regions} \
-        --out_folder {params.plot_dir} \
-        --fdr_cutoff {params.fdr_cutoff} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_fit_peaks:
-    """
-        Obtain peaks
-    """
-    input:
-        fg_bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_replicates_merge_samples.sorted.bam"
-            ),
-        bg_bam = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_smis_merge_samples.sorted.bam"
-            ),
-        fg_frequencies = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_replicates.frequencies.csv"
-            ),
-        bg_frequencies = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_smis.frequencies.csv"
-            ),
-        reads = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_enriched_regions.csv"
-            ),
-        parameters = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_parameters.csv"
-            ),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"
-            ),
-
-    output:
-        peaks = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"
-            ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_define_peaks.py"
-            ),
-        plot_dir = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "fit_peaks_plots"),
-        fragment_size = config["fragment_size"],
-        window_size = lambda wildcards:
-            config[wildcards.experiment]['window_f'],
-        paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
-        paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
-        sense_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['sense'],
-        sense_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['sense'],
-        background_type = lambda wildcards:
-            config[wildcards.experiment]["background_type"],        
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 12
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "fit_peaks.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "fit_peaks.stderr.log"
-            ),
-
-    shell:
-        "(mkdir -p {params.plot_dir}; \
-        python {params.script} \
-        --enriched_regions {input.reads} \
-        --parameters {input.parameters} \
-        --fragment_size {params.fragment_size} \
-        --window_size {params.window_size} \
-        --chromosomes_g {input.chromosomes_g} \
-        --bam_foreground_g {input.fg_bam} \
-        --bam_background_g {input.bg_bam} \
-        --bam_fg_fq {input.fg_frequencies} \
-        --bam_bg_fq {input.bg_frequencies} \
-        --paired_f {params.paired_f} \
-        --paired_b {params.paired_b} \
-        --sense_f {params.sense_f} \
-        --sense_b {params.sense_b} \
-        --background {params.background_type} \
-        --out {output.peaks} \
-        --out_folder {params.plot_dir} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_peaks_split_sets_crosslink:
-    """
-        Split peaks into training and test set
-    """
-    input:
-        genome_fasta = config["genome"],
-        all_regions = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_regions.csv"
-            ),
-        reads = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"
-            ),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"
-            ),
-
-    output:
-        training = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "crosslink",
-            "training.fasta"
-            )),
-        test = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "crosslink",
-            "test.fasta"
-            )),
-        training_bg = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "crosslink",
-            "training_bg.fasta"
-            )),
-        test_bg = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "crosslink",
-            "test_bg.fasta"
-            )),
-        dirtemp = temp(
-            directory(
-                os.path.join(
-                config["output_dir"],
-                "GN",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "crosslink",
-                "tmpdir")
-            )
-        ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_peaks_split_sets.py"
-            ),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "crosslink"
-            ),
-        genome_tag = config["genome_tag"],
-        peak_size = 50,
-        
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "peaks_split_sets__{run}_crosslink.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "peaks_split_sets__{run}_crosslink.stderr.log"
-            ),
-
-    shell:
-        "(mkdir -p {params.output_dir}; \
-        mkdir -p {output.dirtemp}; \
-        python {params.script} \
-        --peaks_file {input.reads} \
-        --genome_fasta {input.genome_fasta} \
-        --all_regions {input.all_regions} \
-        --genome_tag {params.genome_tag} \
-        --tempdir {output.dirtemp} \
-        --chromosomes_g {input.chromosomes_g} \
-        --peak_size {params.peak_size} \
-        --out_folder {params.output_dir} \
-        --crosslink_type 'crosslink' \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_peaks_split_sets:
-    """
-        Split peaks into training and test set
-    """
-    input:
-        genome_fasta = config["genome"],
-        all_regions = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_regions.csv"
-            ),
-        reads = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"
-            ),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"
-            ),
-
-    output:
-        training = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "peak_center",
-            "training.fasta"
-            )),
-        test = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "peak_center",
-            "test.fasta"
-            )),
-        training_bg = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "peak_center",
-            "training_bg.fasta"
-            )),
-        test_bg = temp(os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "peak_center",
-            "test_bg.fasta"
-            )),
-        dirtemp = temp(
-            directory(
-                os.path.join(
-                config["output_dir"],
-                "GN",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "peak_center",
-                "tmpdir")
-            )
-        ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_peaks_split_sets.py"
-            ),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "peak_center"
-            ),
-        genome_tag = config["genome_tag"],
-        peak_size = 50
-        
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "peaks_split_sets__{run}_peak_center.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "peaks_split_sets__{run}_peak_center.stderr.log"
-            ),
-
-    shell:
-        "(mkdir -p {params.output_dir}; \
-        mkdir -p {output.dirtemp}; \
-        python {params.script} \
-        --peaks_file {input.reads} \
-        --genome_fasta {input.genome_fasta} \
-        --all_regions {input.all_regions} \
-        --genome_tag {params.genome_tag} \
-        --tempdir {output.dirtemp} \
-        --chromosomes_g {input.chromosomes_g} \
-        --peak_size {params.peak_size} \
-        --out_folder {params.output_dir} \
-        --crosslink_type 'peak_center' \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-rule GN_peaks_split_sets_crosslink_all_motifs:
-    """
-        Get all peaks crosslink
-    """
-    input:
-        genome_fasta = config["genome"],
-        all_regions = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_regions.csv"
-            ),
-        reads = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"
-            ),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"
-            ),
-
-    output:
-        test = temp(
-            os.path.join(
-                config["output_dir"],
-                "GN",
-                "{experiment}",
-                "motif_analysis_final",
-                "crosslink",
-                "test.fasta")
-            ),
-        test_bg = temp(
-            os.path.join(
-                config["output_dir"],
-                "GN",
-                "{experiment}",
-                "motif_analysis_final",
-                "crosslink",
-                "test_bg.fasta")
-            ),
-        dirtemp = temp(
-            directory(
-                os.path.join(
-                config["output_dir"],
-                "GN",
-                "{experiment}",
-                "motif_analysis_final",
-                "crosslink",
-                "tmpdir")
-            )
-        ),
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "total_enrichment_peakstofasta.py"
-            ),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_final",
-            "crosslink",
-            ),
-        genome_tag = config["genome_tag"],
-        peak_size = 50,
-        
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_final",
-            "peaks_split_sets__all_motifs_crosslink.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_final",
-            "peaks_split_sets__all_motifs_crosslink.stderr.log"
-            ),
-
-    shell:
-        "(mkdir -p {params.output_dir}; \
-        mkdir -p {output.dirtemp}; \
-        python {params.script} \
-        --peaks_file {input.reads} \
-        --genome_fasta {input.genome_fasta} \
-        --all_regions {input.all_regions} \
-        --genome_tag {params.genome_tag} \
-        --tempdir {output.dirtemp} \
-        --chromosomes_g {input.chromosomes_g} \
-        --peak_size {params.peak_size} \
-        --out_folder {params.output_dir} \
-        --crosslink_type 'crosslink' \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule GN_peaks_split_sets_all_motifs:
-    """
-        get all peaks crosslink
-    """
-    input:
-        genome_fasta = config["genome"],
-        all_regions = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_regions.csv"
-            ),
-        reads = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"
-            ),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"
-            ),
-
-    output:
-        test = temp(
-            os.path.join(
-                config["output_dir"],
-                "GN",
-                "{experiment}",
-                "motif_analysis_final",
-                "peak_center",
-                "test.fasta")
-            ),
-        test_bg = temp(
-            os.path.join(
-                config["output_dir"],
-                "GN",
-                "{experiment}",
-                "motif_analysis_final",
-                "peak_center",
-                "test_bg.fasta")
-            ),
-        dirtemp = temp(
-            directory(
-                os.path.join(
-                config["output_dir"],
-                "GN",
-                "{experiment}",
-                "motif_analysis_final",
-                "peak_center",
-                "tmpdir")
-            )
-        ),
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "total_enrichment_peakstofasta.py"
-            ),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_final",
-            "peak_center",
-            ),
-        genome_tag = config["genome_tag"],
-        peak_size = 50
-        
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_final",
-            "peaks_split_sets__all_motifs_standard.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "GN",
-            "{experiment}",
-            "motif_analysis_final",
-            "peaks_split_sets__all_motifs_standard.stderr.log"
-            ),
-
-    shell:
-        "(mkdir -p {params.output_dir}; \
-        mkdir -p {output.dirtemp}; \
-        python {params.script} \
-        --peaks_file {input.reads} \
-        --genome_fasta {input.genome_fasta} \
-        --all_regions {input.all_regions} \
-        --genome_tag {params.genome_tag} \
-        --tempdir {output.dirtemp} \
-        --chromosomes_g {input.chromosomes_g} \
-        --peak_size {params.peak_size} \
-        --out_folder {params.output_dir} \
-        --crosslink_type 'peak_center' \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
diff --git a/workflow/rules/rcrunch_genomic_se.smk b/workflow/rules/rcrunch_genomic_se.smk
new file mode 100644
index 0000000..4670e88
--- /dev/null
+++ b/workflow/rules/rcrunch_genomic_se.smk
@@ -0,0 +1,151 @@
+import os
+
+rule GN_map_STAR_se:
+    """ map reads to genome"""
+    input:
+        genome = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+        reads = lambda wildcards:
+            expand(
+                os.path.join(
+                    config["output_dir"],
+                    "cutadapt",
+                    "{sample}_mate1.se.cutadapt_{format}.fastq.gz"),
+                sample=wildcards.sample,
+                format=config[wildcards.sample]['format']
+            ),
+    output:
+        bamfile = os.path.join(
+            config["output_dir"],
+            "GN",
+            "alignment",
+            "{sample}",
+            "{sample}.se.bam"
+            ),
+        logfile = os.path.join(
+            config["output_dir"],
+            "GN",
+            "alignment",
+            "{sample}",
+            "{sample}.se.Log.final.out"),
+    params:
+        cluster_log_path = config["cluster_log"],
+        sample_id = "{sample}",
+        genome = os.path.join(
+            config["output_dir"],
+            "STAR_index"
+            ),
+        outFileNamePrefix = os.path.join(
+            config["output_dir"],
+            "GN",
+            "alignment",
+            "{sample}",
+            "{sample}.se.",
+            ),
+        multimappers = config['multimappers']
+    shadow: "full"
+
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "map_STAR.se.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "map_STAR.se.stderr.log"
+            ),
+
+    shell:
+        "(STAR \
+        --runMode alignReads \
+        --runThreadN {threads} \
+        --genomeDir {params.genome} \
+        --readFilesIn {input.reads} \
+        --readFilesCommand zcat \
+        --outSAMunmapped None  \
+        --outFilterMultimapNmax {params.multimappers} \
+        --outFilterMultimapScoreRange 1 \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        --outSAMattributes All \
+        --outStd BAM_Unsorted \
+        --outSAMtype BAM Unsorted \
+        --outFilterScoreMinOverLread 0.2 \
+        --outFilterMatchNminOverLread 0.2 \
+        --outFilterMismatchNoverLmax 0.1 \
+        --outFilterType BySJout \
+        --outReadsUnmapped Fastx \
+        --outSAMattrRGline ID:rcrunch SM:{params.sample_id} \
+        --alignEndsType EndToEnd > {output.bamfile} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule GN_umi_collapse_se:
+    """
+        Duplicate removal when there are UMIs
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.sorted.bam"
+            ),
+        bai = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_ncRNAs",
+            "{sample}.filtered.sorted.bam.bai"
+            ),
+
+    output:
+        bam = temp(os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_duplicates",
+            "{sample}.umis.se.bam"
+            )),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        metrics_file = os.path.join(
+            config["output_dir"],
+            "GN",
+            "remove_duplicates",
+            "{sample}.umis.se.metrics"
+            ),
+
+    singularity:
+        "docker://zavolab/umi-tools:0.5.4"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "umi_collapse.se.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "GN",
+            "{sample}",
+            "umi_collapse.se.stderr.log"
+            ),
+
+    shell:
+        "(umi_tools \
+        dedup \
+        -I {input.bam} \
+        -S {output.bam} \
+        ) 1> {log.stdout} 2> {log.stderr}"
diff --git a/workflow/rules/rcrunch_preprocessing.smk b/workflow/rules/rcrunch_preprocessing.smk
new file mode 100644
index 0000000..7a1fd3b
--- /dev/null
+++ b/workflow/rules/rcrunch_preprocessing.smk
@@ -0,0 +1,111 @@
+import os
+rule create_index:
+    """
+        Create an index of the human genome
+    """
+    input:
+        genome = config["genome"],
+        gtf = config["gtf"]
+
+    output:
+        chromosome_info = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+        chromosomes_names = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrName.txt"
+            ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        output_dir = os.path.join(
+            config["output_dir"],
+            "STAR_index"
+            ),
+        outFileNamePrefix = os.path.join(
+            config["output_dir"],
+            "STAR_index/STAR_"
+            ),
+        sjdbOverhang = config["sjdbOverhang"],
+
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "STAR_index.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "STAR_index.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.output_dir}; \
+        chmod -R 777 {params.output_dir}; \
+        STAR \
+        --runMode genomeGenerate \
+        --sjdbOverhang {params.sjdbOverhang} \
+        --genomeDir {params.output_dir} \
+        --genomeFastaFiles {input.genome} \
+        --runThreadN {threads} \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        --sjdbGTFfile {input.gtf}) 1> {log.stdout} 2> {log.stderr}"
+
+rule create_csv_from_ensembl_gtf:
+    """
+        Create a csv based on the gtf file of ensembl
+    """
+    input:
+        gtf = config["gtf"]
+
+    output:
+        output = os.path.join(
+            config["output_dir"],
+            "ensembl_csv",
+            "ensembl.csv"),
+
+        flag = os.path.join(
+            config["output_dir"],
+            "ensembl_csv",
+            "done"),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_ensembl__gtf_to_csv.py"
+            ),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "ensembl_csv.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "preprocessing",
+            "ensembl_csv.stderr.log"
+            ),
+
+    shell:
+        "(python {params.script} \
+        --gtf_file {input.gtf} \
+        --flag {output.flag} \
+        --out_file {output.output}) 1> {log.stdout} 2> {log.stderr}"
+
diff --git a/workflow/rules/rcrunch_preprocessing_pe.smk b/workflow/rules/rcrunch_preprocessing_pe.smk
index 054bdc6..bdbaac2 100644
--- a/workflow/rules/rcrunch_preprocessing_pe.smk
+++ b/workflow/rules/rcrunch_preprocessing_pe.smk
@@ -2,120 +2,8 @@
 # Preprocessing subworkflow for paired-end data. First step of analysis
 # ________________________________________________________________________________
 import os
-rule create_index:
-    """
-        Create an index of the human genome
-    """
-    input:
-        genome = config["genome"],
-        gtf = config["gtf"]
-
-    output:
-        chromosome_info = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"
-            ),
-        chromosomes_names = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrName.txt"
-            ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        output_dir = os.path.join(
-            config["output_dir"],
-            "STAR_index"
-            ),
-        outFileNamePrefix = os.path.join(
-            config["output_dir"],
-            "STAR_index/STAR_"
-            ),
-        sjdbOverhang = config["sjdbOverhang"],
-
-    singularity:
-        "docker://zavolab/star:2.6.0a"
-
-    threads: 12
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "preprocessing",
-            "STAR_index.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "preprocessing",
-            "STAR_index.stderr.log"
-            ),
-
-    shell:
-        "(mkdir -p {params.output_dir}; \
-        chmod -R 777 {params.output_dir}; \
-        STAR \
-        --runMode genomeGenerate \
-        --sjdbOverhang {params.sjdbOverhang} \
-        --genomeDir {params.output_dir} \
-        --genomeFastaFiles {input.genome} \
-        --runThreadN {threads} \
-        --outFileNamePrefix {params.outFileNamePrefix} \
-        --sjdbGTFfile {input.gtf}) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule create_csv_from_ensembl_gtf:
-    """
-        Create a csv based on the gtf file of ensembl
-    """
-    input:
-        gtf = config["gtf"]
-
-    output:
-        output = os.path.join(
-            config["output_dir"],
-            "ensembl_csv",
-            "ensembl.csv"
-            ),
-        flag = os.path.join(
-            config["output_dir"],
-            "ensembl_csv",
-            "done"
-            ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_ensembl__gtf_to_csv.py"
-            ),
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "preprocessing",
-            "ensembl_csv.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "preprocessing",
-            "ensembl_csv.stderr.log"
-            ),
-
-    shell:
-        "(python {params.script} \
-        --gtf_file {input.gtf} \
-        --flag {output.flag} \
-        --out_file {output.output}) 1> {log.stdout} 2> {log.stderr}"
-
 
-rule cutadapt:
+rule cutadapt_pe:
     """
         Trim 3' and 5' adapters
     """
@@ -140,12 +28,12 @@ rule cutadapt:
         reads1 = os.path.join(
             config["output_dir"],
             "cutadapt",
-            "{sample}_mate1.cutadapt_standard.fastq.gz"),
+            "{sample}_mate1.pe.cutadapt_standard.fastq.gz"),
 
         reads2 = os.path.join(
             config["output_dir"],
             "cutadapt",
-            "{sample}_mate2.cutadapt_standard.fastq.gz")
+            "{sample}_mate2.pe.cutadapt_standard.fastq.gz")
 
     params:
         cluster_log_path = config["cluster_log"],
@@ -159,12 +47,12 @@ rule cutadapt:
         stdout = os.path.join(
             config["local_log"],
             "preprocessing",
-            "cutadapt__{sample}.stdout.log"
+            "cutadapt__{sample}.pe.stdout.log"
             ),
         stderr = os.path.join(
             config["local_log"],
             "preprocessing",
-            "cutadapt__{sample}.stderr.log"
+            "cutadapt__{sample}.pe.stderr.log"
             )
 
 
@@ -186,7 +74,7 @@ rule cutadapt:
         {input.reads2}) 1> {log.stdout} 2> {log.stderr}"
 
 
-rule umi_tools_format:
+rule umi_tools_format_pe:
     """
         Create a UMI-tools barcode collapse format for any ENCODE samples.
     """
@@ -194,24 +82,24 @@ rule umi_tools_format:
         reads1 = os.path.join(
             config["output_dir"],
             "cutadapt",
-            "{sample}_mate1.cutadapt_standard.fastq.gz"
+            "{sample}_mate1.pe.cutadapt_standard.fastq.gz"
             ),
         reads2 = os.path.join(
             config["output_dir"],
             "cutadapt",
-            "{sample}_mate2.cutadapt_standard.fastq.gz"
+            "{sample}_mate2.pe.cutadapt_standard.fastq.gz"
             ),
 
     output:
         reads1 = os.path.join(
             config["output_dir"],
             "cutadapt",
-            "{sample}_mate1.cutadapt_encode.fastq.gz"
+            "{sample}_mate1.pe.cutadapt_encode.fastq.gz"
             ),
         reads2 = os.path.join(
             config["output_dir"],
             "cutadapt",
-            "{sample}_mate2.cutadapt_encode.fastq.gz"
+            "{sample}_mate2.pe.cutadapt_encode.fastq.gz"
             ),
 
     params:
@@ -233,12 +121,12 @@ rule umi_tools_format:
         stdout = os.path.join(
             config["local_log"],
             "preprocessing",
-            "umi_format__{sample}.stdout.log"
+            "umi_format__{sample}.pe.stdout.log"
             ),
         stderr = os.path.join(
             config["local_log"],
             "preprocessing",
-            "umi_format__{sample}.stderr.log"
+            "umi_format__{sample}.pe.stderr.log"
             ),
 
     shell:
diff --git a/workflow/rules/rcrunch_preprocessing_se.smk b/workflow/rules/rcrunch_preprocessing_se.smk
index 01504fb..2346aa4 100644
--- a/workflow/rules/rcrunch_preprocessing_se.smk
+++ b/workflow/rules/rcrunch_preprocessing_se.smk
@@ -2,115 +2,8 @@
 # Preprocessing subworkflow for paired-end data. First step of analysis
 # ________________________________________________________________________________
 import os
-rule create_index:
-    """
-        Create an index of the human genome
-    """
-    input:
-        genome = config["genome"],
-        gtf = config["gtf"]
-
-    output:
-        chromosome_info = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"),
-
-        chromosomes_names = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrName.txt")
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        output_dir = os.path.join(
-            config["output_dir"],
-            "STAR_index"),
-        outFileNamePrefix = os.path.join(
-            config["output_dir"],
-            "STAR_index/STAR_"),
-        sjdbOverhang = config["sjdbOverhang"]
-
-    singularity:
-        "docker://zavolab/star:2.6.0a"
-
-    threads: 12
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "preprocessing",
-            "STAR_index.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "preprocessing",
-            "STAR_index.stderr.log"
-            ),
-
-    shell:
-        "(mkdir -p {params.output_dir}; \
-        chmod -R 777 {params.output_dir}; \
-        STAR \
-        --runMode genomeGenerate \
-        --sjdbOverhang {params.sjdbOverhang} \
-        --genomeDir {params.output_dir} \
-        --genomeFastaFiles {input.genome} \
-        --runThreadN {threads} \
-        --outFileNamePrefix {params.outFileNamePrefix} \
-        --sjdbGTFfile {input.gtf}) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule create_csv_from_ensembl_gtf:
-    """
-        Create a csv based on the gtf file of ensembl
-    """
-    input:
-        gtf = config["gtf"]
-
-    output:
-        output = os.path.join(
-            config["output_dir"],
-            "ensembl_csv",
-            "ensembl.csv"),
-
-        flag = os.path.join(
-            config["output_dir"],
-            "ensembl_csv",
-            "done"),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_ensembl__gtf_to_csv.py"
-            ),
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "preprocessing",
-            "ensembl_csv.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "preprocessing",
-            "ensembl_csv.stderr.log"
-            ),
-
-    shell:
-        "(python {params.script} \
-        --gtf_file {input.gtf} \
-        --flag {output.flag} \
-        --out_file {output.output}) 1> {log.stdout} 2> {log.stderr}"
 
-rule cutadapt:
+rule cutadapt_se:
     """
         Trim 3' and 5' adapters
     """
@@ -124,8 +17,8 @@ rule cutadapt:
     output:
         reads = os.path.join(
             config["output_dir"],
-            "cutadapt_replicates",
-            "{sample}.cutadapt_standard.fastq.gz")
+            "cutadapt",
+            "{sample}_mate1.se.cutadapt_standard.fastq.gz")
 
     params:
         cluster_log_path = config["cluster_log"],
@@ -139,16 +32,16 @@ rule cutadapt:
         stdout = os.path.join(
             config["local_log"],
             "preprocessing",
-            "cutadapt__{sample}.stdout.log"
+            "cutadapt__{sample}.se.stdout.log"
             ),
         stderr = os.path.join(
             config["local_log"],
             "preprocessing",
-            "cutadapt__{sample}.stderr.log"
+            "cutadapt__{sample}.se.stderr.log"
             )
 
     shell:
-        "cutadapt \
+        "(cutadapt \
         -e 0.1 \
         -O 1 \
         -j {threads} \
@@ -160,21 +53,21 @@ rule cutadapt:
         {input.reads}) 1> {log.stdout} 2> {log.stderr}"
 
 
-rule umi_tools_format:
+rule umi_tools_format_se:
     """
         Create a UMI-tools barcode collapse format for any ENCODE samples.
     """
     input:
         reads = os.path.join(
             config["output_dir"],
-            "cutadapt_replicates2",
-            "{sample}.cutadapt_standard.fastq.gz")
+            "cutadapt",
+            "{sample}_mate1.se.cutadapt_standard.fastq.gz")
 
     output:
         reads = os.path.join(
             config["output_dir"],
-            "cutadapt_replicates2",
-            "{sample}.cutadapt_encode.fastq.gz")
+            "cutadapt",
+            "{sample}_mate1.se.cutadapt_encode.fastq.gz")
     params:
         cluster_log_path = config["cluster_log"],
         script = os.path.join(
@@ -186,7 +79,7 @@ rule umi_tools_format:
             config[wildcards.sample]['format']
         
     singularity:
-        "docker://zavolab/rcrunch_python:1.0"
+        "docker://zavolab/rcrunch_python:1.0.5"
 
     threads: 1
 
@@ -194,12 +87,12 @@ rule umi_tools_format:
         stdout = os.path.join(
             config["local_log"],
             "preprocessing",
-            "umi_format__{sample}.stdout.log"
+            "umi_format__{sample}.se.stdout.log"
             ),
         stderr = os.path.join(
             config["local_log"],
             "preprocessing",
-            "umi_format__{sample}.stderr.log"
+            "umi_format__{sample}.se.stderr.log"
             ),
     shell:
         "(python {params.script} \
diff --git a/workflow/rules/rcrunch_transcriptomic.smk b/workflow/rules/rcrunch_transcriptomic.smk
new file mode 100644
index 0000000..b153535
--- /dev/null
+++ b/workflow/rules/rcrunch_transcriptomic.smk
@@ -0,0 +1,2349 @@
+# --------------------------------------------------------------------------------
+# RCRUNCH
+# Author : Katsantoni Maria
+# Company: Mihaela Zavolan group, Biozentrum, Basel
+# --------------------------------------------------------------------------------
+# Transcriptomic RCRUNCH subpipeline for CLIP data
+# This approach is based on the hypothesis that the RBP is binding the mature
+# mRNA and thus the sequence of the mature mRNA is important for the binding.
+# ________________________________________________________________________________
+import os
+
+rule TR_initial_index:
+    """
+        Index genome bamfile using samtools.
+    """
+    input:
+        bam = lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "initial_map",
+                "{sample}",
+                "{sample}.{mates}.bam"),
+            sample=wildcards.sample,
+            mates=get_mates(wildcards.sample)),
+        log = lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "initial_map",
+                "{sample}",
+                "{sample}.{mates}.Log.final.out"),
+            sample=wildcards.sample,
+            mates=get_mates(wildcards.sample))
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "initial_map",
+                "{sample}.sorted.bam")
+                ),
+        bai = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "initial_map",
+                "{sample}.sorted.bam.bai")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "initial_map",
+            "{sample}_temp"),
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 1
+
+    log:
+       stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_initial_index.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_initial_index.stderr.log"),
+
+    shell:
+        "(samtools sort \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        samtools index \
+        {output.bam} {output.bai}) \
+         1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_flag_ncRNA_reads:
+    """
+        Make txt file of reads mapping to ncRNAs.
+        If a multimapper maps to a ncRNA, all reads are excluded.
+        Custom script.
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "initial_map",
+            "{sample}.sorted.bam"),
+        bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "initial_map",
+            "{sample}.sorted.bam.bai"),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_filter_ncRNAs.py"),
+        ncRNAs = config['ncRNAs'],
+        ncRNA_biotypes = expand(config['ncRNA_biotypes']),
+        paired = lambda wildcards:
+            config[wildcards.sample]['paired'],
+        sense = lambda wildcards:
+            config[wildcards.sample]['sense'],
+
+    output:
+        outfile = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "remove_ncRNAs",
+                "{sample}.ncrna.txt")
+                ),
+        flag = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "remove_ncRNAs",
+                "{sample}_done")
+                ),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_flag_ncRNA_reads.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_flag_ncRNA_reads.stderr.log"),
+
+    shell:
+        "(python {params.script} \
+        --bamfile {input.bam} \
+        --RNA_central {params.ncRNAs} \
+        --outfile {output.outfile} \
+        --paired {params.paired} \
+        --sense {params.sense} \
+        --ncRNAs {params.ncRNA_biotypes} \
+        --flag {output.flag} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_remove_ncRNA_reads:
+    """
+        Remove the reads mapping to rRNAs using picard.
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "initial_map",
+            "{sample}.sorted.bam"),
+        read_names = os.path.join(
+            config["output_dir"],
+            "TR",
+            "remove_ncRNAs",
+            "{sample}.ncrna.txt"),
+
+    output:
+        reads = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "remove_ncRNAs",
+                "{sample}.filtered.bam")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/picard:2.18.9"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_remove_ncRNA_reads.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_remove_ncRNA_reads.stderr.log"),
+
+    shell:
+        "(java -jar /usr/local/bin/picard.jar FilterSamReads \
+        I={input.bam} \
+        O={output.reads} \
+        READ_LIST_FILE={input.read_names} \
+        FILTER=excludeReadList) \
+        1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_index_ncRNA_rm:
+    """
+        Index bamfile without the rRNA-mapped reads using samtools.
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "remove_ncRNAs",
+            "{sample}.filtered.bam"),
+
+    output:
+        bai = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "remove_ncRNAs",
+                "{sample}.filtered.bam.bai")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_index_ncRNA_rm.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_index_ncRNA_rm.stderr.log"),
+
+    shell:
+        "(samtools index \
+        {input.bam} {output.bai} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_flag_duplicates:
+    """
+        Flag duplicate reads using the STAR flags.
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "remove_ncRNAs",
+            "{sample}.filtered.bam"),
+        bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "remove_ncRNAs",
+            "{sample}.filtered.bam.bai"),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "flag_duplicates",
+                "{sample}.duplicates.Processed.out.bam")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        outFileNamePrefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "flag_duplicates",
+            "{sample}.duplicates."),
+
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_flag_duplicates.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_flag_duplicates.stderr.log"),
+
+    shell:
+        "(STAR \
+        --inputBAMfile {input.bam} \
+        --bamRemoveDuplicatesType UniqueIdenticalNotMulti \
+        --runMode inputAlignmentsFromBAM \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_remove_duplicates:
+    """
+        Remove reads flagged as duplicates by STAR.
+        Custom script.
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "flag_duplicates",
+            "{sample}.duplicates.Processed.out.bam"),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "remove_duplicates",
+                "{sample}.duplicates.bam")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_filter_duplicates.py"),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_remove_duplicates.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_remove_duplicates.stderr.log"),
+
+    shell:
+        "(python {params.script} \
+        --bamfile {input.bam} \
+        --outfile {output.bam} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_no_duplicate_removal:
+    """
+        No removal of duplicates
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "remove_ncRNAs",
+            "{sample}.filtered.bam"),
+        bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "remove_ncRNAs",
+            "{sample}.filtered.bam.bai"),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "remove_duplicates",
+                "{sample}.with_duplicates.bam")
+                ),
+        bai = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "remove_duplicates",
+                "{sample}.with_duplicates.bam.bai")
+                ),
+    
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://bash:5.0.16"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_no_duplicate_removal.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_no_duplicate_removal.stderr.log"),
+
+    shell:
+        "(cp {input.bam} {output.bam}; \
+        cp {input.bai} {output.bai}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+rule TR_merge:
+    input:
+        bam = lambda wildcards:
+            expand(
+                os.path.join(
+                    config["output_dir"],
+                    "TR",
+                    "remove_duplicates",
+                    "{sample}.{dup_type}.{mates}.bam"),
+                sample=config[wildcards.experiment][wildcards.name],
+                dup_type=config[config[wildcards.experiment][wildcards.name][0]]['dup_type'],
+                mates=get_mates(config[wildcards.experiment][wildcards.name][0])),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_merge_samples.bam")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_merge.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_merge.stderr.log"),
+
+    shell:
+        "(samtools merge \
+        {output.bam} {input.bam}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_sort_merged:
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.bam"
+            ),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_merge_samples.sorted.bam")
+                ),
+        bai = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_merge_samples.sorted.bam.bai")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{name}_merge_samples"),
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_sort_merged.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_sort_merged.stderr.log"),
+
+    shell:
+        "(samtools sort \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        samtools index \
+        {output.bam} {output.bai} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_sortname_merged:
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.bam"),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_merge_samples.sortedname.bam")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{name}_merge_samples_sortname"),
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_sortname_merged.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_sortname_merged.stderr.log"),
+
+    shell:
+        "(samtools sort -n \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_read_frequencies:
+    """
+        Calculate occurence of reads that are multimappers.
+        Custom script.
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sorted.bam"),
+        bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sorted.bam.bai"),
+
+    output:
+        frequencies = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}.frequencies.csv")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_bam_get_read_frequencies.py"),
+        paired = lambda wildcards:
+                    config[config[wildcards.experiment][wildcards.name][0]]['paired'],
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_read_frequencies.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_read_frequencies.stderr.log"),
+        
+    shell:
+        "(python {params.script} \
+        --bamfile {input.bam} \
+        --paired {params.paired} \
+        --outfile {output.frequencies}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+
+rule TR_salmon_index:
+    """
+        Build salmon index based on transcript annotation.
+        Required for transcript quantification done by salmon.
+    """
+    input:
+        transcriptome = config["transcriptome"]
+
+    output:
+        index = directory(os.path.join(
+            config["output_dir"],
+            "TR_salmon_index")
+            ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/salmon:0.11.0"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "salmon_index.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "salmon_index.stderr.log"),
+
+    shell:
+        "(salmon index \
+        -t {input.transcriptome} \
+        -i {output.index} \
+        --type fmd; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_customised_transcriptome:
+    """
+        Build customised transcriptome keeping
+        the most expressed isoform per gene.
+        Custom script.
+    """
+    input:
+        transcriptome = config["transcriptome"],
+        ensembl_csv = os.path.join(
+            config["output_dir"],
+            "ensembl_csv",
+            "ensembl.csv"),
+        ensembl_gtf = config["gtf"],
+        quantification = lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "tr_quantification_{mates}_{name}",
+                "quant.sf"),
+            experiment=wildcards.experiment,
+            name=wildcards.name,
+            mates=get_mates(config[wildcards.experiment][wildcards.name][0])),
+    output:
+        output_gtf = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.transcriptome.gtf"),
+        output_fasta = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.transcriptome.fa"),
+        info = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.info.csv"),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_most_expressed_transcriptome.py"),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_customised_transcriptome.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_customised_transcriptome.stderr.log"),
+
+    shell:
+        "(python {params.script} \
+        --quantification_file {input.quantification} \
+        --transcriptome {input.transcriptome}  \
+        --ensembl_csv {input.ensembl_csv} \
+        --ensembl_gtf {input.ensembl_gtf} \
+        --out_gtf {output.output_gtf} \
+        --transcript_info {output.info} \
+        --out_fasta {output.output_fasta} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_transcriptome_index:
+    """
+        Create index of the customised transcriptome using STAR.
+    """
+    input:
+        tr_fasta = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.transcriptome.fa"),
+        tr_gtf = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.transcriptome.gtf"),
+
+    output:
+        output = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{name}_STAR_index",
+            "chrNameLength.txt"),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        output_dir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{name}_STAR_index"),
+        outFileNamePrefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{name}_STAR_index/STAR_"),
+        sjdbOverhang = config["sjdbOverhang"]
+
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_transcriptome_index.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_transcriptome_index.stderr.log"),
+
+    shell:
+        "(chmod -R 777 {params.output_dir}; \
+        STAR \
+        --runMode genomeGenerate \
+        --genomeSAindexNbases 12 \
+        --genomeChrBinNbits 12 \
+        --genomeDir {params.output_dir} \
+        --genomeFastaFiles {input.tr_fasta} \
+        --runThreadN {threads} \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_index_map_to_transcriptome:
+    """
+        Index the transcriptomic alignment.
+    """
+    input:
+        bam = lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "tr_map",
+                "{name}",
+                "transcriptome.{mates}.bam"
+                ),
+            experiment=wildcards.experiment,
+            name=wildcards.name,
+            mates=get_mates(config[wildcards.experiment][wildcards.name][0])),
+        log = lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "tr_map",
+                "{name}",
+                "transcriptome.{mates}.Log.final.out"
+                ),
+            experiment=wildcards.experiment,
+            name=wildcards.name,
+            mates=get_mates(config[wildcards.experiment][wildcards.name][0])),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "tr_map_sort",
+                "{name}",
+                "transcriptome.sorted.bam")
+                ),
+        bai = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "tr_map_sort",
+                "{name}",
+                "transcriptome.sorted.bam.bai")
+                ),
+    
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "tr_map",
+            "{name}",
+            "transcriptome_temp"),
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_index_map_to_transcriptome.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_index_map_to_transcriptome.stderr.log"),
+
+    shell:
+        "(samtools sort \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        samtools index \
+        {output.bam} {output.bai} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_index_map_to_genome:
+    """
+        Index the genomic alignment.
+    """
+    input:
+        bam = lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "gn_map",
+                "{name}",
+                "genome.{mates}.bam"),
+            experiment=wildcards.experiment,
+            name=wildcards.name,
+            mates=get_mates(config[wildcards.experiment][wildcards.name][0])),
+        log = lambda wildcards: expand(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "gn_map",
+                "{name}",
+                "genome.{mates}.Log.final.out"),
+            experiment=wildcards.experiment,
+            name=wildcards.name,
+            mates=get_mates(config[wildcards.experiment][wildcards.name][0])),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "gn_map_sort",
+                "{name}",
+                "genome.sorted.bam")
+                ),
+        bai = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "gn_map_sort",
+                "{name}",
+                "genome.sorted.bam.bai")
+                ),
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "gn_map_sort",
+            "{name}",
+            "genome_temp"),
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_index_map_to_genome.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_index_map_to_genome.stderr.log"),
+
+    shell:
+        "(samtools sort \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        samtools index \
+        {output.bam} {output.bai} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_assign_to_gn_tr:
+    """
+        Remove reads that map to '-' strand in transcriptome
+    """
+    input:
+        ensembl_csv = os.path.join(
+            config["output_dir"],
+            "ensembl_csv",
+            "ensembl.csv"),
+        chromosome_info = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"),
+        gn_bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "gn_map_sort",
+            "{name}",
+            "genome.sorted.bam"),
+        gn_bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "gn_map_sort",
+            "{name}",
+            "genome.sorted.bam.bai"),
+        tr_bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "tr_map_sort",
+            "{name}",
+            "transcriptome.sorted.bam"),
+        tr_bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "tr_map_sort",
+            "{name}",
+            "transcriptome.sorted.bam.bai"),
+
+    output:
+        tr_bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_transcriptome.bam")
+                ),
+        gn_bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_genome.bam")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_assign_read_to_gn_tr.py"),
+        out_folder = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{name}_assign_to_gn_tr_temp"),
+        paired = lambda wildcards:
+                    config[config[wildcards.experiment][wildcards.name][0]]['paired'],
+        sense = lambda wildcards:
+                    config[config[wildcards.experiment][wildcards.name][0]]['sense'],
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_assign_to_gn_tr.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_assign_to_gn_tr.stderr.log"),
+
+    shell:
+        "(mkdir -p {params.out_folder} ;\
+        python {params.script} \
+        --annotation {input.ensembl_csv}  \
+        --tr_bam {input.tr_bam} \
+        --gn_bam {input.gn_bam} \
+        --out_folder {params.out_folder} \
+        --filtered_tr_bam {output.tr_bam} \
+        --filtered_gn_bam {output.gn_bam} \
+        --paired {params.paired} \
+        --sense {params.sense} \
+        --chromosome_info {input.chromosome_info} \
+        --threads {threads} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_transcriptome_filter_sort:
+    """
+        Sort and index transcriptomic reads after
+        filtering wrong strand reads and
+        those that map better to genome
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_transcriptome.bam"),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_transcriptome.sorted.bam")
+                ),
+        bai = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_transcriptome.sorted.bam.bai")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{name}_sort_samples"),
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_transcriptome_filter_sort.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_transcriptome_filter_sort.stderr.log"),
+
+    shell:
+        "(samtools sort \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        samtools index \
+        {output.bam} {output.bai} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_genome_filter_sort:
+    """
+        Sort and index genomic reads after filtering wrong strand reads and
+        those that map better to genome
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_genome.bam"),
+
+    output:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_genome.sorted.bam"),
+        bai =  os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_genome.sorted.bam.bai"),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        prefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{name}_gn_sort_samples"),
+
+    singularity:
+        "docker://zavolab/samtools:1.8"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_genome_filter_sort.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_genome_filter_sort.stderr.log"),
+
+    shell:
+        "(samtools sort \
+        -T {params.prefix} \
+        -@ {threads} \
+        {input.bam} > {output.bam}; \
+        samtools index \
+        {output.bam} {output.bai} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_bam_to_bed:
+    """
+        Convert bamfile to bedfile using bedtools
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_genome.sorted.bam"),
+
+    output:
+        bed = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_genome.bed")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/bedtools:2.27.0"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_bam_to_bed.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_bam_to_bed.stderr.log"),
+
+    shell:
+        "(bedtools bamtobed \
+        -i {input.bam} > {output.bed}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_bam_to_bed_transcriptome:
+    """
+        Convert bamfile to bedfile using bedtools
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_transcriptome.sorted.bam"),
+
+    output:
+        bed = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}_transcriptome.bed")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/bedtools:2.27.0"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_bam_to_bed_transcriptome.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_bam_to_bed_transcriptome.stderr.log"),
+
+    shell:
+        "(bedtools bamtobed \
+        -i {input.bam} > {output.bed}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_genome_coverage:
+    """
+        Count number of fragments in windows 
+        of specific size at the genome.
+        Custom script.
+    """
+    input:
+        chromosome_info = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"),
+        fg_bed = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates_genome.bed"),
+        bg_bed = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis_genome.bed"),
+        fg_frequencies = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates.frequencies.csv"),
+        bg_frequencies = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis.frequencies.csv"),
+
+    output:
+        coverage = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}.genome_coverage.bed")
+                ),
+    
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "sliding_windows.py"),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}"),
+        window_f = lambda wildcards:
+            config[wildcards.experiment]["window_f"],
+        window_b = lambda wildcards:
+            config[wildcards.experiment]["window_b"],
+        step_size = lambda wildcards:
+            config[wildcards.experiment]["step_size"],
+        sense_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['sense'],
+        sense_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['sense'],
+        paired_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+        paired_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['paired'],
+        prefix = "{experiment}.genome_coverage",
+        background_type = lambda wildcards:
+            config[wildcards.experiment]["background_type"],
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "genome_coverage.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "genome_coverage.stderr.log"),
+
+    shell:
+        "(python {params.script} \
+        --bed_foreground {input.fg_bed} \
+        --bed_background {input.bg_bed} \
+        --foreground_frequencies {input.fg_frequencies} \
+        --background_frequencies {input.bg_frequencies} \
+        --out {params.output_dir} \
+        --prefix {params.prefix} \
+        --chromosomes {input.chromosome_info} \
+        --sense_f {params.sense_f} \
+        --sense_b {params.sense_b} \
+        --paired_f {params.paired_f} \
+        --paired_b {params.paired_b} \
+        --window_f {params.window_f} \
+        --window_b {params.window_b} \
+        --step_size {params.step_size} \
+        --data_type genome \
+        --background {params.background_type} \
+        --cutoff 1 \
+        --threads {threads}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_transcriptome_coverage:
+    """
+        Count number of fragments in windows 
+        of specific size at
+        the customised transcriptome.
+        Custom script.
+    """
+    input:
+        chromosome_info = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "replicates_STAR_index",
+            "chrNameLength.txt"),
+        fg_bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates_transcriptome.sorted.bam"),
+        fg_bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates_transcriptome.sorted.bam.bai"),
+        bg_bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis_transcriptome.sorted.bam"),
+        bg_bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis_transcriptome.sorted.bam.bai"),
+        fg_frequencies = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates.frequencies.csv"),
+        bg_frequencies = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis.frequencies.csv"),
+
+    output:
+        coverage = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}.transcriptome_coverage.bed")
+                ),
+            
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_bam_count_sliding_windows.py"),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}"),
+        window_f = lambda wildcards:
+            config[wildcards.experiment]["window_f"],
+        window_b = lambda wildcards:
+            config[wildcards.experiment]["window_b"],
+        step_size = lambda wildcards:
+            config[wildcards.experiment]["step_size"],
+        sense_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['sense'],
+        sense_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['sense'],
+        paired_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+        paired_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['paired'],
+        prefix = "{experiment}.transcriptome_coverage",
+        background_type = lambda wildcards:
+            config[wildcards.experiment]["background_type"]
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "transcriptome_coverage.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "transcriptome_coverage.stderr.log"),
+
+    shell:
+        "(python {params.script} \
+        --bam_foreground {input.fg_bam} \
+        --bam_background {input.bg_bam} \
+        --bam_foreground_frequencies {input.fg_frequencies} \
+        --bam_background_frequencies {input.bg_frequencies} \
+        --out {params.output_dir} \
+        --prefix {params.prefix} \
+        --chromosomes {input.chromosome_info} \
+        --sense_f {params.sense_f} \
+        --sense_b {params.sense_b} \
+        --paired_f {params.paired_f} \
+        --paired_b {params.paired_b} \
+        --window_f {params.window_f} \
+        --window_b {params.window_b} \
+        --step_size {params.step_size} \
+        --data_type transcriptome \
+        --background {params.background_type} \
+        --cutoff 1 \
+        --threads {threads}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_merge_windows:
+    """
+        Merge windows from foreground/ background.
+        Custom script.
+    """
+    input:
+        tr_coverage = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}.transcriptome_coverage.bed"),
+        gn_coverage = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}.genome_coverage.bed"),
+
+    output:
+        merged_coverage = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}.total_coverage.bed")
+            ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_merge_fg_bg.py"),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "merge_windows.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "merge_windows.stderr.log"),
+
+    shell:
+        "(python {params.script} \
+        --transcriptome {input.tr_coverage} \
+        --genome {input.gn_coverage} \
+        --out {output.merged_coverage} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_enriched_regions:
+    """
+        Obtain enriched regions for binding based on EM using
+        bg for noise estimation. Modified and adapted from:
+        Berger, Severin M., et al. "Crunch: Integrated processing and modeling of
+        ChIP-seq data in terms of regulatory motifs."
+        Genome research (2019): gr-239319
+    Custom script.
+   """
+    input:
+        coverage = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}.total_coverage.bed"),
+
+    output:
+        enriched_regions = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_enriched_regions.csv"),
+        parameters = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_parameters.csv"),
+        all_regions = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_regions.csv"),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_find_enriched_regions.py"),
+        plot_dir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "enriched_regions_plots"),
+        fdr_cutoff = config['FDR']
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "enriched_regions.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "enriched_regions.stderr.log"),
+
+    shell:
+        "(mkdir -p {params.plot_dir}; \
+        python {params.script} \
+        --windows_table {input.coverage} \
+        --out {output.enriched_regions} \
+        --all_regions {output.all_regions} \
+        --out_parameters {output.parameters} \
+        --out_folder {params.plot_dir} \
+        --fdr_cutoff {params.fdr_cutoff} \
+         ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_fit_peaks:
+    """
+        Fit individual binding events in the enriched regions.
+        Modifed and adapted from:
+        Berger, Severin M., et al. "Crunch: Integrated processing and
+        modeling of ChIP-seq data in terms of regulatory motifs."
+        Genome research (2019): gr-239319.
+        Custom script.
+    """
+    input:
+        fg_gn_bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates_genome.sorted.bam"),
+        fg_gn_bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates_genome.sorted.bam.bai"),
+        bg_gn_bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis_genome.sorted.bam"),
+        bg_gn_bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis_genome.sorted.bam.bai"),
+        fg_tr_bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates_transcriptome.sorted.bam"),
+        fg_tr_bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates_transcriptome.sorted.bam.bai"),
+        bg_tr_bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis_transcriptome.sorted.bam"),
+        bg_tr_bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis_transcriptome.sorted.bam.bai"),
+        fg_frequencies = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates.frequencies.csv"),
+        bg_frequencies = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_smis.frequencies.csv"),
+        enriched_regions = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_enriched_regions.csv"),
+        parameters = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_parameters.csv"),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"),
+        chromosomes_t = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "replicates_STAR_index",
+            "chrNameLength.txt"),
+
+    output:
+        peaks = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_define_peaks.py"),
+        plot_dir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "fit_peaks_plots"),
+        fragment_size = config["fragment_size"],
+        window_size = lambda wildcards:
+            config[wildcards.experiment]['window_f'],
+        paired_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+        paired_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['paired'],
+        sense_f = lambda wildcards:
+            config[config[wildcards.experiment]["replicates"][0]]['sense'],
+        sense_b = lambda wildcards:
+            config[config[wildcards.experiment]["smis"][0]]['sense'],
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "fit_peaks.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "fit_peaks.stderr.log"),
+
+    shell:
+        "(mkdir -p {params.plot_dir}; \
+        python {params.script} \
+        --enriched_regions {input.enriched_regions} \
+        --parameters {input.parameters} \
+        --fragment_size {params.fragment_size} \
+        --window_size {params.window_size} \
+        --chromosomes_g {input.chromosomes_g} \
+        --bam_foreground_g {input.fg_gn_bam} \
+        --bam_background_g {input.bg_gn_bam} \
+        --chromosomes_t {input.chromosomes_t} \
+        --bam_foreground_t {input.fg_tr_bam} \
+        --bam_background_t {input.bg_tr_bam} \
+        --bam_fg_fq {input.fg_frequencies} \
+        --bam_bg_fq {input.bg_frequencies} \
+        --paired_f {params.paired_f} \
+        --paired_b {params.paired_b} \
+        --sense_f {params.sense_f} \
+        --sense_b {params.sense_b} \
+        --out {output.peaks} \
+        --out_folder {params.plot_dir} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_peaks_split_sets_crosslink:
+    """
+        Split the significant peaks into training and test
+        for subsequent motif analysis
+    """
+    input:
+        genome_fasta = config["genome"],
+        transcriptome_fasta = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates.transcriptome.fa"),
+        all_regions = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_regions.csv"),
+        reads = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"),
+        chromosomes_t = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "replicates_STAR_index",
+            "chrNameLength.txt"),
+
+    output:
+        training = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "crosslink",
+                "training.fasta")
+            ),
+        test = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "crosslink",
+                "test.fasta")
+                ),
+        training_bg = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "crosslink",
+                "training_bg.fasta")
+                ),
+        test_bg = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "crosslink",
+                "test_bg.fasta")
+                ),
+        dirtemp = temp(
+            directory(
+                os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "crosslink",
+                "tmpdir")
+            )
+        ),
+
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_peaks_split_sets.py"),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "crosslink"),
+        genome_tag = config["genome_tag"],
+        peak_size = 50
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}_peaks_split_sets_crosslink.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}_peaks_split_sets_crosslink.stderr.log"),
+
+    shell:
+        "(mkdir -p {output.dirtemp}; \
+        python {params.script} \
+        --peaks_file {input.reads} \
+        --genome_fasta {input.genome_fasta} \
+        --all_regions {input.all_regions} \
+        --genome_tag {params.genome_tag} \
+        --tempdir {output.dirtemp} \
+        --transcriptome_fasta {input.transcriptome_fasta} \
+        --chromosomes_g {input.chromosomes_g} \
+        --chromosomes_t {input.chromosomes_t} \
+        --crosslink_type 'crosslink' \
+        --peak_size {params.peak_size} \
+        --out_folder {params.output_dir} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_peaks_split_sets:
+    """
+        Split the significant peaks into training and test
+        for subsequent motif analysis
+    """
+    input:
+        genome_fasta = config["genome"],
+        transcriptome_fasta = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates.transcriptome.fa"),
+        all_regions = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_regions.csv"),
+        reads = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"),
+        chromosomes_t = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "replicates_STAR_index",
+            "chrNameLength.txt"),
+
+    output:
+        training = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "peak_center",
+                "training.fasta")
+                ),
+        test = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "peak_center",
+                "test.fasta")
+                ),
+        training_bg = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "peak_center",
+                "training_bg.fasta")
+                ),
+        test_bg = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "peak_center",
+                "test_bg.fasta")
+                ),
+        dirtemp = temp(
+            directory(
+                os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_crossvalidation",
+                "{run}",
+                "peak_center",
+                "tmpdir")
+            )
+        ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_peaks_split_sets.py"),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}",
+            "peak_center"),
+        genome_tag = config["genome_tag"],
+        peak_size = 50
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}_peaks_split_sets_peak_center.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_crossvalidation",
+            "{run}_peaks_split_sets_peak_center.stderr.log"),
+
+    shell:
+        "(mkdir -p {output.dirtemp}; \
+        python {params.script} \
+        --peaks_file {input.reads} \
+        --genome_fasta {input.genome_fasta} \
+        --all_regions {input.all_regions} \
+        --tempdir {output.dirtemp} \
+        --genome_tag {params.genome_tag} \
+        --transcriptome_fasta {input.transcriptome_fasta} \
+        --chromosomes_g {input.chromosomes_g} \
+        --chromosomes_t {input.chromosomes_t} \
+        --crosslink_type 'peak_center' \
+        --out_folder {params.output_dir} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+rule TR_peaks_split_sets_crosslink_all_motifs:
+    """
+        Get all peaks crosslink
+    """
+    input:
+        transcriptome_fasta = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates.transcriptome.fa"),
+        genome_fasta = config["genome"],
+        all_regions = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_regions.csv"
+            ),
+        reads = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"
+            ),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+        chromosomes_t = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "replicates_STAR_index",
+            "chrNameLength.txt"),
+
+    output:
+        test = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_final",
+                "crosslink",
+                "test.fasta")
+            ),
+        test_bg = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_final",
+                "crosslink",
+                "test_bg.fasta")
+            ),
+        dirtemp = temp(
+            directory(
+                os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_final",
+                "crosslink",
+                "tmpdir")
+            )
+        ),
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "total_enrichment_peakstofasta.py"
+            ),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_final",
+            "crosslink",
+            ),
+        genome_tag = config["genome_tag"],
+        peak_size = 50,
+        
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_final",
+            "peaks_split_sets__all_motifs_crosslink.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_final",
+            "peaks_split_sets__all_motifs_crosslink.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.output_dir}; \
+        mkdir -p {output.dirtemp}; \
+        python {params.script} \
+        --peaks_file {input.reads} \
+        --genome_fasta {input.genome_fasta} \
+        --all_regions {input.all_regions} \
+        --genome_tag {params.genome_tag} \
+        --tempdir {output.dirtemp} \
+        --transcriptome_fasta {input.transcriptome_fasta} \
+        --chromosomes_g {input.chromosomes_g} \
+        --chromosomes_t {input.chromosomes_t} \
+        --peak_size {params.peak_size} \
+        --out_folder {params.output_dir} \
+        --crosslink_type 'crosslink' \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_peaks_split_sets_all_motifs:
+    """
+        get all peaks crosslink
+    """
+    input:
+        transcriptome_fasta = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_replicates.transcriptome.fa"),
+        genome_fasta = config["genome"],
+        all_regions = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_regions.csv"
+            ),
+        reads = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_total_peaks.csv"
+            ),
+        chromosomes_g = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+        chromosomes_t = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "replicates_STAR_index",
+            "chrNameLength.txt"),
+
+
+    output:
+        test = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_final",
+                "peak_center",
+                "test.fasta")
+            ),
+        test_bg = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_final",
+                "peak_center",
+                "test_bg.fasta")
+            ),
+        dirtemp = temp(
+            directory(
+                os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "motif_analysis_final",
+                "peak_center",
+                "tmpdir")
+            )
+        ),
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "total_enrichment_peakstofasta.py"
+            ),
+        output_dir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_final",
+            "peak_center",
+            ),
+        genome_tag = config["genome_tag"],
+        peak_size = 50
+        
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_final",
+            "peaks_split_sets__all_motifs_standard.stdout.log"
+            ),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "motif_analysis_final",
+            "peaks_split_sets__all_motifs_standard.stderr.log"
+            ),
+
+    shell:
+        "(mkdir -p {params.output_dir}; \
+        mkdir -p {output.dirtemp}; \
+        python {params.script} \
+        --peaks_file {input.reads} \
+        --genome_fasta {input.genome_fasta} \
+        --all_regions {input.all_regions} \
+        --genome_tag {params.genome_tag} \
+        --tempdir {output.dirtemp} \
+        --transcriptome_fasta {input.transcriptome_fasta} \
+        --chromosomes_g {input.chromosomes_g} \
+        --chromosomes_t {input.chromosomes_t} \
+        --peak_size {params.peak_size} \
+        --out_folder {params.output_dir} \
+        --crosslink_type 'peak_center' \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
diff --git a/workflow/rules/rcrunch_transcriptomic_pe.smk b/workflow/rules/rcrunch_transcriptomic_pe.smk
index 92fedef..9208e35 100644
--- a/workflow/rules/rcrunch_transcriptomic_pe.smk
+++ b/workflow/rules/rcrunch_transcriptomic_pe.smk
@@ -9,7 +9,7 @@
 # ________________________________________________________________________________
 import os
 
-rule TR_initial_map_to_genome:
+rule TR_initial_map_to_genome_pe:
     """
         Initial mapping to the genome using STAR.
     """
@@ -24,18 +24,18 @@ rule TR_initial_map_to_genome:
                 os.path.join(
                     config["output_dir"],
                     "cutadapt",
-                    "{sample}_mate1.cutadapt_{format}.fastq.gz"),
+                    "{sample}_mate1.pe.cutadapt_{format}.fastq.gz"),
                 sample=wildcards.sample,
-                format=config[wildcards.sample]['format']
+                format=config[wildcards.sample]['format'],
                 ),
         reads2 = lambda wildcards:
             expand(
                 os.path.join(
                     config["output_dir"],
                     "cutadapt",
-                    "{sample}_mate2.cutadapt_{format}.fastq.gz"),
+                    "{sample}_mate2.pe.cutadapt_{format}.fastq.gz"),
                 sample=wildcards.sample,
-                format=config[wildcards.sample]['format']
+                format=config[wildcards.sample]['format'],
                 ),
 
     output:
@@ -45,25 +45,14 @@ rule TR_initial_map_to_genome:
                 "TR",
                 "initial_map",
                 "{sample}",
-                "{sample}.bam")
-                ),
+                "{sample}.pe.bam")),
         logfile = temp(
             os.path.join(
                 config["output_dir"],
                 "TR",
                 "initial_map",
                 "{sample}",
-                "{sample}Log.final.out")
-                ),
-        outdir = temp(
-            directory(
-                os.path.join(
-                    config["output_dir"],
-                    "TR",
-                    "initial_map",
-                    "{sample}")
-                    )
-                ),
+                "{sample}.pe.Log.final.out")),
 
     params:
         cluster_log_path = config["cluster_log"],
@@ -76,9 +65,11 @@ rule TR_initial_map_to_genome:
             "TR",
             "initial_map",
             "{sample}",
-            "{sample}"),
+            "{sample}.pe."),
         multimappers = config["multimappers"]
 
+    shadow: "full"
+
     singularity:
         "docker://zavolab/star:2.6.0a"
 
@@ -89,12 +80,12 @@ rule TR_initial_map_to_genome:
             config["local_log"],
             "TR",
             "preprocessing",
-            "{sample}_initial_map_to_genome.stdout.log"),
+            "{sample}_initial_map_to_genome.pe.stdout.log"),
         stderr = os.path.join(
             config["local_log"],
             "TR",
             "preprocessing",
-            "{sample}_initial_map_to_genome.stderr.log"),
+            "{sample}_initial_map_to_genome.pe.stderr.log"),
 
     shell:
         "(STAR \
@@ -120,178 +111,163 @@ rule TR_initial_map_to_genome:
         ) 1> {log.stdout} 2> {log.stderr}"
 
 
-rule TR_initial_index:
+
+
+rule TR_umi_collapse_pe:
     """
-        Index genome bamfile using samtools.
+        Alternative duplicate removal when UMIs are present using umitools.
     """
     input:
-        outdir = os.path.join(
+        bam = os.path.join(
             config["output_dir"],
             "TR",
-            "initial_map",
-            "{sample}"),
-        bam = os.path.join(
+            "remove_ncRNAs",
+            "{sample}.filtered.bam"),
+        bai = os.path.join(
             config["output_dir"],
             "TR",
-            "initial_map",
-            "{sample}",
-            "{sample}.bam"),
+            "remove_ncRNAs",
+            "{sample}.filtered.bam.bai"),
 
     output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "initial_map",
-                "{sample}.sorted.bam")
-                ),
-        bai = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "initial_map",
-                "{sample}.sorted.bam.bai")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
+        bam = temp(os.path.join(
             config["output_dir"],
             "TR",
-            "initial_map",
-            "{sample}_temp"),
+            "remove_duplicates",
+            "{sample}.umis.pe.bam")
+            ),
 
-    singularity:
-        "docker://zavolab/samtools:1.8"
+    params:
+        cluster_log_path = config["cluster_log"]
 
-    threads: 1
+    singularity:
+        "docker://zavolab/umi-tools:0.5.4"
 
     log:
-       stdout = os.path.join(
+        stdout = os.path.join(
             config["local_log"],
             "TR",
             "preprocessing",
-            "{sample}_initial_index.stdout.log"),
+            "{sample}_umi_collapse.pe.stdout.log"),
         stderr = os.path.join(
             config["local_log"],
             "TR",
             "preprocessing",
-            "{sample}_initial_index.stderr.log"),
+            "{sample}_umi_collapse.pe.stderr.log"),
 
     shell:
-        "(samtools sort \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        samtools index \
-        {output.bam} {output.bai}) \
-         1> {log.stdout} 2> {log.stderr}"
+        "(umi_tools \
+        dedup \
+        -I {input.bam} \
+        --paired \
+        -S {output.bam} \
+        ) 1> {log.stdout} 2> {log.stderr}"
 
 
-rule TR_flag_ncRNA_reads:
+rule TR_bam_to_fastq_pe:
     """
-        Make txt file of reads mapping to ncRNAs.
-        If a multimapper maps to a ncRNA, all reads are excluded.
-        Custom script.
+        Convert initially mapped reads back to fastq files to do mappings at the
+        transcriptome and genome level, using bedtools.
     """
     input:
         bam = os.path.join(
             config["output_dir"],
             "TR",
-            "initial_map",
-            "{sample}.sorted.bam"),
-        bai = os.path.join(
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sortedname.bam"),
+        bam_coord = os.path.join(
             config["output_dir"],
             "TR",
-            "initial_map",
-            "{sample}.sorted.bam.bai"),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_filter_ncRNAs.py"),
-        ncRNAs = config['ncRNAs'],
-        ncRNA_biotypes = expand(config['ncRNA_biotypes']),
-        paired = lambda wildcards:
-            config[wildcards.sample]['paired'],
-        sense = lambda wildcards:
-            config[wildcards.sample]['sense'],
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sorted.bam"),
 
     output:
-        outfile = temp(
+        fastq_mate1 = temp(
             os.path.join(
                 config["output_dir"],
                 "TR",
-                "remove_ncRNAs",
-                "{sample}.ncrna.txt")
+                "{experiment}",
+                "{experiment}_{name}.mate1.pe.unfiltered.fastq")
                 ),
-        flag = temp(
+        fastq_mate2 = temp(
             os.path.join(
                 config["output_dir"],
                 "TR",
-                "remove_ncRNAs",
-                "{sample}_done")
+                "{experiment}",
+                "{experiment}_{name}.mate2.pe.unfiltered.fastq")
                 ),
 
+    params:
+        cluster_log_path = config["cluster_log"],
+
     singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
+        "docker://zavolab/bedtools:2.27.0"
+
+    threads: 1
 
     log:
         stdout = os.path.join(
             config["local_log"],
             "TR",
-            "preprocessing",
-            "{sample}_flag_ncRNA_reads.stdout.log"),
+            "{experiment}",
+            "{name}_bam_to_fastq.pe.stdout.log"),
         stderr = os.path.join(
             config["local_log"],
             "TR",
-            "preprocessing",
-            "{sample}_flag_ncRNA_reads.stderr.log"),
+            "{experiment}",
+            "{name}_bam_to_fastq.pe.stderr.log"),
 
     shell:
-        "(python {params.script} \
-        --bamfile {input.bam} \
-        --RNA_central {params.ncRNAs} \
-        --outfile {output.outfile} \
-        --paired {params.paired} \
-        --sense {params.sense} \
-        --ncRNAs {params.ncRNA_biotypes} \
-        --flag {output.flag} \
+        "(bedtools bamtofastq \
+        -i {input.bam} \
+        -fq {output.fastq_mate1} \
+        -fq2 {output.fastq_mate2}; \
         ) 1> {log.stdout} 2> {log.stderr}"
 
 
-rule TR_remove_ncRNA_reads:
+rule TR_bam_to_fastq_filtered_pe:
     """
-        Remove the reads mapping to rRNAs using picard.
+        Remove duplicate reads after converting bam to fastq
+        (when there are multimappers the reads will appear
+        multiple times in the fastq).
     """
     input:
-        bam = os.path.join(
+        mate1 = os.path.join(
             config["output_dir"],
             "TR",
-            "initial_map",
-            "{sample}.sorted.bam"),
-        read_names = os.path.join(
+            "{experiment}",
+            "{experiment}_{name}.mate1.pe.unfiltered.fastq"),
+        mate2 = os.path.join(
             config["output_dir"],
             "TR",
-            "remove_ncRNAs",
-            "{sample}.ncrna.txt"),
+            "{experiment}",
+            "{experiment}_{name}.mate2.pe.unfiltered.fastq"),
 
     output:
-        reads = temp(
+        mate1 = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}.mate1.pe.fastq.gz")
+                ),
+        mate2 = temp(
             os.path.join(
                 config["output_dir"],
                 "TR",
-                "remove_ncRNAs",
-                "{sample}.filtered.bam")
+                "{experiment}",
+                "{experiment}_{name}.mate2.pe.fastq.gz")
                 ),
 
     params:
         cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_fastq_keep_unique_reads.py"),
 
     singularity:
-        "docker://zavolab/picard:2.18.9"
+        "docker://zavolab/rcrunch_python:1.0.5"
 
     threads: 1
 
@@ -299,2600 +275,284 @@ rule TR_remove_ncRNA_reads:
         stdout = os.path.join(
             config["local_log"],
             "TR",
-            "preprocessing",
-            "{sample}_remove_ncRNA_reads.stdout.log"),
+            "{experiment}",
+            "{name}_bam_to_fastq_filtered.pe.stdout.log"),
         stderr = os.path.join(
             config["local_log"],
             "TR",
-            "preprocessing",
-            "{sample}_remove_ncRNA_reads.stderr.log"),
+            "{experiment}",
+            "{name}_bam_to_fastq_filtered.pe.stderr.log"),
 
     shell:
-        "(java -jar /usr/local/bin/picard.jar FilterSamReads \
-        I={input.bam} \
-        O={output.reads} \
-        READ_LIST_FILE={input.read_names} \
-        FILTER=excludeReadList) \
-        1> {log.stdout} 2> {log.stderr}"
+        "(python {params.script} \
+        --fastq_in {input.mate1} \
+        --fastq_out {output.mate1} \
+        --fastq_in2 {input.mate2} \
+        --fastq_out2 {output.mate2}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
 
 
-rule TR_index_ncRNA_rm:
+rule TR_tr_quantification_pe:
     """
-        Index bamfile without the rRNA-mapped reads using samtools.
+        Quantification of transcript isoforms using Salmon.
     """
     input:
-        bam = os.path.join(
+        transcriptome = os.path.join(
+            config["output_dir"],
+            "TR_salmon_index"),
+        mate1 = os.path.join(
             config["output_dir"],
             "TR",
-            "remove_ncRNAs",
-            "{sample}.filtered.bam"),
+            "{experiment}",
+            "{experiment}_{name}.mate1.pe.fastq.gz"),
+        mate2 = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.mate2.pe.fastq.gz"),
 
     output:
-        bai = temp(
+        quantification = temp(
             os.path.join(
                 config["output_dir"],
                 "TR",
-                "remove_ncRNAs",
-                "{sample}.filtered.bam.bai")
+                "{experiment}",
+                "tr_quantification_pe_{name}",
+                "quant.sf")
                 ),
-
     params:
         cluster_log_path = config["cluster_log"],
+        library = lambda wildcards: get_library_type(
+            config[config[wildcards.experiment][wildcards.name][0]]['mate2'],
+            config[config[wildcards.experiment][wildcards.name][0]]['sense']),
+        outdir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "tr_quantification_pe_{name}"),
+
+    shadow: "full"
 
     singularity:
-        "docker://zavolab/samtools:1.8"
+        "docker://zavolab/salmon:0.11.0"
 
-    threads: 1
+    threads: 8
 
     log:
         stdout = os.path.join(
             config["local_log"],
             "TR",
-            "preprocessing",
-            "{sample}_index_ncRNA_rm.stdout.log"),
+            "{experiment}",
+            "{name}_tr_quantification.pe.stdout.log"),
         stderr = os.path.join(
             config["local_log"],
             "TR",
-            "preprocessing",
-            "{sample}_index_ncRNA_rm.stderr.log"),
+            "{experiment}",
+            "{name}_tr_quantification.pe.stderr.log"),
 
     shell:
-        "(samtools index \
-        {input.bam} {output.bai} \
+        "(salmon quant \
+        -i {input.transcriptome} \
+        -l {params.library} \
+        -1 {input.mate1} \
+        -2 {input.mate2} \
+        -o {params.outdir} \
+        -p {threads} \
         ) 1> {log.stdout} 2> {log.stderr}"
 
 
-rule TR_flag_duplicates:
+rule TR_map_to_transcriptome_pe:
     """
-        Flag duplicate reads using the STAR flags.
+        Map to the transcriptome treating each transcript
+        as a "chromosome", using STAR.
     """
     input:
-        bam = os.path.join(
+        transcriptome = os.path.join(
             config["output_dir"],
             "TR",
-            "remove_ncRNAs",
-            "{sample}.filtered.bam"),
-        bai = os.path.join(
+            "{experiment}",
+            "replicates_STAR_index",
+            "chrNameLength.txt"),
+        mate1 = os.path.join(
             config["output_dir"],
             "TR",
-            "remove_ncRNAs",
-            "{sample}.filtered.bam.bai"),
+            "{experiment}",
+            "{experiment}_{name}.mate1.pe.fastq.gz"),
+        mate2 = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.mate2.pe.fastq.gz"),
 
     output:
         bam = temp(
             os.path.join(
                 config["output_dir"],
                 "TR",
-                "flag_duplicates",
-                "{sample}.duplicates.Processed.out.bam")
+                "{experiment}",
+                "tr_map",
+                "{name}",
+                "transcriptome.pe.bam")
+                ),
+        logfile = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "tr_map",
+                "{name}",
+                "transcriptome.pe.Log.final.out")
                 ),
-
     params:
         cluster_log_path = config["cluster_log"],
+        transcriptome = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "replicates_STAR_index"),
+        experiment_id = "{experiment}_{name}_transcriptome",
+        fragment_size = config["fragment_size"],
         outFileNamePrefix = os.path.join(
             config["output_dir"],
             "TR",
-            "flag_duplicates",
-            "{sample}.duplicates."),
-
+            "{experiment}",
+            "tr_map",
+            "{name}",
+            "transcriptome.pe."),
+        multimappers = config['multimappers'] + 10,
+    
+    shadow: "full"
+    
     singularity:
         "docker://zavolab/star:2.6.0a"
 
+    threads: 12
+
     log:
         stdout = os.path.join(
             config["local_log"],
             "TR",
-            "preprocessing",
-            "{sample}_flag_duplicates.stdout.log"),
+            "{experiment}",
+            "{name}_map_to_transcriptome.pe.stdout.log"),
         stderr = os.path.join(
             config["local_log"],
             "TR",
-            "preprocessing",
-            "{sample}_flag_duplicates.stderr.log"),
+            "{experiment}",
+            "{name}_map_to_transcriptome.pe.stderr.log"),
 
     shell:
         "(STAR \
-        --inputBAMfile {input.bam} \
-        --bamRemoveDuplicatesType UniqueIdenticalNotMulti \
-        --runMode inputAlignmentsFromBAM \
+        --runMode alignReads \
+        --runThreadN {threads} \
+        --genomeDir {params.transcriptome} \
+        --readFilesIn {input.mate1} {input.mate2} \
+        --readFilesCommand zcat \
+        --outSAMunmapped None  \
+        --outFilterMultimapNmax {params.multimappers} \
+        --outFilterMultimapScoreRange 1 \
         --outFileNamePrefix {params.outFileNamePrefix} \
+        --outFilterScoreMinOverLread 0.9 \
+        --outFilterMatchNminOverLread 0.9 \
+        --outFilterMismatchNoverLmax 0.05 \
+        --outSAMattributes All \
+        --outStd BAM_Unsorted \
+        --outSAMtype BAM Unsorted \
+        --outFilterType BySJout \
+        --outReadsUnmapped None \
+        --outSAMattrRGline ID:rcrunch SM:{params.experiment_id} \
+        --alignIntronMax 1 \
+        --alignEndsType EndToEnd > {output.bam}; \
         ) 1> {log.stdout} 2> {log.stderr}"
 
 
-rule TR_remove_duplicates:
+
+rule TR_map_to_genome_pe:
     """
-        Remove reads flagged as duplicates by STAR.
-        Custom script.
+        Initial mapping to the genome using STAR.
     """
     input:
-        bam = os.path.join(
+        genome = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"),
+        mate1 = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.mate1.pe.fastq.gz"),
+        mate2 = os.path.join(
             config["output_dir"],
             "TR",
-            "flag_duplicates",
-            "{sample}.duplicates.Processed.out.bam"),
+            "{experiment}",
+            "{experiment}_{name}.mate2.pe.fastq.gz"),
 
     output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "remove_duplicates",
-                "{sample}.duplicates.bam")
-                ),
-
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "gn_map",
+            "{name}",
+            "genome.pe.bam"),
+        logfile = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "gn_map",
+            "{name}",
+            "genome.pe.Log.final.out"),
     params:
         cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_filter_duplicates.py"),
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "preprocessing",
-            "{sample}_remove_duplicates.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "preprocessing",
-            "{sample}_remove_duplicates.stderr.log"),
-
-    shell:
-        "(python {params.script} \
-        --bamfile {input.bam} \
-        --outfile {output.bam} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_no_duplicate_removal:
-    """
-        No removal of duplicates
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "remove_ncRNAs",
-            "{sample}.filtered.bam"),
-        bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "remove_ncRNAs",
-            "{sample}.filtered.bam.bai"),
-
-    output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "remove_duplicates",
-                "{sample}.with_duplicates.bam")
-                ),
-        bai = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "remove_duplicates",
-                "{sample}.with_duplicates.bam.bai")
-                ),
-    
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://bash:5.0.16"
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "preprocessing",
-            "{sample}_no_duplicate_removal.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "preprocessing",
-            "{sample}_no_duplicate_removal.stderr.log"),
-
-    shell:
-        "(cp {input.bam} {output.bam}; \
-        cp {input.bai} {output.bai}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_umi_collapse:
-    """
-        Alternative duplicate removal when UMIs are present using umitools.
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "remove_ncRNAs",
-            "{sample}.filtered.bam"),
-        bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "remove_ncRNAs",
-            "{sample}.filtered.bam.bai"),
-
-    output:
-        bam = temp(os.path.join(
-            config["output_dir"],
-            "TR",
-            "remove_duplicates",
-            "{sample}.umis.bam")
-            ),
-
-    params:
-        cluster_log_path = config["cluster_log"]
-
-    singularity:
-        "docker://zavolab/umi-tools:0.5.4"
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "preprocessing",
-            "{sample}_umi_collapse.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "preprocessing",
-            "{sample}_umi_collapse.stderr.log"),
-
-    shell:
-        "(umi_tools \
-        dedup \
-        -I {input.bam} \
-        --paired \
-        -S {output.bam} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_merge:
-    input:
-        bam = lambda wildcards:
-            expand(
-                os.path.join(
-                    config["output_dir"],
-                    "TR",
-                    "remove_duplicates",
-                    "{sample}.{dup_type}.bam"),
-                sample=config[wildcards.experiment][wildcards.name],
-                dup_type=config[config[wildcards.experiment][wildcards.name][0]]['dup_type'],
-                ),
-
-    output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_merge_samples.bam")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_merge.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_merge.stderr.log"),
-
-    shell:
-        "(samtools merge \
-        {output.bam} {input.bam}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_sort_merged:
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.bam"
-            ),
-
-    output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_merge_samples.sorted.bam")
-                ),
-        bai = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_merge_samples.sorted.bam.bai")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{name}_merge_samples"),
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_sort_merged.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_sort_merged.stderr.log"),
-
-    shell:
-        "(samtools sort \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        samtools index \
-        {output.bam} {output.bai} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_sortname_merged:
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.bam"),
-
-    output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_merge_samples.sortedname.bam")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{name}_merge_samples_sortname"),
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_sortname_merged.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_sortname_merged.stderr.log"),
-
-    shell:
-        "(samtools sort -n \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_read_frequencies:
-    """
-        Calculate occurence of reads that are multimappers.
-        Custom script.
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.sorted.bam"),
-        bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.sorted.bam.bai"),
-
-    output:
-        frequencies = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}.frequencies.csv")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_bam_get_read_frequencies.py"),
-        paired = lambda wildcards:
-                    config[config[wildcards.experiment][wildcards.name][0]]['paired'],
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_read_frequencies.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_read_frequencies.stderr.log"),
-        
-    shell:
-        "(python {params.script} \
-        --bamfile {input.bam} \
-        --paired {params.paired} \
-        --outfile {output.frequencies}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_bam_to_fastq:
-    """
-        Convert initially mapped reads back to fastq files to do mappings at the
-        transcriptome and genome level, using bedtools.
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.sortedname.bam"),
-        bam_coord = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_merge_samples.sorted.bam"),
-
-    output:
-        fastq_mate1 = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}.mate1.unfiltered.fastq")
-                ),
-        fastq_mate2 = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}.mate2.unfiltered.fastq")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://zavolab/bedtools:2.27.0"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_bam_to_fastq.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_bam_to_fastq.stderr.log"),
-
-    shell:
-        "(bedtools bamtofastq \
-        -i {input.bam} \
-        -fq {output.fastq_mate1} \
-        -fq2 {output.fastq_mate2}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_bam_to_fastq_filtered:
-    """
-        Remove duplicate reads after converting bam to fastq
-        (when there are multimappers the reads will appear
-        multiple times in the fastq).
-    """
-    input:
-        mate1 = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.mate1.unfiltered.fastq"),
-        mate2 = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.mate2.unfiltered.fastq"),
-
-    output:
-        mate1 = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}.mate1.fastq.gz")
-                ),
-        mate2 = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}.mate2.fastq.gz")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_fastq_keep_unique_reads.py"),
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_bam_to_fastq_filtered.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_bam_to_fastq_filtered.stderr.log"),
-
-    shell:
-        "(python {params.script} \
-        --fastq_in {input.mate1} \
-        --fastq_out {output.mate1} \
-        --fastq_in2 {input.mate2} \
-        --fastq_out2 {output.mate2}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_salmon_index:
-    """
-        Build salmon index based on transcript annotation.
-        Required for transcript quantification done by salmon.
-    """
-    input:
-        transcriptome = config["transcriptome"]
-
-    output:
-        index = directory(os.path.join(
-            config["output_dir"],
-            "TR_salmon_index")
-            ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://zavolab/salmon:0.11.0"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "salmon_index.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "salmon_index.stderr.log"),
-
-    shell:
-        "(salmon index \
-        -t {input.transcriptome} \
-        -i {output.index} \
-        --type fmd; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_tr_quantification:
-    """
-        Quantification of transcript isoforms using Salmon.
-    """
-    input:
-        transcriptome = os.path.join(
-            config["output_dir"],
-            "TR_salmon_index"),
-        mate1 = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.mate1.fastq.gz"),
-        mate2 = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.mate2.fastq.gz"),
-
-    output:
-        quantification = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "tr_quantification_{name}",
-                "quant.sf")
-                ),
-        outdir = temp(
-            directory(
-                os.path.join(
-                    config["output_dir"],
-                    "TR",
-                    "{experiment}",
-                    "tr_quantification_{name}")
-                    )
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://zavolab/salmon:0.11.0"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_tr_quantification.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_tr_quantification.stderr.log"),
-
-    shell:
-        "(salmon quant \
-        -i {input.transcriptome} \
-        -l ISR \
-        -1 {input.mate1} \
-        -2 {input.mate2} \
-        -o {output.outdir} \
-        -p {threads} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_customised_transcriptome:
-    """
-        Build customised transcriptome keeping
-        the most expressed isoform per gene.
-        Custom script.
-    """
-    input:
-        transcriptome = config["transcriptome"],
-        ensembl_csv = os.path.join(
-            config["output_dir"],
-            "ensembl_csv",
-            "ensembl.csv"),
-        ensembl_gtf = config["gtf"],
-        quantification = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "tr_quantification_{name}",
-            "quant.sf"),
-        outdir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "tr_quantification_{name}"),
-
-    output:
-        output_gtf = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.transcriptome.gtf"),
-        output_fasta = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.transcriptome.fa"),
-        info = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.info.csv"),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_most_expressed_transcriptome.py"),
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_customised_transcriptome.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_customised_transcriptome.stderr.log"),
-
-    shell:
-        "(python {params.script} \
-        --quantification_file {input.quantification} \
-        --transcriptome {input.transcriptome}  \
-        --ensembl_csv {input.ensembl_csv} \
-        --ensembl_gtf {input.ensembl_gtf} \
-        --out_gtf {output.output_gtf} \
-        --transcript_info {output.info} \
-        --out_fasta {output.output_fasta} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_transcriptome_index:
-    """
-        Create index of the customised transcriptome using STAR.
-    """
-    input:
-        tr_fasta = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.transcriptome.fa"),
-        tr_gtf = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.transcriptome.gtf"),
-
-    output:
-        output = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{name}_STAR_index",
-            "chrNameLength.txt"),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        output_dir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{name}_STAR_index"),
-        outFileNamePrefix = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{name}_STAR_index/STAR_"),
-        sjdbOverhang = config["sjdbOverhang"]
-
-    singularity:
-        "docker://zavolab/star:2.6.0a"
-
-    threads: 12
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_transcriptome_index.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_transcriptome_index.stderr.log"),
-
-    shell:
-        "(chmod -R 777 {params.output_dir}; \
-        STAR \
-        --runMode genomeGenerate \
-        --genomeSAindexNbases 12 \
-        --genomeChrBinNbits 12 \
-        --genomeDir {params.output_dir} \
-        --genomeFastaFiles {input.tr_fasta} \
-        --runThreadN {threads} \
-        --outFileNamePrefix {params.outFileNamePrefix} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_map_to_transcriptome:
-    """
-        Map to the transcriptome treating each transcript
-        as a "chromosome", using STAR.
-    """
-    input:
-        transcriptome = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "replicates_STAR_index",
-            "chrNameLength.txt"),
-        mate1 = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.mate1.fastq.gz"),
-        mate2 = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.mate2.fastq.gz"),
-
-    output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "tr_map",
-                "{name}",
-                "transcriptome.bam")
-                ),
-        logfile = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "tr_map",
-                "{name}",
-                "transcriptomeLog.final.out")
-                ),
-        outdir = temp(
-            directory(
-                os.path.join(
-                    config["output_dir"],
-                    "TR",
-                    "{experiment}",
-                    "tr_map",
-                    "{name}")
-                    )
-                ),
-    params:
-        cluster_log_path = config["cluster_log"],
-        transcriptome = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "replicates_STAR_index"),
-        experiment_id = "{experiment}_{name}_transcriptome",
-        fragment_size = config["fragment_size"],
-        outFileNamePrefix = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "tr_map",
-            "{name}",
-            "transcriptome"),
-        multimappers = config['multimappers'] + 10,
-
-    singularity:
-        "docker://zavolab/star:2.6.0a"
-
-    threads: 12
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_map_to_transcriptome.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_map_to_transcriptome.stderr.log"),
-
-    shell:
-        "(STAR \
-        --runMode alignReads \
-        --runThreadN {threads} \
-        --genomeDir {params.transcriptome} \
-        --readFilesIn {input.mate1} {input.mate2} \
-        --readFilesCommand zcat \
-        --outSAMunmapped None  \
-        --outFilterMultimapNmax {params.multimappers} \
-        --outFilterMultimapScoreRange 1 \
-        --outFileNamePrefix {params.outFileNamePrefix} \
-        --outFilterScoreMinOverLread 0.9 \
-        --outFilterMatchNminOverLread 0.9 \
-        --outFilterMismatchNoverLmax 0.05 \
-        --outSAMattributes All \
-        --outStd BAM_Unsorted \
-        --outSAMtype BAM Unsorted \
-        --outFilterType BySJout \
-        --outReadsUnmapped None \
-        --outSAMattrRGline ID:rcrunch SM:{params.experiment_id} \
-        --alignIntronMax 1 \
-        --alignEndsType EndToEnd > {output.bam}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_index_map_to_transcriptome:
-    """
-        Index the transcriptomic alignment.
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "tr_map",
-            "{name}",
-            "transcriptome.bam"
-            ),
-        outdir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "tr_map",
-            "{name}")
-
-    output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "tr_map_sort",
-                "{name}",
-                "transcriptome.sorted.bam")
-                ),
-        bai = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "tr_map_sort",
-                "{name}",
-                "transcriptome.sorted.bam.bai")
-                ),
-    
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "tr_map",
-            "{name}",
-            "transcriptome_temp"),
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_index_map_to_transcriptome.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_index_map_to_transcriptome.stderr.log"),
-
-    shell:
-        "(samtools sort \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        samtools index \
-        {output.bam} {output.bai} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_map_to_genome:
-    """
-        Initial mapping to the genome using STAR.
-    """
-    input:
-        genome = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"),
-        mate1 = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.mate1.fastq.gz"),
-        mate2 = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}.mate2.fastq.gz"),
-
-    output:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "gn_map",
-            "{name}",
-            "genome.bam"),
-        logfile = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "gn_map",
-            "{name}",
-            "genomeLog.final.out"),
-        outdir = temp(
-            directory(
-                os.path.join(
-                    config["output_dir"],
-                    "TR",
-                    "{experiment}",
-                    "gn_map",
-                    "{name}")
-                    )
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        experiment_id = "{experiment}_{name}_genome",
-        genome = os.path.join(
-            config["output_dir"],
-            "STAR_index"),
-        outFileNamePrefix = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "gn_map",
-            "{name}",
-            "genome"),
-        multimappers = config["multimappers"]
-
-    singularity:
-        "docker://zavolab/star:2.6.0a"
-
-    threads: 12
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_map_to_genome.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_map_to_genome.stderr.log"),
-
-    shell:
-        "(STAR \
-        --runMode alignReads \
-        --runThreadN {threads} \
-        --genomeDir {params.genome} \
-        --readFilesIn {input.mate1} {input.mate2} \
-        --readFilesCommand zcat \
-        --outSAMunmapped None  \
-        --outFilterMultimapNmax {params.multimappers} \
-        --outFilterMultimapScoreRange 1 \
-        --outFilterScoreMinOverLread 0.9 \
-        --outFilterMatchNminOverLread 0.9 \
-        --outFilterMismatchNoverLmax 0.05 \
-        --outFileNamePrefix {params.outFileNamePrefix} \
-        --outSAMattributes All \
-        --outStd BAM_Unsorted \
-        --outSAMtype BAM Unsorted \
-        --outFilterType BySJout \
-        --outReadsUnmapped None \
-        --outSAMattrRGline ID:rcrunch SM:{params.experiment_id} \
-        --alignEndsType EndToEnd > {output.bam}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_index_map_to_genome:
-    """
-        Index the genomic alignment.
-    """
-    input:
-        outdir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "gn_map",
-            "{name}"),
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "gn_map",
-            "{name}",
-            "genome.bam"),
-
-    output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "gn_map_sort",
-                "{name}",
-                "genome.sorted.bam")
-                ),
-        bai = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "gn_map_sort",
-                "{name}",
-                "genome.sorted.bam.bai")
-                ),
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "gn_map_sort",
-            "{name}",
-            "genome_temp"),
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_index_map_to_genome.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_index_map_to_genome.stderr.log"),
-
-    shell:
-        "(samtools sort \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        samtools index \
-        {output.bam} {output.bai} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_assign_to_gn_tr:
-    """
-        Remove reads that map to '-' strand in transcriptome
-    """
-    input:
-        ensembl_csv = os.path.join(
-            config["output_dir"],
-            "ensembl_csv",
-            "ensembl.csv"),
-        chromosome_info = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"),
-        gn_bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "gn_map_sort",
-            "{name}",
-            "genome.sorted.bam"),
-        gn_bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "gn_map_sort",
-            "{name}",
-            "genome.sorted.bam.bai"),
-        tr_bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "tr_map_sort",
-            "{name}",
-            "transcriptome.sorted.bam"),
-        tr_bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "tr_map_sort",
-            "{name}",
-            "transcriptome.sorted.bam.bai"),
-
-    output:
-        tr_bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_transcriptome.bam")
-                ),
-        gn_bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_genome.bam")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_assign_read_to_gn_tr.py"),
-        out_folder = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{name}_assign_to_gn_tr_temp"),
-        paired = lambda wildcards:
-                    config[config[wildcards.experiment][wildcards.name][0]]['paired'],
-        sense = lambda wildcards:
-                    config[config[wildcards.experiment][wildcards.name][0]]['sense'],
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 12
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_assign_to_gn_tr.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_assign_to_gn_tr.stderr.log"),
-
-    shell:
-        "(mkdir -p {params.out_folder} ;\
-        python {params.script} \
-        --annotation {input.ensembl_csv}  \
-        --tr_bam {input.tr_bam} \
-        --gn_bam {input.gn_bam} \
-        --out_folder {params.out_folder} \
-        --filtered_tr_bam {output.tr_bam} \
-        --filtered_gn_bam {output.gn_bam} \
-        --paired {params.paired} \
-        --sense {params.sense} \
-        --chromosome_info {input.chromosome_info} \
-        --threads {threads} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_transcriptome_filter_sort:
-    """
-        Sort and index transcriptomic reads after
-        filtering wrong strand reads and
-        those that map better to genome
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_transcriptome.bam"),
-
-    output:
-        bam = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_transcriptome.sorted.bam")
-                ),
-        bai = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_transcriptome.sorted.bam.bai")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{name}_sort_samples"),
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_transcriptome_filter_sort.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_transcriptome_filter_sort.stderr.log"),
-
-    shell:
-        "(samtools sort \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        samtools index \
-        {output.bam} {output.bai} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_genome_filter_sort:
-    """
-        Sort and index genomic reads after filtering wrong strand reads and
-        those that map better to genome
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_genome.bam"),
-
-    output:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_genome.sorted.bam"),
-        bai =  os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_genome.sorted.bam.bai"),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        prefix = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{name}_gn_sort_samples"),
-
-    singularity:
-        "docker://zavolab/samtools:1.8"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_genome_filter_sort.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_genome_filter_sort.stderr.log"),
-
-    shell:
-        "(samtools sort \
-        -T {params.prefix} \
-        -@ {threads} \
-        {input.bam} > {output.bam}; \
-        samtools index \
-        {output.bam} {output.bai} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_bam_to_bed:
-    """
-        Convert bamfile to bedfile using bedtools
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_genome.sorted.bam"),
-
-    output:
-        bed = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_genome.bed")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://zavolab/bedtools:2.27.0"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_bam_to_bed.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_bam_to_bed.stderr.log"),
-
-    shell:
-        "(bedtools bamtobed \
-        -i {input.bam} > {output.bed}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_bam_to_bed_transcriptome:
-    """
-        Convert bamfile to bedfile using bedtools
-    """
-    input:
-        bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_{name}_transcriptome.sorted.bam"),
-
-    output:
-        bed = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}_{name}_transcriptome.bed")
-                ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-
-    singularity:
-        "docker://zavolab/bedtools:2.27.0"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_bam_to_bed_transcriptome.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "{name}_bam_to_bed_transcriptome.stderr.log"),
-
-    shell:
-        "(bedtools bamtobed \
-        -i {input.bam} > {output.bed}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_genome_coverage:
-    """
-        Count number of fragments in windows 
-        of specific size at the genome.
-        Custom script.
-    """
-    input:
-        chromosome_info = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"),
-        fg_bed = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates_genome.bed"),
-        bg_bed = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis_genome.bed"),
-        fg_frequencies = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates.frequencies.csv"),
-        bg_frequencies = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis.frequencies.csv"),
-
-    output:
-        coverage = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}.genome_coverage.bed")
-                ),
-    
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "sliding_windows.py"),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}"),
-        window_f = lambda wildcards:
-            config[wildcards.experiment]["window_f"],
-        window_b = lambda wildcards:
-            config[wildcards.experiment]["window_b"],
-        step_size = lambda wildcards:
-            config[wildcards.experiment]["step_size"],
-        sense_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['sense'],
-        sense_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['sense'],
-        paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
-        paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
-        prefix = "{experiment}.genome_coverage",
-        background_type = lambda wildcards:
-            config[wildcards.experiment]["background_type"],
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "genome_coverage.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "genome_coverage.stderr.log"),
-
-    shell:
-        "(python {params.script} \
-        --bed_foreground {input.fg_bed} \
-        --bed_background {input.bg_bed} \
-        --foreground_frequencies {input.fg_frequencies} \
-        --background_frequencies {input.bg_frequencies} \
-        --out {params.output_dir} \
-        --prefix {params.prefix} \
-        --chromosomes {input.chromosome_info} \
-        --sense_f {params.sense_f} \
-        --sense_b {params.sense_b} \
-        --paired_f {params.paired_f} \
-        --paired_b {params.paired_b} \
-        --window_f {params.window_f} \
-        --window_b {params.window_b} \
-        --step_size {params.step_size} \
-        --data_type genome \
-        --background {params.background_type} \
-        --cutoff 1 \
-        --threads {threads}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_transcriptome_coverage:
-    """
-        Count number of fragments in windows 
-        of specific size at
-        the customised transcriptome.
-        Custom script.
-    """
-    input:
-        chromosome_info = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "replicates_STAR_index",
-            "chrNameLength.txt"),
-        fg_bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates_transcriptome.sorted.bam"),
-        fg_bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates_transcriptome.sorted.bam.bai"),
-        bg_bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis_transcriptome.sorted.bam"),
-        bg_bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis_transcriptome.sorted.bam.bai"),
-        fg_frequencies = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates.frequencies.csv"),
-        bg_frequencies = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis.frequencies.csv"),
-
-    output:
-        coverage = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}.transcriptome_coverage.bed")
-                ),
-            
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_bam_count_sliding_windows.py"),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}"),
-        window_f = lambda wildcards:
-            config[wildcards.experiment]["window_f"],
-        window_b = lambda wildcards:
-            config[wildcards.experiment]["window_b"],
-        step_size = lambda wildcards:
-            config[wildcards.experiment]["step_size"],
-        sense_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['sense'],
-        sense_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['sense'],
-        paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
-        paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
-        prefix = "{experiment}.transcriptome_coverage",
-        background_type = lambda wildcards:
-            config[wildcards.experiment]["background_type"]
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 8
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "transcriptome_coverage.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "transcriptome_coverage.stderr.log"),
-
-    shell:
-        "(python {params.script} \
-        --bam_foreground {input.fg_bam} \
-        --bam_background {input.bg_bam} \
-        --bam_foreground_frequencies {input.fg_frequencies} \
-        --bam_background_frequencies {input.bg_frequencies} \
-        --out {params.output_dir} \
-        --prefix {params.prefix} \
-        --chromosomes {input.chromosome_info} \
-        --sense_f {params.sense_f} \
-        --sense_b {params.sense_b} \
-        --paired_f {params.paired_f} \
-        --paired_b {params.paired_b} \
-        --window_f {params.window_f} \
-        --window_b {params.window_b} \
-        --step_size {params.step_size} \
-        --data_type transcriptome \
-        --background {params.background_type} \
-        --cutoff 1 \
-        --threads {threads}; \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_merge_windows:
-    """
-        Merge windows from foreground/ background.
-        Custom script.
-    """
-    input:
-        tr_coverage = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}.transcriptome_coverage.bed"),
-        gn_coverage = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}.genome_coverage.bed"),
-
-    output:
-        merged_coverage = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "{experiment}.total_coverage.bed")
-            ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_merge_fg_bg.py"),
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "merge_windows.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "merge_windows.stderr.log"),
-
-    shell:
-        "(python {params.script} \
-        --transcriptome {input.tr_coverage} \
-        --genome {input.gn_coverage} \
-        --out {output.merged_coverage} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_enriched_regions:
-    """
-        Obtain enriched regions for binding based on EM using
-        bg for noise estimation. Modified and adapted from:
-        Berger, Severin M., et al. "Crunch: Integrated processing and modeling of
-        ChIP-seq data in terms of regulatory motifs."
-        Genome research (2019): gr-239319
-    Custom script.
-   """
-    input:
-        coverage = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}.total_coverage.bed"),
-
-    output:
-        enriched_regions = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_enriched_regions.csv"),
-        parameters = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_parameters.csv"),
-        all_regions = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_regions.csv"),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_find_enriched_regions.py"),
-        plot_dir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "enriched_regions_plots"),
-        fdr_cutoff = config['FDR']
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "enriched_regions.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "enriched_regions.stderr.log"),
-
-    shell:
-        "(mkdir -p {params.plot_dir}; \
-        python {params.script} \
-        --windows_table {input.coverage} \
-        --out {output.enriched_regions} \
-        --all_regions {output.all_regions} \
-        --out_parameters {output.parameters} \
-        --out_folder {params.plot_dir} \
-        --fdr_cutoff {params.fdr_cutoff} \
-         ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_fit_peaks:
-    """
-        Fit individual binding events in the enriched regions.
-        Modifed and adapted from:
-        Berger, Severin M., et al. "Crunch: Integrated processing and
-        modeling of ChIP-seq data in terms of regulatory motifs."
-        Genome research (2019): gr-239319.
-        Custom script.
-    """
-    input:
-        fg_gn_bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates_genome.sorted.bam"),
-        fg_gn_bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates_genome.sorted.bam.bai"),
-        bg_gn_bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis_genome.sorted.bam"),
-        bg_gn_bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis_genome.sorted.bam.bai"),
-        fg_tr_bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates_transcriptome.sorted.bam"),
-        fg_tr_bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates_transcriptome.sorted.bam.bai"),
-        bg_tr_bam = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis_transcriptome.sorted.bam"),
-        bg_tr_bai = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis_transcriptome.sorted.bam.bai"),
-        fg_frequencies = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates.frequencies.csv"),
-        bg_frequencies = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_smis.frequencies.csv"),
-        enriched_regions = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_enriched_regions.csv"),
-        parameters = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_parameters.csv"),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"),
-        chromosomes_t = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "replicates_STAR_index",
-            "chrNameLength.txt"),
-
-    output:
-        peaks = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_define_peaks.py"),
-        plot_dir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "fit_peaks_plots"),
-        fragment_size = config["fragment_size"],
-        window_size = lambda wildcards:
-            config[wildcards.experiment]['window_f'],
-        paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
-        paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
-        sense_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['sense'],
-        sense_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['sense'],
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 12
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "fit_peaks.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "fit_peaks.stderr.log"),
-
-    shell:
-        "(mkdir -p {params.plot_dir}; \
-        python {params.script} \
-        --enriched_regions {input.enriched_regions} \
-        --parameters {input.parameters} \
-        --fragment_size {params.fragment_size} \
-        --window_size {params.window_size} \
-        --chromosomes_g {input.chromosomes_g} \
-        --bam_foreground_g {input.fg_gn_bam} \
-        --bam_background_g {input.bg_gn_bam} \
-        --chromosomes_t {input.chromosomes_t} \
-        --bam_foreground_t {input.fg_tr_bam} \
-        --bam_background_t {input.bg_tr_bam} \
-        --bam_fg_fq {input.fg_frequencies} \
-        --bam_bg_fq {input.bg_frequencies} \
-        --paired_f {params.paired_f} \
-        --paired_b {params.paired_b} \
-        --sense_f {params.sense_f} \
-        --sense_b {params.sense_b} \
-        --out {output.peaks} \
-        --out_folder {params.plot_dir} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_peaks_split_sets_crosslink:
-    """
-        Split the significant peaks into training and test
-        for subsequent motif analysis
-    """
-    input:
-        genome_fasta = config["genome"],
-        transcriptome_fasta = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates.transcriptome.fa"),
-        all_regions = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_regions.csv"),
-        reads = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"),
-        chromosomes_t = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "replicates_STAR_index",
-            "chrNameLength.txt"),
-
-    output:
-        training = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "crosslink",
-                "training.fasta")
-            ),
-        test = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "crosslink",
-                "test.fasta")
-                ),
-        training_bg = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "crosslink",
-                "training_bg.fasta")
-                ),
-        test_bg = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "crosslink",
-                "test_bg.fasta")
-                ),
-        dirtemp = temp(
-            directory(
-                os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "crosslink",
-                "tmpdir")
-            )
-        ),
-
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_peaks_split_sets.py"),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "crosslink"),
-        genome_tag = config["genome_tag"],
-        peak_size = 50
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}_peaks_split_sets_crosslink.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}_peaks_split_sets_crosslink.stderr.log"),
-
-    shell:
-        "(mkdir -p {output.dirtemp}; \
-        python {params.script} \
-        --peaks_file {input.reads} \
-        --genome_fasta {input.genome_fasta} \
-        --all_regions {input.all_regions} \
-        --genome_tag {params.genome_tag} \
-        --tempdir {output.dirtemp} \
-        --transcriptome_fasta {input.transcriptome_fasta} \
-        --chromosomes_g {input.chromosomes_g} \
-        --chromosomes_t {input.chromosomes_t} \
-        --crosslink_type 'crosslink' \
-        --peak_size {params.peak_size} \
-        --out_folder {params.output_dir} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_peaks_split_sets:
-    """
-        Split the significant peaks into training and test
-        for subsequent motif analysis
-    """
-    input:
-        genome_fasta = config["genome"],
-        transcriptome_fasta = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates.transcriptome.fa"),
-        all_regions = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_regions.csv"),
-        reads = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"),
-        chromosomes_t = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "replicates_STAR_index",
-            "chrNameLength.txt"),
-
-    output:
-        training = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "peak_center",
-                "training.fasta")
-                ),
-        test = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "peak_center",
-                "test.fasta")
-                ),
-        training_bg = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "peak_center",
-                "training_bg.fasta")
-                ),
-        test_bg = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "peak_center",
-                "test_bg.fasta")
-                ),
-        dirtemp = temp(
-            directory(
-                os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_crossvalidation",
-                "{run}",
-                "peak_center",
-                "tmpdir")
-            )
-        ),
-
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "mk_peaks_split_sets.py"),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}",
-            "peak_center"),
-        genome_tag = config["genome_tag"],
-        peak_size = 50
-
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}_peaks_split_sets_peak_center.stdout.log"),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "motif_analysis_crossvalidation",
-            "{run}_peaks_split_sets_peak_center.stderr.log"),
-
-    shell:
-        "(mkdir -p {output.dirtemp}; \
-        python {params.script} \
-        --peaks_file {input.reads} \
-        --genome_fasta {input.genome_fasta} \
-        --all_regions {input.all_regions} \
-        --tempdir {output.dirtemp} \
-        --genome_tag {params.genome_tag} \
-        --transcriptome_fasta {input.transcriptome_fasta} \
-        --chromosomes_g {input.chromosomes_g} \
-        --chromosomes_t {input.chromosomes_t} \
-        --crosslink_type 'peak_center' \
-        --out_folder {params.output_dir} \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-rule TR_peaks_split_sets_crosslink_all_motifs:
-    """
-        Get all peaks crosslink
-    """
-    input:
-        transcriptome_fasta = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates.transcriptome.fa"),
-        genome_fasta = config["genome"],
-        all_regions = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_regions.csv"
-            ),
-        reads = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"
-            ),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"
-            ),
-        chromosomes_t = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "replicates_STAR_index",
-            "chrNameLength.txt"),
-
-    output:
-        test = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_final",
-                "crosslink",
-                "test.fasta")
-            ),
-        test_bg = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_final",
-                "crosslink",
-                "test_bg.fasta")
-            ),
-        dirtemp = temp(
-            directory(
-                os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_final",
-                "crosslink",
-                "tmpdir")
-            )
-        ),
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "total_enrichment_peakstofasta.py"
-            ),
-        output_dir = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "motif_analysis_final",
-            "crosslink",
-            ),
-        genome_tag = config["genome_tag"],
-        peak_size = 50,
-        
-    singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
-
-    threads: 1
-
-    log:
-        stdout = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "motif_analysis_final",
-            "peaks_split_sets__all_motifs_crosslink.stdout.log"
-            ),
-        stderr = os.path.join(
-            config["local_log"],
-            "TR",
-            "{experiment}",
-            "motif_analysis_final",
-            "peaks_split_sets__all_motifs_crosslink.stderr.log"
-            ),
-
-    shell:
-        "(mkdir -p {params.output_dir}; \
-        mkdir -p {output.dirtemp}; \
-        python {params.script} \
-        --peaks_file {input.reads} \
-        --genome_fasta {input.genome_fasta} \
-        --all_regions {input.all_regions} \
-        --genome_tag {params.genome_tag} \
-        --tempdir {output.dirtemp} \
-        --transcriptome_fasta {input.transcriptome_fasta} \
-        --chromosomes_g {input.chromosomes_g} \
-        --chromosomes_t {input.chromosomes_t} \
-        --peak_size {params.peak_size} \
-        --out_folder {params.output_dir} \
-        --crosslink_type 'crosslink' \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
-
-rule TR_peaks_split_sets_all_motifs:
-    """
-        get all peaks crosslink
-    """
-    input:
-        transcriptome_fasta = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_replicates.transcriptome.fa"),
-        genome_fasta = config["genome"],
-        all_regions = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_regions.csv"
-            ),
-        reads = os.path.join(
-            config["output_dir"],
-            "TR",
-            "{experiment}",
-            "{experiment}_total_peaks.csv"
-            ),
-        chromosomes_g = os.path.join(
-            config["output_dir"],
-            "STAR_index",
-            "chrNameLength.txt"
-            ),
-        chromosomes_t = os.path.join(
+        experiment_id = "{experiment}_{name}_genome",
+        genome = os.path.join(
             config["output_dir"],
-            "TR",
-            "{experiment}",
-            "replicates_STAR_index",
-            "chrNameLength.txt"),
-
-
-    output:
-        test = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_final",
-                "peak_center",
-                "test.fasta")
-            ),
-        test_bg = temp(
-            os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_final",
-                "peak_center",
-                "test_bg.fasta")
-            ),
-        dirtemp = temp(
-            directory(
-                os.path.join(
-                config["output_dir"],
-                "TR",
-                "{experiment}",
-                "motif_analysis_final",
-                "peak_center",
-                "tmpdir")
-            )
-        ),
-    params:
-        cluster_log_path = config["cluster_log"],
-        script = os.path.join(
-            workflow.basedir,
-            "scripts",
-            "total_enrichment_peakstofasta.py"
-            ),
-        output_dir = os.path.join(
+            "STAR_index"),
+        outFileNamePrefix = os.path.join(
             config["output_dir"],
             "TR",
             "{experiment}",
-            "motif_analysis_final",
-            "peak_center",
-            ),
-        genome_tag = config["genome_tag"],
-        peak_size = 50
-        
+            "gn_map",
+            "{name}",
+            "genome.pe."),
+        multimappers = config["multimappers"]
+    shadow: "full"
+    
     singularity:
-        "docker://zavolab/rcrunch_python:1.0.5"
+        "docker://zavolab/star:2.6.0a"
 
-    threads: 1
+    threads: 12
 
     log:
         stdout = os.path.join(
             config["local_log"],
             "TR",
             "{experiment}",
-            "motif_analysis_final",
-            "peaks_split_sets__all_motifs_standard.stdout.log"
-            ),
+            "{name}_map_to_genome.pe.stdout.log"),
         stderr = os.path.join(
             config["local_log"],
             "TR",
             "{experiment}",
-            "motif_analysis_final",
-            "peaks_split_sets__all_motifs_standard.stderr.log"
-            ),
+            "{name}_map_to_genome.pe.stderr.log"),
 
     shell:
-        "(mkdir -p {params.output_dir}; \
-        mkdir -p {output.dirtemp}; \
-        python {params.script} \
-        --peaks_file {input.reads} \
-        --genome_fasta {input.genome_fasta} \
-        --all_regions {input.all_regions} \
-        --genome_tag {params.genome_tag} \
-        --tempdir {output.dirtemp} \
-        --transcriptome_fasta {input.transcriptome_fasta} \
-        --chromosomes_g {input.chromosomes_g} \
-        --chromosomes_t {input.chromosomes_t} \
-        --peak_size {params.peak_size} \
-        --out_folder {params.output_dir} \
-        --crosslink_type 'peak_center' \
-        ) 1> {log.stdout} 2> {log.stderr}"
-
+        "(STAR \
+        --runMode alignReads \
+        --runThreadN {threads} \
+        --genomeDir {params.genome} \
+        --readFilesIn {input.mate1} {input.mate2} \
+        --readFilesCommand zcat \
+        --outSAMunmapped None  \
+        --outFilterMultimapNmax {params.multimappers} \
+        --outFilterMultimapScoreRange 1 \
+        --outFilterScoreMinOverLread 0.9 \
+        --outFilterMatchNminOverLread 0.9 \
+        --outFilterMismatchNoverLmax 0.05 \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        --outSAMattributes All \
+        --outStd BAM_Unsorted \
+        --outSAMtype BAM Unsorted \
+        --outFilterType BySJout \
+        --outReadsUnmapped None \
+        --outSAMattrRGline ID:rcrunch SM:{params.experiment_id} \
+        --alignEndsType EndToEnd > {output.bam}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
\ No newline at end of file
diff --git a/workflow/rules/rcrunch_transcriptomic_se.smk b/workflow/rules/rcrunch_transcriptomic_se.smk
new file mode 100644
index 0000000..24eb64c
--- /dev/null
+++ b/workflow/rules/rcrunch_transcriptomic_se.smk
@@ -0,0 +1,508 @@
+# --------------------------------------------------------------------------------
+# RCRUNCH
+# Author : Katsantoni Maria
+# Company: Mihaela Zavolan group, Biozentrum, Basel
+# --------------------------------------------------------------------------------
+# Transcriptomic RCRUNCH subpipeline for paired-end CLIP data
+# This approach is based on the hypothesis that the RBP is binding the mature
+# mRNA and thus the sequence of the mature mRNA is important for the binding.
+# ________________________________________________________________________________
+import os
+
+rule TR_initial_map_to_genome_se:
+    """
+        Initial mapping to the genome using STAR.
+    """
+    input:
+        genome = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"
+            ),
+        reads = lambda wildcards:
+            expand(
+                os.path.join(
+                    config["output_dir"],
+                    "cutadapt",
+                    "{sample}_mate1.se.cutadapt_{format}.fastq.gz"),
+                sample=wildcards.sample,
+                format=config[wildcards.sample]['format']
+                ),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "initial_map",
+                "{sample}",
+                "{sample}.se.bam")
+                ),
+        logfile = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "initial_map",
+                "{sample}",
+                "{sample}.se.Log.final.out")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        sample_id = "{sample}",
+        genome = os.path.join(
+            config["output_dir"],
+            "STAR_index"),
+        outFileNamePrefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "initial_map",
+            "{sample}",
+            "{sample}.se."),
+        multimappers = config["multimappers"]
+    
+    shadow: "full"
+    
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_initial_map_to_genome.se.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_initial_map_to_genome.se.stderr.log"),
+
+    shell:
+        "(STAR \
+        --runMode alignReads \
+        --runThreadN {threads} \
+        --genomeDir {params.genome} \
+        --readFilesIn {input.reads} \
+        --readFilesCommand zcat \
+        --outSAMunmapped None  \
+        --outFilterMultimapNmax {params.multimappers} \
+        --outFilterMultimapScoreRange 1 \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        --outSAMattributes All \
+        --outStd BAM_Unsorted \
+        --outSAMtype BAM Unsorted \
+        --outFilterScoreMinOverLread 0.9 \
+        --outFilterMatchNminOverLread 0.9 \
+        --outFilterMismatchNoverLmax 0.05 \
+        --outFilterType BySJout \
+        --outReadsUnmapped None \
+        --outSAMattrRGline ID:rcrunch SM:{params.sample_id} \
+        --alignEndsType EndToEnd > {output.bam}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_umi_collapse_se:
+    """
+        Alternative duplicate removal when UMIs are present using umitools.
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "remove_ncRNAs",
+            "{sample}.filtered.bam"),
+        bai = os.path.join(
+            config["output_dir"],
+            "TR",
+            "remove_ncRNAs",
+            "{sample}.filtered.bam.bai"),
+
+    output:
+        bam = temp(os.path.join(
+            config["output_dir"],
+            "TR",
+            "remove_duplicates",
+            "{sample}.umis.se.bam")
+            ),
+
+    params:
+        cluster_log_path = config["cluster_log"]
+
+    singularity:
+        "docker://zavolab/umi-tools:0.5.4"
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_umi_collapse.se.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "preprocessing",
+            "{sample}_umi_collapse.se.stderr.log"),
+
+    shell:
+        "(umi_tools \
+        dedup \
+        -I {input.bam} \
+        -S {output.bam} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_bam_to_fastq_se:
+    """
+        Convert initially mapped reads back to fastq files to do mappings at the
+        transcriptome and genome level, using bedtools.
+    """
+    input:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sortedname.bam"),
+        bam_coord = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}_merge_samples.sorted.bam"),
+
+    output:
+        fastq_mate1 = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}.mate1.se.unfiltered.fastq")
+                )
+    params:
+        cluster_log_path = config["cluster_log"],
+
+    singularity:
+        "docker://zavolab/bedtools:2.27.0"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_bam_to_fastq.se.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_bam_to_fastq.se.stderr.log"),
+
+    shell:
+        "(bedtools bamtofastq \
+        -i {input.bam} \
+        -fq {output.fastq_mate1};) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_bam_to_fastq_filtered_se:
+    """
+        Remove duplicate reads after converting bam to fastq
+        (when there are multimappers the reads will appear
+        multiple times in the fastq).
+    """
+    input:
+        mate1 = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.mate1.se.unfiltered.fastq"),
+
+    output:
+        mate1 = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "{experiment}_{name}.mate1.se.fastq.gz")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        script = os.path.join(
+            workflow.basedir,
+            "scripts",
+            "mk_fastq_keep_unique_reads.py"),
+
+    singularity:
+        "docker://zavolab/rcrunch_python:1.0.5"
+
+    threads: 1
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_bam_to_fastq_filtered.se.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_bam_to_fastq_filtered.se.stderr.log"),
+
+    shell:
+        "(python {params.script} \
+        --fastq_in {input.mate1} \
+        --fastq_out {output.mate1};) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_tr_quantification_se:
+    """
+        Quantification of transcript isoforms using Salmon.
+    """
+    input:
+        transcriptome = os.path.join(
+            config["output_dir"],
+            "TR_salmon_index"),
+        reads = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.mate1.se.fastq.gz"),
+
+    output:
+        quantification = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "tr_quantification_se_{name}",
+                "quant.sf")
+                ),
+
+    params:
+        cluster_log_path = config["cluster_log"],
+        library = lambda wildcards: get_library_type(
+            config[wildcards.experiment][wildcards.name][0],
+            config[config[wildcards.experiment][wildcards.name][0]]['sense']),
+        outdir = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "tr_quantification_se_{name}"),
+
+    shadow: "full"
+    singularity:
+        "docker://zavolab/salmon:0.11.0"
+
+    threads: 8
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_tr_quantification.se.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_tr_quantification.se.stderr.log"),
+
+    shell:
+        "(salmon quant \
+        -i {input.transcriptome} \
+        -l {params.library} \
+        -r {input.reads} \
+        -o {params.outdir} \
+        -p {threads} \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+rule TR_map_to_transcriptome_se:
+    """
+        Map to the transcriptome treating each transcript
+        as a "chromosome", using STAR.
+    """
+    input:
+        transcriptome = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "replicates_STAR_index",
+            "chrNameLength.txt"),
+        mate1 = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.mate1.se.fastq.gz"),
+
+    output:
+        bam = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "tr_map",
+                "{name}",
+                "transcriptome.se.bam")
+                ),
+        logfile = temp(
+            os.path.join(
+                config["output_dir"],
+                "TR",
+                "{experiment}",
+                "tr_map",
+                "{name}",
+                "transcriptome.se.Log.final.out")
+                ),
+    params:
+        cluster_log_path = config["cluster_log"],
+        transcriptome = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "replicates_STAR_index"),
+        experiment_id = "{experiment}_{name}_transcriptome",
+        fragment_size = config["fragment_size"],
+        outFileNamePrefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "tr_map",
+            "{name}",
+            "transcriptome.se."),
+        multimappers = config['multimappers'] + 10,
+    
+    shadow: "full"
+    
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_map_to_transcriptome.se.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_map_to_transcriptome.se.stderr.log"),
+
+    shell:
+        "(STAR \
+        --runMode alignReads \
+        --runThreadN {threads} \
+        --genomeDir {params.transcriptome} \
+        --readFilesIn {input.mate1} \
+        --readFilesCommand zcat \
+        --outSAMunmapped None  \
+        --outFilterMultimapNmax {params.multimappers} \
+        --outFilterMultimapScoreRange 1 \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        --outFilterScoreMinOverLread 0.9 \
+        --outFilterMatchNminOverLread 0.9 \
+        --outFilterMismatchNoverLmax 0.05 \
+        --outSAMattributes All \
+        --outStd BAM_Unsorted \
+        --outSAMtype BAM Unsorted \
+        --outFilterType BySJout \
+        --outReadsUnmapped None \
+        --outSAMattrRGline ID:rcrunch SM:{params.experiment_id} \
+        --alignIntronMax 1 \
+        --alignEndsType EndToEnd > {output.bam}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
+
+
+
+rule TR_map_to_genome_se:
+    """
+        Initial mapping to the genome using STAR.
+    """
+    input:
+        genome = os.path.join(
+            config["output_dir"],
+            "STAR_index",
+            "chrNameLength.txt"),
+        mate1 = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "{experiment}_{name}.mate1.se.fastq.gz"),
+
+    output:
+        bam = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "gn_map",
+            "{name}",
+            "genome.se.bam"),
+        logfile = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "gn_map",
+            "{name}",
+            "genome.se.Log.final.out"),
+    params:
+        cluster_log_path = config["cluster_log"],
+        experiment_id = "{experiment}_{name}_genome",
+        genome = os.path.join(
+            config["output_dir"],
+            "STAR_index"),
+        outFileNamePrefix = os.path.join(
+            config["output_dir"],
+            "TR",
+            "{experiment}",
+            "gn_map",
+            "{name}",
+            "genome.se."),
+        multimappers = config["multimappers"]
+    
+    shadow: "full"
+    
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+
+    threads: 12
+
+    log:
+        stdout = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_map_to_genome.se.stdout.log"),
+        stderr = os.path.join(
+            config["local_log"],
+            "TR",
+            "{experiment}",
+            "{name}_map_to_genome.se.stderr.log"),
+
+    shell:
+        "(STAR \
+        --runMode alignReads \
+        --runThreadN {threads} \
+        --genomeDir {params.genome} \
+        --readFilesIn {input.mate1} \
+        --readFilesCommand zcat \
+        --outSAMunmapped None  \
+        --outFilterMultimapNmax {params.multimappers} \
+        --outFilterMultimapScoreRange 1 \
+        --outFilterScoreMinOverLread 0.9 \
+        --outFilterMatchNminOverLread 0.9 \
+        --outFilterMismatchNoverLmax 0.05 \
+        --outFileNamePrefix {params.outFileNamePrefix} \
+        --outSAMattributes All \
+        --outStd BAM_Unsorted \
+        --outSAMtype BAM Unsorted \
+        --outFilterType BySJout \
+        --outReadsUnmapped None \
+        --outSAMattrRGline ID:rcrunch SM:{params.experiment_id} \
+        --alignEndsType EndToEnd > {output.bam}; \
+        ) 1> {log.stdout} 2> {log.stderr}"
\ No newline at end of file

From 0783378e17f4365c539960401396a5acb0f08d01 Mon Sep 17 00:00:00 2001
From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch>
Date: Fri, 13 Jan 2023 11:58:45 +0100
Subject: [PATCH 3/4] feat: addition of testing for single, paired end and TR
 in addition to GN

---
 .../performance_evaluation.py                 | 148 +++++++++---------
 1 file changed, 76 insertions(+), 72 deletions(-)

diff --git a/test/test_singularity_execution/performance_evaluation.py b/test/test_singularity_execution/performance_evaluation.py
index fb3940b..0196c7f 100755
--- a/test/test_singularity_execution/performance_evaluation.py
+++ b/test/test_singularity_execution/performance_evaluation.py
@@ -12,80 +12,84 @@ def main():
     __doc__ = "Evaluate the run based on expected values"
     outpath = os.path.dirname(os.path.realpath(__file__))
     os.chdir(outpath)
-    a = pd.read_csv(
-        'results/GN/PUM2_K562_ENCSR661ICQ_2/PUM2_K562_ENCSR661ICQ_2_total_peaks.csv',
-        sep='\t', header=0, index_col=None)
-    expected = pd.read_csv(
-        'means_crosslink.tsv',
-        sep='\t', header=0, index_col=None)
-    a['center'] = a['start'] + a['crosslink']
-
-    for index, row in a.iterrows():
-        distance = (
-            row['center'] - expected['crosslink'][expected['strand'] == row['strand']]).abs().min()
-        if distance <= 50:
-            a.loc[index, 'distance'] = distance
-        else:
-            a.loc[index, 'distance'] = np.nan
-    rmsd = np.sqrt(np.sum((a["distance"]**2)) / len(a[~a.distance.isna()]))
-    outlier_percentage = len(a[a.distance.isna()])/len(a) * 100
-
-
-    # expected  = pd.read_csv(
-    #     'means_peak_center.tsv',
-    #     sep='\t', header=0, index_col=None)
-    # a['center'] = a['start'] +  a['mi']
-
-    # for index, row in a.iterrows():
-    #     distance = (row['center'] - expected['peak_center'][expected['strand'] == row['strand']]).abs().min()
-    #     if distance <= 50:
-    #         a.loc[index, 'distance'] = distance
-    #     else:
-    #         a.loc[index, 'distance'] = np.nan
-    # sys.stdout.write(f'rmsd of peak_center as center: {np.sqrt(np.sum((a["distance"]**2)) / len(a[~a.distance.isna()]))}\n'
-    # )
-    # sys.stdout.write(f'percentage of outlier peaks: {len(a[a.distance.isna()])/len(a) * 100} %\n'
-    # )
-
-    # add the new test best pwm
-    path = f'results/GN/PUM2_K562_ENCSR661ICQ_2/motif_analysis_final/peak_center/motif_enrichment.tsv'
-    a = pd.read_csv(path, sep='\t', header=0, index_col=0)
-    b = a.loc[a['mean_enrichment'][a['motif'].str.contains('trimmed')].idxmax(), 'motif']
-    run = b.split('_')[-1].split('.')[0]
-    name = path.split('/')[-4]
-    path_new = path.replace(
-        'motif_analysis_final',
-        f'motif_analysis_crossvalidation/{run}').replace('motif_enrichment.tsv', f'wlib/{b}')
-    shutil.copy(path_new, f'wlib/{name}')
-
-    df = pd.DataFrame()
-    for path1 in glob.glob('wlib/*'):
-        name1 = path1.split('/')[-1]
-        for path2 in glob.glob('wlib/*'):
-            name2 = path2.split('/')[-1]
-            sim = getsimilarity(path1, path2)
-            df.loc[name1, name2] = sim
-    mean_sim = np.mean([df.loc['PUM2_K562_ENCSR661ICQ_2', :].mean(), df.loc[:, 'PUM2_K562_ENCSR661ICQ_2'].mean()])
-    
-    sys.stdout.write(f'rmsd of crosslink centers: {rmsd}\n')
-    sys.stdout.write(f'percentage of outlier peaks: {outlier_percentage} % \n')
-    sys.stdout.write(f'motif similarity: {mean_sim}\n')
-    if rmsd < 1.5:
-        sys.stdout.write('Rmsd is low. Test passed. 1/3\n')
-    else:
-        sys.stdout.write('Rmsd seems to be too high 1/3\n')
-    if outlier_percentage < 5:
-        sys.stdout.write('Few outliers detected. Test passed. 2/3\n')
-    else:
-        sys.stdout.write('Rmsd seems to be too high 2/3\n')
-    if mean_sim > 0.75:
-        sys.stdout.write('Motif similarity is high.Test passed 3/3\n')
-    else:
-        sys.stdout.write('Similarity seems to be low 3/3\n')
+    for runtype in ['GN', 'TR']:
+        for sample in ["PUM2_K562_ENCSR661ICQ_2_paired_end", "PUM2_K562_ENCSR661ICQ_2_single_end"]:
+            a = pd.read_csv(
+                f'results/{runtype}/{sample}/{sample}_total_peaks.csv',
+                sep='\t', header=0, index_col=None)
+            expected = pd.read_csv(
+                'means_crosslink.tsv',
+                sep='\t', header=0, index_col=None)
+            a['center'] = a['start'] + a['crosslink']
+
+            for index, row in a.iterrows():
+                distance = (
+                    row['center'] - expected['crosslink'][expected['strand'] == row['strand']]).abs().min()
+                if distance <= 50:
+                    a.loc[index, 'distance'] = distance
+                else:
+                    a.loc[index, 'distance'] = np.nan
+            if runtype == "GN":
+                rmsd = np.sqrt(np.sum((a["distance"]**2)) / len(a[~a.distance.isna()]))
+                outlier_percentage = len(a[a.distance.isna()])/len(a) * 100
+
+
+            # expected  = pd.read_csv(
+            #     'means_peak_center.tsv',
+            #     sep='\t', header=0, index_col=None)
+            # a['center'] = a['start'] +  a['mi']
+
+            # for index, row in a.iterrows():
+            #     distance = (row['center'] - expected['peak_center'][expected['strand'] == row['strand']]).abs().min()
+            #     if distance <= 50:
+            #         a.loc[index, 'distance'] = distance
+            #     else:
+            #         a.loc[index, 'distance'] = np.nan
+            # sys.stdout.write(f'rmsd of peak_center as center: {np.sqrt(np.sum((a["distance"]**2)) / len(a[~a.distance.isna()]))}\n'
+            # )
+            # sys.stdout.write(f'percentage of outlier peaks: {len(a[a.distance.isna()])/len(a) * 100} %\n'
+            # )
+
+            # add the new test best pwm
+            path = f'results/{runtype}/{sample}/motif_analysis_final/peak_center/motif_enrichment.tsv'
+            a = pd.read_csv(path, sep='\t', header=0, index_col=0)
+            b = a.loc[a['mean_enrichment'][a['motif'].str.contains('trimmed')].idxmax(), 'motif']
+            run = b.split('_')[-1].split('.')[0]
+            name = path.split('/')[-4]
+            path_new = path.replace(
+                'motif_analysis_final',
+                f'motif_analysis_crossvalidation/{run}').replace('motif_enrichment.tsv', f'wlib/{b}')
+            shutil.copy(path_new, f'wlib/PUM2_K562_ENCSR661ICQ_2')
+
+            df = pd.DataFrame()
+            for path1 in glob.glob('wlib/*'):
+                name1 = path1.split('/')[-1]
+                for path2 in glob.glob('wlib/*'):
+                    name2 = path2.split('/')[-1]
+                    sim = getsimilarity(path1, path2)
+                    df.loc[name1, name2] = sim
+            mean_sim = np.mean([df.loc['PUM2_K562_ENCSR661ICQ_2', :].mean(), df.loc[:, 'PUM2_K562_ENCSR661ICQ_2'].mean()])
+            sys.stdout.write(f'{runtype} {sample} evaluation\n') 
+            if runtype == "GN":
+                sys.stdout.write(f'rmsd of crosslink centers: {rmsd}\n')
+                sys.stdout.write(f'percentage of outlier peaks: {outlier_percentage} % \n')
+            
+            sys.stdout.write(f'motif similarity: {mean_sim}\n')
+            if runtype == "GN":
+                if rmsd < 1.5:
+                    sys.stdout.write('Rmsd is low. Peaks are highly overlapping. Test passed. 1/3\n')
+                else:
+                    sys.stdout.write('Rmsd seems to be too high. Peaks are far apart. 1/3\n')
+                if outlier_percentage < 5:
+                    sys.stdout.write('Few outliers detected. Test passed. 2/3\n')
+                else:
+                    sys.stdout.write('Too many peaks that are not found in the test runs 2/3\n')
+            if mean_sim > 0.75:
+                sys.stdout.write('Motif similarity is high.Test passed 3/3\n')
+            else:
+                sys.stdout.write('Motif similarity seems to be low 3/3\n\n')
     return
 
-
-
 def getSimilarityScore(wm1, wm2):
     wm1 = np.array(wm1)
     wm2 = np.array(wm2)

From 8457d7a2e2e50a7487692835ce0dcbedcceb00a4 Mon Sep 17 00:00:00 2001
From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch>
Date: Mon, 16 Jan 2023 23:23:04 +0100
Subject: [PATCH 4/4] feat: remove paired specification from config and infer
 from input

---
 config.yaml                                   |  2 +-
 test/test_singularity_execution/config.yaml   |  8 ++---
 .../performance_evaluation.py                 | 29 +++++++++++--------
 workflow/rules/common.smk                     |  6 ++--
 workflow/rules/rcrunch_genomic.smk            | 12 ++++----
 workflow/rules/rcrunch_transcriptomic.smk     | 19 ++++++------
 6 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/config.yaml b/config.yaml
index 0b6382a..bb7a088 100644
--- a/config.yaml
+++ b/config.yaml
@@ -105,7 +105,7 @@
                 # if file name is ENCFF041KJT.fastq.gz then add ENCFF041KJT 
                 # files are assumed to be in fastq.gz format
                 mate2: "ENCFF462SCV", 
-                # if file name is ENCFF041KJT.fastq.gz then add ENCFF041KJT 
+                # if file name is ENCFF462SCV.fastq.gz then add ENCFF462SCV 
                 # files are assumed to be in fastq.gz format. 
                 paired: 2, 
                 # leave as is. used for future support of single-end data
diff --git a/test/test_singularity_execution/config.yaml b/test/test_singularity_execution/config.yaml
index 648b1fa..c77289c 100644
--- a/test/test_singularity_execution/config.yaml
+++ b/test/test_singularity_execution/config.yaml
@@ -56,10 +56,10 @@
                               background_type: "standard"}
   
   # Sample specific info - REQUIRED
-  ENCFF462SCV: { mate1: "ENCFF462SCV.chr20", paired: 1, sense: 1, format: "encode", dup_type: "umis",  mate1_3p: "../input_files_test/mate2_3p.fasta", mate1_5p: "../input_files_test/mate2_5p.fasta" }
-  ENCFF495ZPY: { mate1: "ENCFF495ZPY.chr20", paired: 1, sense: 1, format: "encode", dup_type: "umis",  mate1_3p: "../input_files_test/mate2_3p.fasta", mate1_5p: "../input_files_test/mate2_5p.fasta" }
-  ENCFF041KJT_ENCFF462SCV: { mate1: "ENCFF041KJT.chr20", mate2: "ENCFF462SCV.chr20", paired: 2, sense: 2, format: "encode", dup_type: "umis", mate1_3p: "../input_files_test/mate1_3p.fasta", mate1_5p: "../input_files_test/mate1_5p.fasta", mate2_3p: "../input_files_test/mate2_3p.fasta", mate2_5p: "../input_files_test/mate2_5p.fasta" }
-  ENCFF616FCF_ENCFF495ZPY: { mate1: "ENCFF616FCF.chr20", mate2: "ENCFF495ZPY.chr20", paired: 2, sense: 2, format: "encode", dup_type: "umis", mate1_3p: "../input_files_test/mate1_3p.fasta", mate1_5p: "../input_files_test/mate1_5p.fasta", mate2_3p: "../input_files_test/mate2_3p.fasta", mate2_5p: "../input_files_test/mate2_5p.fasta" }
+  ENCFF462SCV: { mate1: "ENCFF462SCV.chr20", sense: 1, format: "encode", dup_type: "umis",  mate1_3p: "../input_files_test/mate2_3p.fasta", mate1_5p: "../input_files_test/mate2_5p.fasta" }
+  ENCFF495ZPY: { mate1: "ENCFF495ZPY.chr20", sense: 1, format: "encode", dup_type: "umis",  mate1_3p: "../input_files_test/mate2_3p.fasta", mate1_5p: "../input_files_test/mate2_5p.fasta" }
+  ENCFF041KJT_ENCFF462SCV: { mate1: "ENCFF041KJT.chr20", mate2: "ENCFF462SCV.chr20", sense: 2, format: "encode", dup_type: "umis", mate1_3p: "../input_files_test/mate1_3p.fasta", mate1_5p: "../input_files_test/mate1_5p.fasta", mate2_3p: "../input_files_test/mate2_3p.fasta", mate2_5p: "../input_files_test/mate2_5p.fasta" }
+  ENCFF616FCF_ENCFF495ZPY: { mate1: "ENCFF616FCF.chr20", mate2: "ENCFF495ZPY.chr20", sense: 2, format: "encode", dup_type: "umis", mate1_3p: "../input_files_test/mate1_3p.fasta", mate1_5p: "../input_files_test/mate1_5p.fasta", mate2_3p: "../input_files_test/mate2_3p.fasta", mate2_5p: "../input_files_test/mate2_5p.fasta" }
   # ----------------------------------------------------------------------------------------------
   # RCRUNCH specific options - REQUIRED - DEFAULT VALUES SUGGESTED 
   method_types : ["GN", "TR"]         # options: GN, TR  if you want BOTH methods, you can specify ['GN', 'TR']
diff --git a/test/test_singularity_execution/performance_evaluation.py b/test/test_singularity_execution/performance_evaluation.py
index 0196c7f..a2e166d 100755
--- a/test/test_singularity_execution/performance_evaluation.py
+++ b/test/test_singularity_execution/performance_evaluation.py
@@ -7,6 +7,7 @@
 import re
 import io
 
+
 def main():
     """ Main function """
     __doc__ = "Evaluate the run based on expected values"
@@ -68,28 +69,32 @@ def main():
                     name2 = path2.split('/')[-1]
                     sim = getsimilarity(path1, path2)
                     df.loc[name1, name2] = sim
-            mean_sim = np.mean([df.loc['PUM2_K562_ENCSR661ICQ_2', :].mean(), df.loc[:, 'PUM2_K562_ENCSR661ICQ_2'].mean()])
-            sys.stdout.write(f'{runtype} {sample} evaluation\n') 
+            mean_sim = np.mean([df.loc['PUM2_K562_ENCSR661ICQ_2', :].mean(),
+                                df.loc[:, 'PUM2_K562_ENCSR661ICQ_2'].mean()])
+            sys.stdout.write(f'''{runtype.replace("GN","Genomic").replace("TR","Transcriptomic")} {"-".join(sample.split("_")[-2:])} evaluation\n''')
+            sys.stdout.write(f'motif similarity: {mean_sim}\n')
             if runtype == "GN":
                 sys.stdout.write(f'rmsd of crosslink centers: {rmsd}\n')
                 sys.stdout.write(f'percentage of outlier peaks: {outlier_percentage} % \n')
             
-            sys.stdout.write(f'motif similarity: {mean_sim}\n')
+
+            if mean_sim > 0.75:
+                sys.stdout.write('Motif similarity is high.Test passed 1/3\n')
+            else:
+                sys.stdout.write('Motif similarity seems to be low 1/3\n')
             if runtype == "GN":
                 if rmsd < 1.5:
-                    sys.stdout.write('Rmsd is low. Peaks are highly overlapping. Test passed. 1/3\n')
+                    sys.stdout.write('Rmsd is low. Peaks are highly overlapping. Test passed. 2/3\n')
                 else:
-                    sys.stdout.write('Rmsd seems to be too high. Peaks are far apart. 1/3\n')
+                    sys.stdout.write('Rmsd seems to be too high. Peaks are far apart. 2/3\n')
                 if outlier_percentage < 5:
-                    sys.stdout.write('Few outliers detected. Test passed. 2/3\n')
+                    sys.stdout.write('Few outliers detected. Test passed. 3/3\n')
                 else:
-                    sys.stdout.write('Too many peaks that are not found in the test runs 2/3\n')
-            if mean_sim > 0.75:
-                sys.stdout.write('Motif similarity is high.Test passed 3/3\n')
-            else:
-                sys.stdout.write('Motif similarity seems to be low 3/3\n\n')
+                    sys.stdout.write('Too many peaks are not found in the test runs 3/3\n')
+            sys.stdout.write('\n')
     return
 
+
 def getSimilarityScore(wm1, wm2):
     wm1 = np.array(wm1)
     wm2 = np.array(wm2)
@@ -123,7 +128,7 @@ def get_wm(path):
 def getsimilarity(wm1_path, wm2_path):
     wm1 = get_wm(wm1_path)
     wm2 = get_wm(wm2_path)
-    similarity =  (
+    similarity = (
         (2 * getSimilarityScore(wm1, wm2)) / (
             getSimilarityScore(wm1, wm1) + getSimilarityScore(wm2, wm2)))
     return similarity
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index fb1b213..43be315 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -12,11 +12,11 @@ def get_mates_number(sample):
     """Get library type"""
     try:
         if config[sample]['mate2']:
-            return 2
+            return '2'
         else:
-            return 1
+            return '1'
     except KeyError:
-        return 2
+        return '1'
 
 def get_library_type(sample, sense):
     try:
diff --git a/workflow/rules/rcrunch_genomic.smk b/workflow/rules/rcrunch_genomic.smk
index e117c45..9d38ce3 100644
--- a/workflow/rules/rcrunch_genomic.smk
+++ b/workflow/rules/rcrunch_genomic.smk
@@ -112,7 +112,7 @@ rule GN_flag_ncRNA_reads:
         ncRNAs = config['ncRNAs'],
         ncRNA_biotypes = expand(config['ncRNA_biotypes']),
         paired = lambda wildcards:
-            config[wildcards.sample]['paired'],
+            get_mates_number(wildcards.sample),
         sense = lambda wildcards:
             config[wildcards.sample]['sense'],
         
@@ -583,7 +583,7 @@ rule GN_read_frequencies:
     params:
         cluster_log_path = config["cluster_log"],
         paired = lambda wildcards:
-            config[config[wildcards.experiment][wildcards.name][0]]['paired'],
+            get_mates_number(config[wildcards.experiment][wildcards.name][0]),
         script = os.path.join(
             workflow.basedir,
             "scripts",
@@ -730,9 +730,9 @@ rule GN_genome_coverage:
         sense_b = lambda wildcards:
             config[config[wildcards.experiment]["smis"][0]]['sense'],
         paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["replicates"][0]),
         paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["smis"][0]),
         prefix = "{experiment}.gn_coverage",
         background_type = lambda wildcards:
             config[wildcards.experiment]["background_type"],
@@ -932,9 +932,9 @@ rule GN_fit_peaks:
         window_size = lambda wildcards:
             config[wildcards.experiment]['window_f'],
         paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["replicates"][0]),
         paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["smis"][0]),
         sense_f = lambda wildcards:
             config[config[wildcards.experiment]["replicates"][0]]['sense'],
         sense_b = lambda wildcards:
diff --git a/workflow/rules/rcrunch_transcriptomic.smk b/workflow/rules/rcrunch_transcriptomic.smk
index b153535..c432351 100644
--- a/workflow/rules/rcrunch_transcriptomic.smk
+++ b/workflow/rules/rcrunch_transcriptomic.smk
@@ -111,7 +111,7 @@ rule TR_flag_ncRNA_reads:
         ncRNAs = config['ncRNAs'],
         ncRNA_biotypes = expand(config['ncRNA_biotypes']),
         paired = lambda wildcards:
-            config[wildcards.sample]['paired'],
+            get_mates_number(wildcards.sample),
         sense = lambda wildcards:
             config[wildcards.sample]['sense'],
 
@@ -611,8 +611,7 @@ rule TR_read_frequencies:
             "scripts",
             "mk_bam_get_read_frequencies.py"),
         paired = lambda wildcards:
-                    config[config[wildcards.experiment][wildcards.name][0]]['paired'],
-
+            get_mates_number(config[wildcards.experiment][wildcards.name][0]),
     singularity:
         "docker://zavolab/rcrunch_python:1.0.5"
 
@@ -1060,7 +1059,7 @@ rule TR_assign_to_gn_tr:
             "{experiment}",
             "{name}_assign_to_gn_tr_temp"),
         paired = lambda wildcards:
-                    config[config[wildcards.experiment][wildcards.name][0]]['paired'],
+            get_mates_number(config[wildcards.experiment][wildcards.name][0]),
         sense = lambda wildcards:
                     config[config[wildcards.experiment][wildcards.name][0]]['sense'],
 
@@ -1374,9 +1373,9 @@ rule TR_genome_coverage:
         sense_b = lambda wildcards:
             config[config[wildcards.experiment]["smis"][0]]['sense'],
         paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["replicates"][0]),
         paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["smis"][0]),
         prefix = "{experiment}.genome_coverage",
         background_type = lambda wildcards:
             config[wildcards.experiment]["background_type"],
@@ -1496,9 +1495,9 @@ rule TR_transcriptome_coverage:
         sense_b = lambda wildcards:
             config[config[wildcards.experiment]["smis"][0]]['sense'],
         paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["replicates"][0]),
         paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["smis"][0]),
         prefix = "{experiment}.transcriptome_coverage",
         background_type = lambda wildcards:
             config[wildcards.experiment]["background_type"]
@@ -1779,9 +1778,9 @@ rule TR_fit_peaks:
         window_size = lambda wildcards:
             config[wildcards.experiment]['window_f'],
         paired_f = lambda wildcards:
-            config[config[wildcards.experiment]["replicates"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["replicates"][0]),
         paired_b = lambda wildcards:
-            config[config[wildcards.experiment]["smis"][0]]['paired'],
+            get_mates_number(config[wildcards.experiment]["smis"][0]),
         sense_f = lambda wildcards:
             config[config[wildcards.experiment]["replicates"][0]]['sense'],
         sense_b = lambda wildcards: