--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/alleyoop.xml	Sun Jan 20 06:51:15 2019 -0500
@@ -0,0 +1,168 @@
+<tool id="alleyoop" name="Alleyoop" version="@TOOL_VERSION@">
+    <description>- post-processing and QC of Slamdunk analyses</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <version_command>alleyoop --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+#import re
+## Stage the reference FASTA as ./reference.fa. A history dataset is linked
+## directly; a built-in genome is linked through the path column of its
+## data table entry. Only one of the two branches may run.
+#if $reference_source.reference_source_selector == 'history':
+    ln -f -s '$reference_source.ref_file' reference.fa &&
+#else:
+    ln -f -s '$reference_source.ref_file.fields.path' reference.fa &&
+#end if
+## Link each BAM (and its index) into ./filter under a sanitised name that
+## always ends in .bam.
+mkdir ./filter &&
+#for $bam in $reads:
+    #set $ext = ""
+    #set $bam_name = re.sub('[^\w\-\.]', '_', str($bam.element_identifier))
+    #if not $bam_name.endswith('.bam')
+        #set $ext = ".bam"
+    #end if
+    ln -s '$bam' './filter/${bam_name}${ext}' &&
+    ln -s '$bam.metadata.bam_index' './filter/${bam_name}${ext}.bai' &&
+#end for
+## Link each count table into ./count with the _tcount.tsv suffix.
+mkdir ./count &&
+#for $tsv in $count_tsvs:
+    #set $ext = ""
+    #set $tsv_name = re.sub('[^\w\-\.]', '_', str($tsv.element_identifier))
+    #if not $tsv_name.endswith('_tcount.tsv')
+        #set $ext = "_tcount.tsv"
+    #end if
+    ln -s '$tsv' './count/${tsv_name}${ext}' &&
+#end for
+## Link each VCF into ./snp with the _snp.vcf suffix.
+mkdir ./snp &&
+#for $vcf in $variants:
+    #set $ext = ""
+    #set $vcf_name = re.sub('[^\w\-\.]', '_', str($vcf.element_identifier))
+    #if not $vcf_name.endswith('_snp.vcf')
+        #set $ext = "_snp.vcf"
+    #end if
+    ln -s '$vcf' './snp/${vcf_name}${ext}' &&
+#end for
+## Run the Alleyoop modules; the stats modules all write into ./stats.
+alleyoop summary -o ./summary.txt -t ./count ./filter/*bam &&
+alleyoop rates -o ./stats -r reference.fa -mq $mq ./filter/*bam &&
+alleyoop utrrates -o ./stats -r reference.fa -b '$Reference' -t \${GALAXY_SLOTS:-1} -l $l -mq $mq ./filter/*bam &&
+alleyoop tcperreadpos -o ./stats -r reference.fa -s ./snp -t \${GALAXY_SLOTS:-1} -l $l -mq $mq ./filter/*bam &&
+alleyoop tcperutrpos -o ./stats -r reference.fa -s ./snp -t \${GALAXY_SLOTS:-1} -l $l -b '$Reference' -mq $mq ./filter/*bam
+## Optional: split reads into T>C and background BAM files.
+#if $bams:
+    && alleyoop read-separator -o ./splitbams -s ./snp -r reference.fa ./filter/*bam
+#end if
+    ]]></command>
+    <inputs>
+        <expand macro="reference_files" />
+        <param name="reads" type="data" format="sam,bam" multiple="True" label="Slamdunk BAM files" />
+        <param name="count_tsvs" type="data" format="tabular" multiple="True" label="Slamdunk Count TSV files" />
+        <param name="variants" type="data" format="vcf" multiple="True" label="Slamdunk VCF files" />
+        <param argument="-mq" type="integer" label="Minimum base quality"
+                value="27" min="0"
+                help="Minimum base quality for T>C conversions (default: 27)." />
+        <param argument="-l" type="integer" label="Read length"
+            value="50" min="50" help="Maximum read length (before trimming)." />
+        <param name="bams" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output T>C separated BAM files?" help="If this option is set to Yes, the Alleyoop read-separator module will be run to output BAM files of separated T>C reads from non T>C reads. Default: No"/>
+    </inputs>
+    <outputs>
+        <!-- Output collections are discovered from the job working directory; labels are prefixed with the tool name. -->
+        <collection name="outputSummary" type="list" label="${tool.name} on ${on_string}: Summary tables">
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.txt$" format="tabular" directory="." visible="false" />
+        </collection>
+        <collection name="outputStats" type="list" label="${tool.name} on ${on_string}: Stats tables">
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.csv$" format="tabular" directory="./stats" visible="false" />
+        </collection>
+        <!-- The two BAM collections are only produced when the "bams" option is enabled. -->
+        <collection name="outputTCReads" type="list" label="${tool.name} on ${on_string}: TC Reads">
+            <discover_datasets pattern="(?P&lt;name&gt;.+)_TCReads\.bam$" format="bam" directory="./splitbams" visible="false" />
+            <filter>bams</filter>
+        </collection>
+        <collection name="outputbkgdReads" type="list" label="${tool.name} on ${on_string}: Background Reads">
+            <discover_datasets pattern="(?P&lt;name&gt;.+)_backgroundReads\.bam$" format="bam" directory="./splitbams" visible="false" />
+            <filter>bams</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Ensure default output works -->
+        <test expect_num_outputs="2">
+            <param name="reference_source_selector" value="history" />
+            <param name="ref_file" ftype="fasta" value="ref.fa" />
+            <param name="Reference" ftype="bed" value="actb.bed" />
+            <param name="reads" ftype="bam" value="reads1.bam,reads2.bam" />
+            <param name="count_tsvs" ftype="tabular" value="reads1_tcount.tsv,reads2_tcount.tsv" />
+            <param name="variants" ftype="vcf" value="reads1_snp.vcf,reads2_snp.vcf" />
+            <param name="l" value="100" />
+            <param name="mq" value="27" />
+            <output_collection name="outputSummary" count="2">
+                <element name="summary" ftype="tabular" file="summary.txt" />
+            </output_collection>
+            <output_collection name="outputStats" count="8">
+                <element name="reads1_overallrates" ftype="tabular" file="reads1_overallrates.csv"  />
+            </output_collection>
+        </test>
+        <!-- Ensure BAM output works -->
+        <test expect_num_outputs="4">
+            <param name="reference_source_selector" value="history" />
+            <param name="ref_file" ftype="fasta" value="ref.fa" />
+            <param name="Reference" ftype="bed" value="actb.bed" />
+            <param name="reads" ftype="bam" value="reads1.bam,reads2.bam" />
+            <param name="count_tsvs" ftype="tabular" value="reads1_tcount.tsv,reads2_tcount.tsv" />
+            <param name="variants" ftype="vcf" value="reads1_snp.vcf,reads2_snp.vcf" />
+            <param name="l" value="100" />
+            <param name="mq" value="27" />
+            <param name="bams" value="True" />
+            <output_collection name="outputTCReads" count="2">
+                <element name="reads1" ftype="bam" file="reads1_TCReads.bam" />
+            </output_collection>
+            <output_collection name="outputbkgdReads" count="2">
+                <element name="reads1" ftype="bam" file="reads1_backgroundReads.bam" />
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+SLAM-seq is a novel sequencing protocol that directly uncovers 4-thiouridine incorporation events in RNA by high-throughput sequencing. When combined with metabolic labeling protocols, SLAM-seq allows the study of intracellular RNA dynamics, from transcription and RNA processing to RNA stability.
+Original publication: `Herzog et al., Nature Methods, 2017; doi:10.1038/nmeth.4435 <https://doi.org/10.1038/nmeth.4435>`_
+Alleyoop (Additional sLamdunk heLpEr tools for anY diagnOstics Or Plots) is a collection of tools for post-processing and running diagnostics on Slamdunk analyses. This tool works on the output of the **Slamdunk** tool and requires all the inputs listed in the table below.
+===============  ==========================================================================================================================================================
+Parameter        Description
+===============  ==========================================================================================================================================================
+**Genome**       The reference fasta file (Genome assembly).
+**Reference**    BED-file containing coordinates for 3' UTRs.
+**Reads**        Slamdunk Filtered BAM files.
+**Counts**       Slamdunk Count TSV files.
+**Variants**     Slamdunk VCF files.
+**Read length**  Maximum length of reads (usually 50, 100, 150).
+===============  ==========================================================================================================================================================
+This tool runs the **Alleyoop** *summary*, *rates*, *utrrates*, *tcperreadpos* and *tcperutrpos* modules and outputs:
+* Tab-separated *summary* files from the summary module with mapping and PCA statistics
+* Tab-separated *stats* files from the rates, utrrates, tcperreadpos and tcperutrpos modules
+Optionally, the *read-separator* module can be run to output BAM files of separated T>C and non T>C reads.
+The summary and stats files can be summarised and visualised with MultiQC. An example MultiQC report can be seen here_. For information on these modules see the `Alleyoop documentation`_.
+.. _`Alleyoop documentation`: https://t-neumann.github.io/slamdunk/
+.. _here:
+    ]]></help>
+    <citations>
+        <expand macro="citations" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Jan 20 06:51:15 2019 -0500
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<!-- Shared macros for the Slamdunk and Alleyoop tools: version token, requirements, reference inputs and citation. -->
+<macros>
+    <token name="@TOOL_VERSION@">0.3.3</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">slamdunk</requirement>
+        </requirements>
+    </xml>
+    <xml name="reference_files">
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Reference genome FASTA file" help="Select a built-in FASTA file (if available) or one from the history">
+                <option value="cached">Use a built-in FASTA</option>
+                <option value="history">Use a FASTA from history</option>
+            </param>
+            <when value="cached">
+                <param name="ref_file" type="select" label="Use built-in FASTA" help="Select genome from the list">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                        <validator type="no_options" message="No reference genomes are available" />
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the FASTA" help="You can upload a FASTA sequence to the history and use it as reference" />
+            </when>
+        </conditional>
+        <param name="Reference" type="data" format="bed" label="Reference 3'UTRs BED file" />
+    </xml>
+    <xml name="citations">
+        <citation type="bibtex">
+ @misc{Neumann2018,
+  author = {Neumann, Tobias},
+  year = {2018},
+  title = {Slamdunk},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/t-neumann/slamdunk},
+}
+        </citation>
+    </xml>
+</macros>
\ No newline at end of file
--- a/slamdunk.xml	Thu Oct 11 20:33:07 2018 -0400
+++ b/slamdunk.xml	Sun Jan 20 06:51:15 2019 -0500
@@ -1,8 +1,9 @@
-<tool id="slamdunk" name="Slamdunk" version="0.3.3">
+<tool id="slamdunk" name="Slamdunk" version="@TOOL_VERSION@+galaxy1">
     <description>- streamlining SLAM-seq analysis with ultra-high sensitivity</description>
-    <requirements>
-        <requirement type="package" version="0.3.3">slamdunk</requirement>
-    </requirements>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
     <version_command>slamdunk --version</version_command>
     <command detect_errors="exit_code"><![CDATA[
     #if $reference_source.reference_source_selector == 'history':
@@ -30,26 +31,8 @@
-        <conditional name="reference_source">
-            <param name="reference_source_selector" type="select" label="Genome" help="Select a built-in FASTA file (if available) or one from the history">
-                <option value="cached">Use a built-in FASTA</option>
-                <option value="history">Use a FASTA from history</option>
-            </param>
-            <when value="cached">
-                <param name="ref_file" type="select" label="Use built-in FASTA" help="Select genome from the list">
-                    <options from_data_table="all_fasta">
-                        <filter type="sort_by" column="2" />
-                        <validator type="no_options" message="No reference genomes are available" />
-                    </options>
-                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
-                </param>
-            </when>
-            <when value="history">
-                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the FASTA" help="You can upload a FASTA sequence to the history and use it as reference" />
-            </when>
-        </conditional>
-        <param name="Reference" type="data" format="bed" />
-        <param name="Reads" type="data" format="fastqsanger,fastqsanger.gz" />
+        <expand macro="reference_files" />
+        <param name="Reads" type="data" format="fastqsanger,fastqsanger.gz" label="FASTQ files"/>
         <section name="multimapper" title="Multimapper recovery"
             <section name="filterbed"
@@ -79,8 +62,6 @@
             <param name="minMQ" type="integer" min="0" value="2"
                 label="Minimum mapping quality"
                 help="Minimum mapping quality to consider alignments (default: 2)." />
-            <param name="minMQ" type="integer" label="Minimum mapping quality" min="0" value="2"
-                help="Minimum mapping quality to consider alignments (default: 2)." />
             <param name="minID" type="float" min="0" value="0.95"
                 label="Minimum alignment identity"
                 help="Minimum alignment-identity to consider alignments (default: 0.95)." />
@@ -105,11 +86,13 @@
             help="Maximum read length (before trimming)." />
-        <data name="outputBam" format="bam" from_work_dir="./out/filter/*.bam" />
-        <data name="outputTsv" format="tabular" from_work_dir="./out/count/*_tcount.tsv" />
+        <data name="outputBam" format="bam" from_work_dir="./out/filter/*.bam" label="${tool.name} on ${on_string}: BAM"/>
+        <data name="outputTsv" format="tabular" from_work_dir="./out/count/*_tcount.tsv" label="${tool.name} on ${on_string}: Count TSV"/>
+        <data name="outputVcf" format="vcf" from_work_dir="./out/snp/*vcf" label="${tool.name} on ${on_string}: VCF"/>
+            <!--Ensure default outputs work -->
             <param name="reference_source_selector" value="history" />
             <param name="ref_file" value="ref.fa" />
             <param name="Reference" value="actb.bed" />
@@ -121,11 +104,13 @@
             <section name="advanced">
                 <param name="minBaseQual" value="27" />
-            <output name="outputTsv" file="reads_slamdunk_mapped_filtered_tcount.tsv"
+            <output name="outputBam" ftype="bam" file="reads1.bam" compare="sim_size"/>
+            <output name="outputTsv" ftype="tabular" file="reads_slamdunk_mapped_filtered_tcount.tsv"
                 lines_diff="2" />
+            <output name="outputVcf" ftype="vcf" file="reads1_snp.vcf" compare="sim_size"/>
-            <!-- test built-in fasta -->
+            <!--Ensure built-in fasta works -->
             <param name="reference_source_selector" value="cached" />
             <param name="Reference" value="actb.bed" />
             <param name="Reads" ftype="fastqsanger" dbkey="hg38" value="reads.fq" />
@@ -136,8 +121,10 @@
             <section name="advanced">
                 <param name="minBaseQual" value="27" />
-            <output name="outputTsv" file="reads_slamdunk_mapped_filtered_tcount.tsv"
+            <output name="outputBam" ftype="bam" file="reads1.bam" compare="sim_size"/>
+            <output name="outputTsv" ftype="tabular" file="reads_slamdunk_mapped_filtered_tcount.tsv"
                 lines_diff="2" />
+            <output name="outputVcf" ftype="vcf" file="reads1_snp.vcf" compare="sim_size"/>
@@ -162,10 +149,13 @@
 **Read length**  Maximum length of reads (usually 50, 100, 150).
 ===============  ==========================================================================================================================================================
-This will run the entire *slamdunk* analysis with the most relevant output files being:
+This will run the entire *slamdunk* analysis (`slamdunk all`) with the most relevant output files being:
-* Tab-separated *tcount* file containing the SLAM-seq statistics per UTR
-* BAM-file with the final mapped reads for visualization and further processing
+* Tab-separated *tcount* file (Count TSV) containing the SLAM-seq statistics per UTR
+* BAM-file with the final filtered mapped reads
+* VCF file of variants called on the final filtered alignments
+These files can be input to the **Alleyoop** tool for visualization and further processing. See the `Slamdunk documentation`_ for more information.
@@ -198,18 +188,10 @@
 **T>C conversion threshold**  Minimum number of T>C conversions to consider a read as T>C read.
 ============================  ================================================================================
+.. _`Slamdunk documentation`: https://t-neumann.github.io/slamdunk/
-        <citation type="bibtex">
- @misc{Neumann2018,
-  author = {Neumann, Tobias},
-  year = {2018},
-  title = Slamdunk},
-  publisher = {GitHub},
-  journal = {GitHub repository},
-  url = {},
-        </citation>
+        <expand macro="citations" />
Binary file test-data/reads1.bam has changed
Binary file test-data/reads1_TCReads.bam has changed
Binary file test-data/reads1_backgroundReads.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reads1_overallrates.csv	Sun Jan 20 06:51:15 2019 -0500
@@ -0,0 +1,7 @@
+# slamdunk rates v0.3.3
+	A	a	C	c	G	g	T	t	N	n	
+A	93	20	0	0	0	1	0	0	0	0	
+C	0	0	106	12	0	0	0	0	0	0	
+G	0	0	0	0	82	33	0	0	0	0	
+T	0	0	4	1	0	0	102	47	0	0	
+N	0	0	0	0	0	0	0	0	0	0	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reads1_snp.vcf	Sun Jan 20 06:51:15 2019 -0500
@@ -0,0 +1,25 @@
+##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15">
+##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)">
+##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant">
+##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant">
+##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called">
+##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
+##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15">
+##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
+##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
+##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
+##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test">
+##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)">
+##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)">
+##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)">
+##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand (reads1minus)">
+##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)">
+##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)">
+chr5	120498	.	T	C	.	.	.	.	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reads1_tcount.tsv	Sun Jan 20 06:51:15 2019 -0500
@@ -0,0 +1,3 @@
+Chromosome	Start	End	Name	Length	Strand	ConversionRate	ReadsCPM	Tcontent	CoverageOnTs	ConversionsOnTs	ReadCount	TcReadCount	multimapCount	ConversionRateLower	ConversionRateUpper
+chr5	120498	121492	Actb	1994	+	0.0222222222222	666666.666667	445	90	2	8	4	0	-1.0	-1.0
+chr5	120498	122492	Actb	1994	+	0.0222222222222	666666.666667	445	90	2	8	4	0	-1.0	-1.0
Binary file test-data/reads2.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reads2_snp.vcf	Sun Jan 20 06:51:15 2019 -0500
@@ -0,0 +1,25 @@
+##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15">
+##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)">
+##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant">
+##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant">
+##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called">
+##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
+##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15">
+##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
+##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
+##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
+##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test">
+##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)">
+##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)">
+##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)">
+##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand (reads1minus)">
+##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)">
+##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)">
+chr5	120499	.	T	C	.	.	.	.	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reads2_tcount.tsv	Sun Jan 20 06:51:15 2019 -0500
@@ -0,0 +1,3 @@
+Chromosome	Start	End	Name	Length	Strand	ConversionRate	ReadsCPM	Tcontent	CoverageOnTs	ConversionsOnTs	ReadCount	TcReadCount	multimapCount	ConversionRateLower	ConversionRateUpper
+chr5	120498	121492	Actb	1994	+	0.0227272727273	1000000.0	445	88	2	6	3	0	-1.0	-1.0
+chr5	120498	122492	Actb	1994	+	0.0227272727273	1000000.0	445	88	2	6	3	0	-1.0	-1.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/summary.txt	Sun Jan 20 06:51:15 2019 -0500
@@ -0,0 +1,4 @@
+# slamdunk summary v0.3.3
+FileName	SampleName	SampleType	SampleTime	Sequenced	Mapped	Deduplicated	MQ-Filtered	Identity-Filtered	NM-Filtered	Multimap-Filtered	Retained	Counted	Annotation
+./filter/reads1.bam	sample_1	NA	-1	12	12	0	0	0	0	0	12	16	
+./filter/reads2.bam	sample_2	NA	-1	6	6	0	0	0	0	0	6	12