Mercurial > repos > artbio > small_rna_clusters
comparison small_rna_clusters.xml @ 0:8028521b6e4f draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_clusters commit f38805cf151cbda1cf7de0a92cdfeb5978f26547"
author | artbio |
---|---|
date | Mon, 07 Oct 2019 12:51:25 -0400 |
parents | |
children | 160e35e432a0 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8028521b6e4f |
---|---|
1 <tool id="small_rna_clusters" name="small_rna_clusters" version="1.0.0"> | |
2 <description></description> | |
3 <requirements> <!-- Runtime packages for the wrapper: pysam for the Python script, the r-* packages for the R plotting script, sambamba and samtools for the BAM pre-processing. Most versions are pinned in a version=build form; sambamba carries a plain version. --> | |
4 <requirement type="package" version="0.15.3=py27hda2845c_1">pysam</requirement> | |
5 <requirement type="package" version="1.6.4=r36h6115d3f_0">r-optparse</requirement> | |
6 <requirement type="package" version="0.6_28=r36h6115d3f_1002">r-latticeextra</requirement> | |
7 <requirement type="package" version="2.3=r36h6115d3f_1002">r-gridextra</requirement> | |
8 <requirement type="package" version="1.4.3=r36h29659fb_0">r-reshape2</requirement> | |
9 <requirement type="package" version="0.6.6">sambamba</requirement> | |
10 <requirement type="package" version="1.9=h10a08f8_12">samtools</requirement> | |
11 <requirement type="package" version="64.2=he1b5a44_1">icu</requirement> | |
12 </requirements> | |
13 <stdio> <!-- Any exit code of 1 or above is reported as a fatal "Tool exception". --> | |
14 <exit_code description="Tool exception" level="fatal" range="1:" /> | |
15 </stdio> | |
16 <command detect_errors="exit_code"><![CDATA[ | |
17 ## Step 1: keep only mapped reads whose length lies between minsize and maxsize, write each | |
18 ## filtered BAM into the job directory under its element identifier, then index it. | |
19 #for $file in $inputs | |
20 sambamba view -t \${GALAXY_SLOTS} -F "not unmapped and sequence_length >= ${minsize} and sequence_length <= ${maxsize}" -f bam '$file' -o '$file.element_identifier' && | |
21 samtools index '$file.element_identifier' && | |
22 #end for | |
23 | |
24 python '$__tool_directory__'/small_rna_clusters.py | |
25 --inputs ${ ' '.join(["'%s'" % x.element_identifier for x in $inputs]) } | |
26 #set $labels = list() | |
27 #for $file in $inputs: | |
28 #silent $labels.append(str($file.element_identifier)) | |
29 #end for | |
30 --sample_names ${ ' '.join(["'%s'" % x for x in $labels]) } | |
31 --minsize $minsize | |
32 --maxsize $maxsize | |
33 --outputs '$output_tab' | |
34 --cluster $cluster | |
35 --bed '$output_bed' | |
36 --bed_skipsize $skip_size | |
37 --bed_skipcounts $skip_counts | |
38 --bed_skipdensity $skip_density | |
39 $strandness && | |
40 | |
41 Rscript '$__tool_directory__'/small_rna_clusters.r | |
42 --first_dataframe '$output_tab' | |
43 --first_plot_method 'Counts' | |
44 --output_pdf '$output_pdf' | |
45 ]]></command> | |
46 <inputs> <!-- User-facing parameters: the bam inputs, the read-size window, the clustering distance and strand handling, and three thresholds that only filter the bed report. --> | |
47 <param name="inputs" type="data" format="bam" label="Select alignment files to parse" multiple="true" | |
48 help="maps from these bam inputs will be collected in a single pdf output" /> | |
49 <param name="minsize" type="integer" label="Minimal size of reads for inclusion in analysis" | |
50 value="19" help="default value: 19" /> | |
51 <param name="maxsize" type="integer" label="Maximal size of reads for inclusion in analysis" | |
52 value="29" help="default value: 29" /> | |
53 <param name="first_plot" type="hidden" value="Counts"/> <!-- Not referenced by the command template, which passes the literal 'Counts' to the R script; kept so the parameter set is unchanged. --> | |
54 <param name="cluster" type="integer" label="Clustering distance in nucleotides" value="1" | |
55 help="Sets the distance (in nt) below which reads are clustered to a single median position" /> | |
56 <param name="strandness" argument="--nostrand" type="boolean" truevalue="--nostrand" falsevalue="" checked="false" | |
57 label="Ignore polarity of reads ?" help="Set if you wish to cluster reads regardless of whether they are forward or reverse"/> | |
58 <param name="skip_size" type="integer" label="do not report clusters whose size is less than the specified value" value="1" | |
59 help="Cluster size threshold (in nucleotides) for reporting. Set to 1 (default) reports all clusters, including singlets" /> | |
60 <param name="skip_counts" type="integer" label="do not report clusters with a number of reads lower than the specified value" value="1" | |
61 help="Number-of-reads threshold for cluster reporting. Set to 1 (default) reports all clusters, irrespective of their counts" /> | |
62 <param name="skip_density" type="float" label="do not report clusters with density equal or less than the specified value" value="0" | |
63 help="Density threshold (in reads per nucleotide) for reporting. Set to 0 (default) reports all cluster densities" /> | |
64 </inputs> | |
65 | |
66 <outputs> <!-- Three datasets: the counts table, the bed file of clusters and the pdf of the maps. --> | |
67 <data name="output_tab" format="tabular" label="Counts Dataframe" /> | |
68 <data name="output_bed" format="bed" label="bed file for clusters" /> | |
69 <data name="output_pdf" format="pdf" label="small RNA maps" /> | |
70 </outputs> | |
71 | |
72 <tests> | |
73 <test> <!-- 0: clustering distance 500, read polarity respected, bed size threshold at 1 --> | |
74 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" /> | |
75 <param name="cluster" value="500" /> | |
76 <param name="skip_size" value="1" /> | |
77 <param name="strandness" value="false" /> | |
78 <output file="clustering_0.tab" name="output_tab" /> | |
79 <output file="clustering_0.pdf" name="output_pdf" /> | |
80 <output file="bed_0.bed" name="output_bed" /> | |
81 </test> | |
82 <test> <!-- 1: same settings as test 0 but with read polarity ignored (strandness true) --> | |
83 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" /> | |
84 <param name="cluster" value="500" /> | |
85 <param name="skip_size" value="1" /> | |
86 <param name="strandness" value="true" /> | |
87 <output file="clustering_1.tab" name="output_tab" /> | |
88 <output file="clustering_1.pdf" name="output_pdf" /> | |
89 <output file="bed_1.bed" name="output_bed" /> | |
90 </test> | |
91 <test> <!-- 2: clustering distance 500, bed size threshold raised to 1000 --> | |
92 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" /> | |
93 <param name="cluster" value="500" /> | |
94 <param name="skip_size" value="1000" /> | |
95 <param name="strandness" value="false" /> | |
96 <output file="clustering_2.tab" name="output_tab" /> | |
97 <output file="clustering_2.pdf" name="output_pdf" /> | |
98 <output file="bed_2.bed" name="output_bed" /> | |
99 </test> | |
100 <test> <!-- 3: size (1000), read count (200) and density (0.1) bed thresholds combined --> | |
101 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" /> | |
102 <param name="cluster" value="500" /> | |
103 <param name="skip_size" value="1000" /> | |
104 <param name="skip_counts" value="200" /> | |
105 <param name="skip_density" value="0.1" /> | |
106 <param name="strandness" value="false" /> | |
107 <output file="clustering_3.tab" name="output_tab" /> | |
108 <output file="clustering_3.pdf" name="output_pdf" /> | |
109 <output file="bed_3.bed" name="output_bed" /> | |
110 </test> | |
111 <test> <!-- 4: clustering distance 2000, all three bed thresholds set, read polarity ignored --> | |
112 <param name="inputs" value="input1.bam,input2.bam" ftype="bam" /> | |
113 <param name="cluster" value="2000" /> | |
114 <param name="skip_size" value="2000" /> | |
115 <param name="skip_counts" value="100" /> | |
116 <param name="skip_density" value="0.1" /> | |
117 <param name="strandness" value="true" /> | |
118 <output file="clustering_4.tab" name="output_tab" /> | |
119 <output file="clustering_4.pdf" name="output_pdf" /> | |
120 <output file="bed_4.bed" name="output_bed" /> | |
121 </test> | |
122 </tests> | |
123 <help> | |
124 **What it does** | |
125 | |
126 Clusters of read alignments (provided as bam files) are aggregated along regions of | |
127 *variable* lengths. The Clustering algorithm works as follows: | |
128 | |
129 A read is clustered with the next read on the genomic reference if the two reads are | |
130 separated by *at maximum* the clustering distance (set in nucleotides). If clustered, the | |
131 step is repeated with the following read until clustering fails. A new cluster is then | |
132 searched. | |
133 | |
134 For clustering procedure, one has the possibility to consider the polarity of reads | |
135 (default setting, only forward reads or reverse reads can be clustered, separately), or to | |
136 ignore this polarity. | |
137 | |
138 Clusters of reads are plotted as single bars, their coordinates being the medians of | |
139 the flanking coordinates of the clusters. | |
140 | |
141 In addition, clusters are reported in a bed file. There, clusters can be filtered out upon | |
142 various parameters: cluster size, cluster read number or cluster read density (number of | |
143 reads divided by the length of the cluster). | |
144 | |
145 Note that bed filtering options only affect the number of reported lines in the bed file. | |
146 All clusters are shown in the plot. **i.e. the only parameter that affects the number of | |
147 found clusters is the clustering distance.** | |
148 | |
149 **Inputs** | |
150 | |
151 bam alignment files that must be | |
152 | |
153 - single-read | |
154 - sorted | |
155 - mapped to the same reference | |
156 | |
157 .. class:: warningmark | |
158 | |
159 This tool follows a "map-reduce" procedure: multiple inputs, which can be arranged in a | |
160 data collection, are visualised side by side in a single pdf file and are reported in a | |
161 single bed file. | |
162 | |
163 **Output** | |
164 | |
165 A pdf file generated by the R package lattice, a dataframe used to plot the clusters, and | |
166 a bed file that reports significant clusters. | |
167 </help> | |
168 | |
169 <citations> <!-- One DOI citation followed by a BibTeX entry for the book "Lattice: Multivariate Data Visualization with R". --> | |
170 <citation type="doi">10.1093/bioinformatics/btp352</citation> | |
171 <citation type="bibtex">@Book{, | |
172 title = {Lattice: Multivariate Data Visualization with R}, | |
173 author = {Deepayan Sarkar}, | |
174 publisher = {Springer}, | |
175 address = {New York}, | |
176 year = {2008}, | |
177 note = {ISBN 978-0-387-75968-5}, | |
178 url = {http://lmdvr.r-forge.r-project.org}, | |
179 }</citation> | |
180 </citations> | |
181 </tool> |