0
|
1 <tool id="blockclust" name="BlockClust" version="1.0.0">
|
2
|
2 <description>efficient clustering and classification of non-coding RNAs from short read RNA-seq profiles</description>
|
0
|
<requirements>
    <!-- Package dependencies declared for this tool; order and versions are kept as published. -->
    <requirement type="package" version="1.0">blockclust</requirement>
    <requirement type="package" version="1.1">eden</requirement>
    <requirement type="package" version="3.0.1">R_3_0_1</requirement>
    <requirement type="package" version="0.1.19">samtools</requirement>
    <requirement type="package" version="12.135">mcl</requirement>
    <requirement type="package" version="1.0">blockclust_rlibs</requirement>
</requirements>
<!-- Prints a fixed version string; it does not query the installed pipeline script. -->
<version_command>echo '1.0'</version_command>
|
|
<command><![CDATA[
    ## Cheetah template that assembles the shell command line.
    ## The selected mode of operation decides which BlockClustPipeLine.pl stage is run.
    ## Shell steps are chained with a logical AND so that a failing pipeline run is not
    ## followed by copy commands operating on missing files.
    #if str($tool_mode.operation) == "pre":
        ## Pre-processing: BAM alignments in, BED file of tags out.
        BlockClustPipeLine.pl -m PRE -bam $tool_mode.reads_bam -tbed $tags_bed
    #elif str($tool_mode.operation) == "clust":
        ## The pipeline writes into the files_path directory of the clusters output;
        ## the individual result files are copied to their datasets further below.
        #set $outputdir = $clusters.files_path
        #set $accept_bed=list()
        #set $reject_bed=list()
        ## prepare annotations
        ## Every genome build keeps its files in annotations/BUILD/BUILD.accept.bed and
        ## annotations/BUILD/BUILD.reject.bed. The backslash keeps the dollar sign away
        ## from Cheetah, so BLOCKCLUST_DATA_PATH is expanded by the shell at run time.
        #if str($tool_mode.reference) == "hg19":
            $accept_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/hg19/hg19.accept.bed")
            $reject_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/hg19/hg19.reject.bed")
        #elif str($tool_mode.reference) == "mm10":
            $accept_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/mm10/mm10.accept.bed")
            $reject_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/mm10/mm10.reject.bed")
        #elif str($tool_mode.reference) == "dm3":
            $accept_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/dm3/dm3.accept.bed")
            $reject_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/dm3/dm3.reject.bed")
        #elif str($tool_mode.reference) == "rheMac3":
            $accept_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/rheMac3/rheMac3.accept.bed")
            $reject_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/rheMac3/rheMac3.reject.bed")
        #elif str($tool_mode.reference) == "panTro4":
            ## Directory corrected from panTro3 to panTro4 to match the layout used by all
            ## other builds. NOTE(review): confirm against the installed data package.
            $accept_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/panTro4/panTro4.accept.bed")
            $reject_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/panTro4/panTro4.reject.bed")
        #elif str($tool_mode.reference) == "xenTro3":
            $accept_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/xenTro3/xenTro3.accept.bed")
            $reject_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/xenTro3/xenTro3.reject.bed")
        #elif str($tool_mode.reference) == "celWS235":
            $accept_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/celWS235/celWS235.accept.bed")
            $reject_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/celWS235/celWS235.reject.bed")
        #elif str($tool_mode.reference) == "tair10":
            $accept_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/tair10/tair10.accept.bed")
            $reject_bed.append("\$BLOCKCLUST_DATA_PATH/annotations/tair10/tair10.reject.bed")
        #end if
        BlockClustPipeLine.pl -m TEST -c \$BLOCKCLUST_DATA_PATH/blockclust.config
            -t $tool_mode.input_bbo
            -a #echo ''.join( $accept_bed )
            -r #echo ''.join( $reject_bed )
            -o $outputdir
            #if $tool_mode.nochr:
                -nochr
            #end if
            #if str($tool_mode.pred.enable_pred) == "yes":
                -p
                -pm $tool_mode.pred.pred_mode
                -md \$BLOCKCLUST_DATA_PATH/models
                ## Only the prediction file of the selected classification mode is collected.
                #if str($tool_mode.pred.pred_mode) == "nearest_neighbour":
                    && cp #echo os.path.join($outputdir,'nearest_neighbour_predictions.txt')# $nearest_neighbour_pred_bed
                #elif str($tool_mode.pred.pred_mode) == "model_based":
                    && cp #echo os.path.join($outputdir,'model_based_predictions.txt')# $model_based_pred_bed
                #end if
            #end if

        ## Copy the clustering results from the pipeline output directory to the datasets.
        && cp #echo os.path.join($outputdir, 'mcl_clusters','all_clusters.bed')# $clusters
        && cp #echo os.path.join($outputdir, 'hclust_tree.pdf')# $hclust_plot
        && cp #echo os.path.join($outputdir, 'discretized.gspan.tab')# $sim_tab_out
    #elif str($tool_mode.operation) == "post":
        ## Post-processing writes its plots into the job working directory, from where
        ## the outputs declared with from_work_dir pick them up.
        BlockClustPipeLine.pl -m POST -cbed $tool_mode.clusters_bed -cm $tool_mode.cmsearch_out -tab $tool_mode.sim_tab_in -rfam \$BLOCKCLUST_DATA_PATH/rfam_map.txt -o ./
    #end if
]]></command>
|
|
<inputs>
    <!-- Every parameter hangs off the tool_mode conditional: the selected
         operation decides which group of inputs is presented. -->
    <conditional name="tool_mode">
        <param name="operation" type="select" label="Select mode of operation">
            <option value="pre">Pre-processing </option>
            <option value="clust">Clustering and classification</option>
            <option value="post">Post-processing</option>
        </param>

        <!-- Pre-processing: a single BAM dataset. -->
        <when value="pre">
            <param name="reads_bam" type="data" format="bam" label="BAM file containing alignments"/>
        </when>

        <!-- Clustering and classification: blockgroups, reference genome and optional classification. -->
        <when value="clust">
            <param name="input_bbo" type="data" format="tabular" label="Input blockgroups file"/>
            <!-- The option values are the genome build names the command template compares against. -->
            <param name="reference" type="select" label="Select reference genome">
                <option value="hg19">Human (hg19)</option>
                <option value="mm10">Mouse (mm10)</option>
                <option value="dm3">Fly (dm3)</option>
                <option value="rheMac3">Monkey (rheMac3)</option>
                <option value="panTro4">Chimp (panTro4)</option>
                <option value="xenTro3">Frog (xenTro3)</option>
                <option value="celWS235">C. elegans (celWS235)</option>
                <option value="tair10">Arabidopsis thaliana (tair10)</option>
            </param>
            <!-- When checked, the command template adds the -nochr switch. -->
            <param name="nochr" type="boolean" label="My input files have no 'chr' for chromosome names" checked="False"/>
            <!-- Classification is off by default; the mode selector only appears for "yes". -->
            <conditional name="pred">
                <param name="enable_pred" type="select" label="Would you like to perform classification?">
                    <option value="no">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="yes">
                    <param name="pred_mode" type="select" label="Mode of classification">
                        <option value="model_based">Model based</option>
                        <option value="nearest_neighbour">Nearest neighbour</option>
                    </param>
                </when>
            </conditional>
        </when>

        <!-- Post-processing: cmsearch results, the clusters BED file and the pairwise similarities. -->
        <when value="post">
            <param name="cmsearch_out" type="data" format="tabular" label="Output of cmsearch tool"/>
            <param name="clusters_bed" type="data" format="bed" label="BED file containing clusters (output of BlockClust)"/>
            <param name="sim_tab_in" type="data" format="tabular" label="Pairwise similarities file"/>
        </when>
    </conditional>
</inputs>
|
|
116
|
|
<outputs>
    <!-- Each output carries a filter so that only the datasets belonging to the
         selected mode of operation are created. -->

    <!-- Pre-processing -->
    <data name="tags_bed" format="bed" label="BlockClust: BAM to BED on ${on_string}">
        <filter>tool_mode["operation"] == "pre"</filter>
    </data>

    <!-- Clustering and classification -->
    <data name="hclust_plot" format="pdf" label="BlockClust: Hierarchical clustering plot on ${on_string}">
        <filter>tool_mode["operation"] == "clust"</filter>
    </data>
    <data name="clusters" format="bed" label="BlockClust: BED of predicted clusters on ${on_string}">
        <filter>tool_mode["operation"] == "clust"</filter>
    </data>
    <!-- The two prediction outputs additionally require classification to be enabled
         and the matching classification mode to be selected. -->
    <data name="model_based_pred_bed" format="bed" label="BlockClust: Model based predictions BED on ${on_string}">
        <filter>tool_mode["operation"] == "clust" and tool_mode["pred"]["enable_pred"] == "yes" and tool_mode["pred"]["pred_mode"] == "model_based"</filter>
    </data>
    <data name="nearest_neighbour_pred_bed" format="bed" label="BlockClust: Nearest neighbor predictions BED on ${on_string}">
        <filter>tool_mode["operation"] == "clust" and tool_mode["pred"]["enable_pred"] == "yes" and tool_mode["pred"]["pred_mode"] == "nearest_neighbour"</filter>
    </data>
    <data name="sim_tab_out" format="tabular" label="BlockClust: Pairwise similarities on ${on_string}">
        <filter>tool_mode["operation"] == "clust"</filter>
    </data>

    <!-- Post-processing: both plots are collected from the job working directory. -->
    <data name="cluster_dist" format="pdf" from_work_dir="cluster_distribution.pdf" label="BlockClust: Cluster distribution on ${on_string}">
        <filter>tool_mode["operation"] == "post"</filter>
    </data>
    <data name="cluster_hclust" format="pdf" from_work_dir="hclust_tree_clusters.pdf" label="BlockClust: Hierarchical clustering plot of cluster centroids on ${on_string}">
        <filter>tool_mode["operation"] == "post"</filter>
    </data>
</outputs>
|
|
155 <help>
|
|
156
|
|
157 .. class:: infomark
|
|
158
|
|
159 **What it does**
|
|
160
|
2
|
161 BlockClust is an efficient approach to detect transcripts with similar
|
|
162 processing patterns. We propose a novel way to encode expression profiles
|
|
163 in compact discrete structures, which can then be processed using
|
|
164 fast graph-kernel techniques. BlockClust allows both clustering and
|
|
165 classification of small non-coding RNAs.
|
|
166
|
4
|
167 BlockClust runs in three operating modes:
|
|
168
|
2
|
169 1) Pre-processing - converts given mapped reads (BAM) into a BED file of tags
|
4
|
170
|
|
171 2) Clustering and classification - of given input blockgroups (output of blockbuster tool) as explained in the original paper.
|
|
172
|
6
|
173 3) Post-processing - plots for overview of predicted clusters.
|
2
|
174
|
|
175 For a thorough analysis of your data, we suggest using the complete BlockClust workflow, which contains all three modes of operation.
|
0
|
176
|
|
177 **Inputs**
|
|
178
|
2
|
179 BlockClust input files are dependent on the mode of operation:
|
4
|
180
|
|
181 1. Pre-processing mode:
|
|
182 * Binary Sequence Alignment Map (BAM) file
|
2
|
183
|
4
|
184 2. Clustering and classification:
|
|
185 * A blockgroups file generated by blockbuster tool
|
|
186 * Select reference genome
|
2
|
187
|
4
|
188 3. Post-processing:
|
|
189 * Output of cmsearch, searched clusters generated by BlockClust against Rfam
|
|
190 * BED file containing clusters generated by BlockClust
|
|
191 * Pairwise similarities of blockgroups generated by BlockClust
|
0
|
192
|
4
|
193 **Outputs**
|
|
194
|
|
195 1. Pre-processing mode:
|
|
196 * BED file of tags with expressions
|
0
|
197
|
4
|
198 2. Clustering and classification:
|
|
199 * Hierarchical clustering plot of all input blockgroups by their similarity
|
|
200 * Pairwise similarities of all input blockgroups
|
|
201 * BED file containing predicted clusters
|
|
202 * BED file containing predictions of blockgroups by a pre-compiled SVM binary classification model.
|
2
|
203
|
4
|
204 3. Post-processing:
|
6
|
205 * Plot of distribution of ncRNA families per predicted cluster (overview of cluster precision). The annotations of ncRNA families are retrieved by searching cluster instances against the Rfam database.
|
4
|
206 * Hierarchical clustering made out of centroids of each BlockClust predicted cluster
|
0
|
207
|
|
208 ------
|
|
209
|
|
210 **References**
|
|
211
|
3
|
212 Pavankumar Videm, Dominic Rose, Fabrizio Costa, and Rolf Backofen. "BlockClust: efficient clustering and classification of non-coding RNAs from short read RNA-seq profiles." Bioinformatics 30, no. 12 (2014): i274-i282.
|
0
|
213
|
|
214
|
|
215 </help>
|
|
216 </tool>
|