comparison galaxy/wrappers/ExprHeatmapClustering.xml @ 0:14045c80a222 draft

"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author vandelj
date Fri, 26 Jun 2020 09:38:23 -0400
parents
children 0b09345fa632
comparison
equal deleted inserted replaced
-1:000000000000 0:14045c80a222
1 <tool name="GIANT-Heatmap and Hierarchical clustering" id="giant_hierarchical_clustering" version="0.5.1">
2 <description>Run hierarchical clustering and plot heatmap from expression data and/or differential expression analysis</description>
3 <requirements> <!-- Package requirements of the tool; every entry is pinned to one exact version. -->
4 <requirement type="package" version="4.8.0">r-plotly</requirement>
5 <requirement type="package" version="1.12.0">r-dendextend</requirement>
6 <requirement type="package" version="0.1_20">r-ggdendro</requirement>
7 <requirement type="package" version="3.2.1">r-ggplot2</requirement>
8 <requirement type="package" version="0.16.0">r-heatmaply</requirement>
9 <requirement type="package" version="0.4.8">r-circlize</requirement>
10 <requirement type="package" version="1.18.1">bioconductor-complexheatmap</requirement>
11 <requirement type="package" version="2.2.2">pandoc</requirement>
12 </requirements>
13 <code file='../../src/General_functions.py'/>
14 <stdio> <!-- Failure rules: the two R error texts are searched in both stdout and stderr; exit codes 1 to 10 are fatal (10 is the code used by the report script when a file is missing). -->
15 <regex match="Execution halted"
16 source="both"
17 level="fatal"
18 description="Execution halted, please contact tool developer or administrators." />
19 <regex match="Error in"
20 source="both"
21 level="fatal"
22 description="An error occurred during R execution, please contact tool developer." />
23 <exit_code range="10" level="fatal" description="Missing file during html report, see log file for more information." />
24 <exit_code range="1:9" level="fatal" description="Error in R execution, see log file for more information." />
25 </stdio>
26 <command> <![CDATA[
27
28 #if ($dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData") and $dataToCluster.expressionData:
29 ##input preparation: builds ./selectedExpressionData then ./orderedExpressionData; dataset and log paths are single-quoted so that the shell always receives each path as one word
30 ##start by selecting specific input data columns depending on user request (generic data only, otherwise the file is copied unchanged)
31 #if $dataToCluster.dataToCluster_selector=="genericData" and $dataToCluster.columnToKeep:
32 awk -v columns="$dataToCluster.columnToKeep" 'BEGIN{FS="\t";OFS="";ORS="";split(columns,columnsTab,",")} FNR==1{for(iColumn=1;iColumn<=length(columnsTab);iColumn++)for(iField=2;iField<=NF;iField++){if(\$iField==columnsTab[iColumn])colsToSelect[iColumn]=iField}} {line=\$1;for(iColumn=1;iColumn<=length(columnsTab);iColumn++)line=line"\t"\$colsToSelect[iColumn];print line"\n";}' '$dataToCluster.expressionData' > ./selectedExpressionData;
33 #else
34 cp '$dataToCluster.expressionData' ./selectedExpressionData;
35 #end if
36
37 ##reorder columns of input data based on factors file
38 #if $dataToCluster.reorder_sample.reordering_selector=="factorFile" and $dataToCluster.reorder_sample.factorFileData and $dataToCluster.reorder_sample.factorToUse:
39 awk -v factors="$dataToCluster.reorder_sample.factorToUse" 'BEGIN{FS="\t";OFS="";ORS="";split(factors,factorsTab,",")} FNR==1{for(iFactor=1;iFactor<=length(factorsTab);iFactor++)for(iField=2;iField<=NF;iField++){if(\$iField==factorsTab[iFactor])colsToSelect[iFactor]=iField}} FNR>1{line=\$1;for(iFactor=1;iFactor<=length(factorsTab);iFactor++)line=line"\t"\$colsToSelect[iFactor];print line"\n";}' '$dataToCluster.reorder_sample.factorFileData' > ./orderingFactor;
40 ##./orderingFactor holds the first column followed by the selected factor columns (header line dropped); it is sorted from its second field to get the new order
41 sort -V -k2 ./orderingFactor > ./orderingSample;
42 ##data columns are then emitted in the order of ./orderingSample; columns whose header is absent from that list are dropped (ARGIND is a gawk extension)
43 awk 'BEGIN{FS="\t";OFS="";ORS="";factorNumber=0} ARGIND==1{sampleOrdered[FNR]=\$1;factorNumber=FNR} ARGIND==2 && FNR==1{for(iElemt=1;iElemt<=factorNumber;iElemt++)for(iPosit=2;iPosit<=NF;iPosit++)if(\$iPosit==sampleOrdered[iElemt])positOrdered[iElemt]=iPosit} ARGIND==2{line=\$1;for(iElemt=1;iElemt<=factorNumber;iElemt++)if(iElemt in positOrdered)line=line"\t"\$positOrdered[iElemt];print line"\n"}' ./orderingSample ./selectedExpressionData > ./orderedExpressionData;
44
45 ##check if some input data columns were lost during the process (header field counts before and after reordering are compared)
46 awk 'ARGIND==1 && FNR==1{colNumbA=NF} ARGIND==2 && FNR==1{colNumbB=NF} END{if(colNumbA!=colNumbB) print "[WARNING] "colNumbA-colNumbB" input data columns was removed during reordering due to missing information in factor file!\n"}' ./selectedExpressionData ./orderedExpressionData >> '$log';
47
48 #if $advSection.conditionClusterNumber!="1":
49 printf "[WARNING]Sample clustering option is selected, sample reordering will not be preserved!\n" >> '$log';
50 #end if
51 #else:
52 cp ./selectedExpressionData ./orderedExpressionData;
53 #end if
54 #end if
55
56
57 ##generate common file name (./filteredDifferentialAnalysis) for differential analysis results depending on input data nature; source dataset paths are single-quoted for the shell
58 #if ($dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData") and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam" and $dataToCluster.filtering_step.select_filtering.differentialAnalysis:
59 cp '${dataToCluster.filtering_step.select_filtering.differentialAnalysis}' ./filteredDifferentialAnalysis;
60 #end if
61 #if $dataToCluster.dataToCluster_selector=="foldChange" and $dataToCluster.differentialAnalysis:
62 cp '$dataToCluster.differentialAnalysis' ./filteredDifferentialAnalysis;
63 #end if
64
65
66 ##generate factor information to use for barplot: keeps the first column and the column whose header equals the selected factor; the factor file path is single-quoted for the shell
67 #if $advSection.conditionBarColor.conditionBarColor_selector=="yes" and $advSection.conditionBarColor.factorFileDataBarPlot and $advSection.conditionBarColor.factorToUse:
68 awk -v factor="$advSection.conditionBarColor.factorToUse" 'BEGIN{FS="\t";OFS="";ORS=""} NR==1{for(i=2;i<=NF;i++)if(\$i==factor)colToKeep=i} {print \$1"\t"\$colToKeep"\n"}' '$advSection.conditionBarColor.factorFileDataBarPlot' > ./barPlotFactor;
69 #end if
70 ##run the clustering R script; options are appended according to the selected data type and filtering mode. In generic mode the optional numeric thresholds are tested with str(...) against the empty string, because a bare test treats a threshold of 0 as false and would silently drop the filter
71 Rscript '$__tool_directory__/../../src/heatMapClustering.R' --log '$log' --outputFile '$outputData' --format '$advSection.imageFormat' --clusterNumber '$advSection.clusterNumber' --maxRows '$advSection.maxSampleToPlot' --sampleClusterNumber '$advSection.conditionClusterNumber' --dataTransformation '$advSection.dataTransformation' --distanceMeasure '$advSection.distanceMeasure' --aggloMethod '$advSection.aggloMethod'
72 #if $advSection.select_color.specifyColors=="true":
73 --personalColors '$advSection.select_color.featureMin_color,$advSection.select_color.featureMedium_color,$advSection.select_color.featureMax_color'
74 #end if
75 #if $advSection.conditionBarColor.conditionBarColor_selector=="yes" and $advSection.conditionBarColor.factorFileDataBarPlot and $advSection.conditionBarColor.factorToUse:
76 --factorInfo './barPlotFactor'
77 --sideBarColorPalette '$advSection.conditionBarColor.sideBarPalette'
78 #end if
79 #if $dataToCluster.dataToCluster_selector=="genericData":
80 --genericData
81 #end if
82 #if $dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData":
83 --expressionFile './orderedExpressionData'
84 #if $dataToCluster.filtering_step.filtering_step_selector!="no":
85 --filterInputOutput '$dataToCluster.filtering_step.filtering_step_selector'
86 #if $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam":
87 --diffAnalyseFile './filteredDifferentialAnalysis'
88 #if $dataToCluster.dataToCluster_selector=="expression":
89 --comparisonName '$dataToCluster.filtering_step.select_filtering.comparisonsToInclude'
90 --FCthreshold '$dataToCluster.filtering_step.select_filtering.FCthreshold'
91 --pvalThreshold '$dataToCluster.filtering_step.select_filtering.pvalThreshold'
92 #else:
93 #if $dataToCluster.filtering_step.select_filtering.comparisonsToIncludeLow and str($dataToCluster.filtering_step.select_filtering.valThresholdLow) != "":
94 --comparisonNameLow '$dataToCluster.filtering_step.select_filtering.comparisonsToIncludeLow'
95 --FCthreshold '$dataToCluster.filtering_step.select_filtering.valThresholdLow'
96 #end if
97 #if $dataToCluster.filtering_step.select_filtering.comparisonsToIncludeHigh and str($dataToCluster.filtering_step.select_filtering.valThresholdHigh) != "":
98 --comparisonNameHigh '$dataToCluster.filtering_step.select_filtering.comparisonsToIncludeHigh'
99 --pvalThreshold '$dataToCluster.filtering_step.select_filtering.valThresholdHigh'
100 #end if
101 #end if
102 #else:
103 --geneListFiltering '$dataToCluster.filtering_step.select_filtering.geneListFile'
104 #end if
105 #end if
106 #else
107 --diffAnalyseFile './filteredDifferentialAnalysis'
108 --comparisonName '$dataToCluster.comparisonsToInclude'
109 #if $dataToCluster.filtering_step.filtering_step_selector!="no":
110 --filterInputOutput '$dataToCluster.filtering_step.filtering_step_selector'
111 #if $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam":
112 --FCthreshold '$dataToCluster.filtering_step.select_filtering.FCthreshold'
113 --pvalThreshold '$dataToCluster.filtering_step.select_filtering.pvalThreshold'
114 #else:
115 --geneListFiltering '$dataToCluster.filtering_step.select_filtering.geneListFile'
116 #end if
117 #end if
118 #end if
119 ;
120 ret_code=\$?; ##exit status of the Rscript call; the report script only runs when it is 0, and any non-zero status is passed on as the job exit code
121 if [ \$ret_code != 0 ]; then
122 exit \$ret_code;
123 else
124 bash '$scriptTransfer';
125 ret_code=\$?;
126 if [ \$ret_code != 0 ]; then
127 exit \$ret_code;
128 fi
129 fi;
130 printf "[INFO]End of tool script" >> '$log';
131 ]]>
132 </command>
133
134
135
136 <configfiles>
137 <configfile name="scriptTableToHtml">
138 <![CDATA[
139 printf "<!DOCTYPE html>
140 <html>
141 <head>
142 <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\">
143 <link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css\">
144 <script type=\"text/javascript\" language=\"javascript\" src=\"https://code.jquery.com/jquery-1.12.4.js\">
145 </script>
146 <script type=\"text/javascript\" language=\"javascript\" src=\"https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js\">
147 </script>
148 <script type=\"text/javascript\" class=\"init\">
149 \\$(document).ready(function() {
150 \\$(\'\#example\').DataTable( {
151 \"columnDefs\": [ {
152 \"visible\": false,
153 \"targets\": -1
154 } ]
155 } );
156 } );
157 </script>
158 </head>
159 <body style=\"background-color:white;\">
160 <table id=\"example\" class=\"display\" cellspacing=\"0\">
161 " > ${html_file.extra_files_path}/outputClustering.html
162
163 printf "<colgroup>\n" >> ${html_file.extra_files_path}/outputClustering.html
164 #if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"):
165 printf "<col span=\"2\" style=\"background-color:rgb(224,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html
166 printf "<col span=\"1\" style=\"background-color:rgb(250,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html
167 awk 'BEGIN{odd=1;FS="\t"} NR==1{for(i=4;i<=NF;i=i+5){if(odd==1){odd=0;printf "<col span=\"5\" style=\"background-color:rgb(224,238,255)\">\n"}else{odd=1;printf "<col span=\"5\" style=\"background-color:rgb(255,221,224)\">\n"}}}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
168 #else
169 printf "<col span=\"1\" style=\"background-color:rgb(224,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html
170 printf "<col span=\"1\" style=\"background-color:rgb(250,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html
171 awk 'BEGIN{odd=1;FS="\t"} NR==1{for(i=3;i<=NF;i++){if(odd==1){odd=0;printf "<col span=\"1\" style=\"background-color:rgb(224,238,255)\">\n"}else{odd=1;printf "<col span=\"1\" style=\"background-color:rgb(255,221,224)\">\n"}}}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
172 #end if
173
174 printf "</colgroup>\n" >> ${html_file.extra_files_path}/outputClustering.html
175 printf "<thead>\n<tr>\n" >> ${html_file.extra_files_path}/outputClustering.html
176
177 #if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"):
178 printf "<th rowspan=\"2\">Gene</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
179 printf "<th rowspan=\"2\">Info</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
180 printf "<th rowspan=\"2\">Cluster</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
181 awk 'BEGIN{FS="\t"} NR==1{for(i=4;i<=NF;i=i+5)printf "<th colspan=\"5\">"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
182 printf "<th></th>\n" >> ${html_file.extra_files_path}/outputClustering.html
183 printf "</tr>\n<tr>\n" >> ${html_file.extra_files_path}/outputClustering.html
184 awk 'BEGIN{FS="\t"} NR==2{for(i=4;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
185 #else
186 printf "<th rowspan=\"1\">Gene</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
187 printf "<th rowspan=\"1\">Cluster</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
188 awk 'BEGIN{FS="\t"} NR==1{for(i=3;i<=NF;i++)printf "<th colspan=\"1\">"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
189 #end if
190
191 printf "<th></th>\n" >> ${html_file.extra_files_path}/outputClustering.html
192 printf "</tr>\n</thead>\n" >> ${html_file.extra_files_path}/outputClustering.html
193 printf "<tfoot>\n<tr>\n" >> ${html_file.extra_files_path}/outputClustering.html
194
195 #if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"):
196 awk 'BEGIN{FS="\t"} NR==2{for(i=1;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
197 #else
198 awk 'BEGIN{FS="\t"} NR==1{for(i=1;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
199 #end if
200
201 printf "<th></th>\n" >> ${html_file.extra_files_path}/outputClustering.html
202 printf "</tr>\n</tfoot>\n">> ${html_file.extra_files_path}/outputClustering.html
203 printf "<tbody>\n" >> ${html_file.extra_files_path}/outputClustering.html
204
205 #if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"):
206 awk 'BEGIN{FS="\t"} NR>2{printf "<tr>\n";for(i=1;i<=NF;i++){printf "<th>"\$i"</th>\n"};printf "<th></th>\n";printf "</tr>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
207 #else
208 awk 'BEGIN{FS="\t"} NR>1{printf "<tr>\n";for(i=1;i<=NF;i++){printf "<th>"\$i"</th>\n"};printf "<th></th>\n";printf "</tr>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
209 #end if
210
211 printf "</tbody>\n" >> ${html_file.extra_files_path}/outputClustering.html
212
213 printf "</table>
214 </body>
215 </html>" >> ${html_file.extra_files_path}/outputClustering.html
216
217 ]]>
218 </configfile>
219
220 <configfile name="scriptTransfer">
221 <![CDATA[
222
223 mkdir -p '$html_file.extra_files_path'
224
225 ##all dataset, log and config file paths below are single-quoted so that the shell always receives each path as one word
226 ##create HTML file for clustering output table (the table script is sourced, so it runs in this same shell)
227 source '$scriptTableToHtml'
228
229 ##check outputClustering.html is here, otherwise stop with exit code 10
230 if ! [ -e '${html_file.extra_files_path}/outputClustering.html' ]; then
231 printf "[ERROR]outputClustering.html is missing.\n" >> '$log';
232 exit 10
233 fi
234
235 ##write header of html file (this truncates any previous content of the report)
236 printf "<!DOCTYPE html>\n<html>\n<body>" > '$html_file'
237
238
239 ##first add reference of the clustering output table
240 printf "<h3>Clustering tabular</h3>\n" >> '$html_file'
241 printf "<a href=\"outputClustering.html\">Clustering results</a>\n" >> '$html_file'
242
243
244 ##manage heatmap file
245
246
247 if [ -e ./plotLyDir/Heatmap.html ]; then
248
249 printf "<h3>Heatmap plot</h3>\n" >> '$html_file'
250
251 ##rewrite script references so the page loads its assets from the folder name used in the report (PlotLy_Heatmap_scripts)
252 sed -i "s/Heatmap_files/PlotLy_Heatmap_scripts/g" ./plotLyDir/Heatmap.html
253
254 ##copy the HTML page into the report extra files folder
255 cp ./plotLyDir/Heatmap.html '${html_file.extra_files_path}/Heatmap.html'
256
257 ##add HTML link
258 printf "<a href=\"Heatmap.html\">Heatmap</a>\n" >> '$html_file'
259
260 #if $advSection.imagePlotlyFormat=="svg":
261 ##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work); done in a subshell so that a failing cd can never move the main script out of the job directory
262 ( cd ./plotLyDir/Heatmap_files/plotly-main-*/ &&
263 awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js &&
264 awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js &&
265 rm ./plotly-latest.minTemp.js
266 )
267 #end if
268
269 #if $advSection.scaleSnapshot!="1.0":
270 ##before copying scripts folder modify scale parameter (not proud of solution but seems to work); also run in a subshell for the same reason
271 ( cd ./plotLyDir/Heatmap_files/plotly-main-*/ &&
272 awk '{gsub("h=t\\.scale\\|\\|1","h=$advSection.scaleSnapshot",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js &&
273 mv -f ./plotly-latest.minTemp.js ./plotly-latest.min.js
274 )
275 #end if
276
277 ##now copy scripts folder and give it the name referenced by the rewritten page
278 cp -r ./plotLyDir/Heatmap_files '$html_file.extra_files_path'
279 mv '${html_file.extra_files_path}/Heatmap_files' '${html_file.extra_files_path}/PlotLy_Heatmap_scripts'
280
281 else
282 printf "[ERROR]Heatmap.html is missing.\n" >> '$log';
283 exit 10
284 fi
285
286
287
288
289 ##manage screePlot files
290
291
292 if [ -e ./plotLyDir/screePlot.html ]; then
293
294 printf "<h3>Scree plot</h3>\n" >> '$html_file'
295
296 ##rewrite script references so the page loads its assets from the folder name used in the report (PlotLy_screePlot_scripts)
297 sed -i "s/screePlot_files/PlotLy_screePlot_scripts/g" ./plotLyDir/screePlot.html
298
299 ##copy the HTML page into the report extra files folder
300 cp ./plotLyDir/screePlot.html '${html_file.extra_files_path}/screePlot.html'
301
302 ##add HTML link
303 printf "<a href=\"screePlot.html\">Scree plot</a>\n" >> '$html_file'
304
305 #if $advSection.imagePlotlyFormat=="svg":
306 ##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work); done in a subshell so that a failing cd can never move the main script out of the job directory
307 ( cd ./plotLyDir/screePlot_files/plotly-main-*/ &&
308 awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js &&
309 awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js &&
310 rm ./plotly-latest.minTemp.js
311 )
312 #end if
313
314 #if $advSection.scaleSnapshot!="1.0":
315 ##before copying scripts folder modify scale parameter (not proud of solution but seems to work); also run in a subshell for the same reason
316 ( cd ./plotLyDir/screePlot_files/plotly-main-*/ &&
317 awk '{gsub("h=t\\.scale\\|\\|1","h=$advSection.scaleSnapshot",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js &&
318 mv -f ./plotly-latest.minTemp.js ./plotly-latest.min.js
319 )
320 #end if
321
322 ##now copy scripts folder and give it the name referenced by the rewritten page
323 cp -r ./plotLyDir/screePlot_files '$html_file.extra_files_path'
324 mv '${html_file.extra_files_path}/screePlot_files' '${html_file.extra_files_path}/PlotLy_screePlot_scripts'
325
326 else
327 printf "[WARNING]screePlot.html is missing, probably due to limited number of genes.\n" >> '$log';
328 fi
329
330
331 ##manage circular files
332
333
334 if [ -e ./plotDir/circularPlot.${advSection.imageFormat} ]; then
335 ##the circular plot is optional: copy it next to the report and link it, otherwise only log a warning; dataset and log paths are single-quoted for the shell
336 cp ./plotDir/circularPlot.${advSection.imageFormat} '${html_file.extra_files_path}/circularPlot.${advSection.imageFormat}'
337
338 printf "<h3>Circular plot</h3>\n" >> '$html_file'
339
340 ##add HTML link
341 printf "<a href=\"circularPlot.${advSection.imageFormat}\">Circular plot</a>\n" >> '$html_file'
342
343 else
344 printf "[WARNING]circularPlot file is missing, probably due to limited number of genes.\n" >> '$log';
345 fi
346
347
348
349 ##create footer of HTML file
350 printf "</body>\n</html>" >> '$html_file'
351
352 ]]>
353 </configfile>
354 </configfiles>
355
356
357
358 <inputs>
359 <param type="text" name="title" value="Heatmap_toPersonalize" label="Title for output"/>
360
361 <conditional name="dataToCluster">
362 <param name="dataToCluster_selector" type="select" label="Data to cluster">
363 <option value="expression" selected="true">Expression data</option>
364 <option value="foldChange">Differential expression analysis results</option>
365 <option value="genericData">Generic data table</option>
366 </param>
367 <when value="expression">
368
369 <param type="data" name="expressionData" format="tabular" label="Normalized expression tabular file" multiple="false"/>
370
371 <conditional name="reorder_sample">
372 <param name="reordering_selector" type="select" label="Reorder samples">
373 <option value="no" selected="true">No reordering</option>
374 <option value="factorFile">Reorder sample based on a factors file</option>
375 </param>
376 <when value="factorFile">
377 <param type="data" name="factorFileData" format="tabular" label="Factors file" multiple="false" help="Be sure the conditions clusters number is set to 1 in advanced parameters."/>
378 <param name="factorToUse" type="select" optional="false" multiple="true" label="Select factor(s) to use for reordering" refresh_on_change="true" dynamic_options="get_column_names(reorder_sample['factorFileData'].file_name,0)">
379 <validator type="empty_field" message="You should specify at least one factor"></validator>
380 </param>
381 </when>
382 <when value="no">
383 </when>
384 </conditional>
385
386 <conditional name="filtering_step">
387 <param name="filtering_step_selector" type="select" label="Probes/genes filtering">
388 <option value="no" selected="true">No filtering</option>
389 <option value="input">Filter input probes/genes before clustering</option>
390 <option value="output">Filter probes/genes after clustering (for tabular output)</option>
391 </param>
392 <when value="input">
393 <conditional name="select_filtering">
394 <param name="filtering_stepBis_selector" type="select" label="Filter">
395 <option value="diffExpParam" selected="true">Based on differential expression results (FC and p-val)</option>
396 <option value="geneList">From list of genes</option>
397 </param>
398 <when value="diffExpParam">
399 <param type="data" name="differentialAnalysis" format="tabular" label="Differential analysis tabular file (as given by LIMMA diff.exp. tool)" optional="false" multiple="false">
400 </param>
401 <!-- Options are built by get_column_names_filteredList from the selected file; the helper is presumably provided by the code file declared at the top of the tool (to be confirmed). -->
402 <param name="comparisonsToInclude" type="select" optional="false" multiple="true" label="Select comparisons to use for filtering" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0,1],5)">
403 <validator type="empty_field" message="You should specify at least one comparison"></validator>
404 </param>
405 <!-- The range check is inclusive (exclude_min is false), so a threshold of exactly 1 is accepted. -->
406 <param name="FCthreshold" type="float" value="2" label="Fold change threshold for input (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)" >
407 <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater than or equal to 1"/>
408 </param>
409 <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for input" help="When several comparisons are selected a conservative rule is applied (see details below)" >
410 <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/>
411 </param>
412 </when>
413 <when value="geneList">
414 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in expression file"/>
415 </when>
416 </conditional>
417 </when>
418
419 <when value="output">
420 <conditional name="select_filtering">
421 <param name="filtering_stepBis_selector" type="select" label="Filter">
422 <option value="diffExpParam" selected="true">Based on differential expression results (FC and p-val)</option>
423 <option value="geneList">From list of genes</option>
424 </param>
425 <when value="diffExpParam">
426 <param type="data" name="differentialAnalysis" format="tabular" label="Differential analysis tabular file (as given by LIMMA diff.exp. tool)" optional="false" multiple="false">
427 </param>
428 <!-- Options are built by get_column_names_filteredList from the selected file; the helper is presumably provided by the code file declared at the top of the tool (to be confirmed). -->
429 <param name="comparisonsToInclude" type="select" optional="false" multiple="true" label="Select comparisons to use for filtering" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0,1],5)">
430 <validator type="empty_field" message="You should specify at least one comparison"></validator>
431 </param>
432 <!-- The range check is inclusive (exclude_min is false), so a threshold of exactly 1 is accepted. -->
433 <param name="FCthreshold" type="float" value="2" label="Fold change threshold for output (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)" >
434 <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater than or equal to 1"/>
435 </param>
436 <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for output" help="When several comparisons are selected a conservative rule is applied (see details below)">
437 <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/>
438 </param>
439 </when>
440 <when value="geneList">
441 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in expression file"/>
442 </when>
443 </conditional>
444 </when>
445 <when value="no">
446 </when>
447 </conditional>
448
449 </when>
450
451 <when value="foldChange">
452
453 <param type="data" name="differentialAnalysis" format="tabular" label="Differential analysis tabular file (as given by LIMMA diff.exp. tool)" optional="false" multiple="false">
454 </param>
455
456 <param name="comparisonsToInclude" type="select" optional="false" multiple="true" label="Select comparisons to cluster" refresh_on_change="true" dynamic_options="get_column_names_filteredList(dataToCluster['differentialAnalysis'].file_name,[0,1],5)">
457 <validator type="empty_field" message="You should specify at least one comparison"></validator>
458 </param> <!-- Multi-select of the comparisons to cluster; its value is passed to the R script as the comparisonName option. -->
459
460 <conditional name="filtering_step">
461 <param name="filtering_step_selector" type="select" label="Probes/genes filtering">
462 <option value="no" selected="true">No filtering</option>
463 <option value="input">Filter input probes/genes before clustering</option>
464 <option value="output">Filter probes/genes only in tabular output file</option>
465 </param>
466 <when value="input">
467 <conditional name="select_filtering">
468 <param name="filtering_stepBis_selector" type="select" label="Filter">
469 <option value="diffExpParam" selected="true">Based on differential expression results (FC and p-val)</option>
470 <option value="geneList">From list of genes</option>
471 </param>
472 <when value="diffExpParam"> <!-- Thresholds applied to the differential analysis file already chosen for clustering; the FC range check is inclusive, so exactly 1 is accepted. -->
473 <param name="FCthreshold" type="float" value="2" label="Fold change threshold for input (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)" >
474 <validator type="in_range" min="1" exclude_min="false" message="FC threshold should be greater than or equal to 1"/>
475 </param>
476 <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for input" help="When several comparisons are selected a conservative rule is applied (see details below)" >
477 <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/>
478 </param>
479 </when>
480 <when value="geneList">
481 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in diff. exp. analysis file"/>
482 </when>
483 </conditional>
484 </when>
485
486 <when value="output">
487 <conditional name="select_filtering">
488 <param name="filtering_stepBis_selector" type="select" label="Filter">
489 <option value="diffExpParam" selected="true">Based on diff. exp. parameters (FC and p-val)</option>
490 <option value="geneList">From list of genes</option>
491 </param>
492 <when value="diffExpParam"> <!-- Thresholds applied to the differential analysis file already chosen for clustering; the FC range check is inclusive, so exactly 1 is accepted. -->
493 <param name="FCthreshold" type="float" value="2" label="Fold change threshold for output (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)">
494 <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater than or equal to 1"/>
495 </param>
496 <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for output" help="When several comparisons are selected a conservative rule is applied (see details below)">
497 <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/>
498 </param>
499 </when>
500 <when value="geneList">
501 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in diff. exp. analysis file"/>
502 </when>
503 </conditional>
504 </when>
505 <when value="no">
506 </when>
507 </conditional>
508
509 </when>
510
511 <when value="genericData">
512
513 <param type="data" name="expressionData" format="tabular" label="Generic tabular file" multiple="false"/>
514
515 <param name="columnToKeep" type="select" optional="false" multiple="true" label="Select column to cluster" refresh_on_change="true" dynamic_options="get_column_names_filteredList(dataToCluster['expressionData'].file_name,[0])">
516 <validator type="empty_field" message="You should select at least one column"></validator>
517 </param> <!-- The selected column names are passed to awk in the command section, which keeps only those columns of the generic table. -->
518
519 <conditional name="reorder_sample"> <!-- Optional column reordering driven by a factors file; the command section only reorders when a file and at least one factor are given. -->
520 <param name="reordering_selector" type="select" label="Reorder columns">
521 <option value="no" selected="true">No reordering</option>
522 <option value="factorFile">Reorder columns based on a factors file</option>
523 </param>
524 <when value="factorFile">
525 <param type="data" name="factorFileData" format="tabular" label="Factors file" multiple="false" help="Be sure the conditions clusters number is set to 1 in advanced parameters."/>
526 <param name="factorToUse" type="select" optional="false" multiple="true" label="Select factor(s) to use for reordering" refresh_on_change="true" dynamic_options="get_column_names(reorder_sample['factorFileData'].file_name,0)">
527 <validator type="empty_field" message="You should specify at least one factor"></validator>
528 </param>
529 </when>
530 <when value="no">
531 </when>
532 </conditional>
533
534 <conditional name="filtering_step">
535 <param name="filtering_step_selector" type="select" label="Probes/genes filtering">
536 <option value="no" selected="true">No filtering</option>
537 <option value="input">Filter input probes/genes before clustering</option>
538 <option value="output">Filter probes/genes after clustering (for tabular output)</option>
539 </param>
540 <when value="input">
541 <conditional name="select_filtering">
542 <param name="filtering_stepBis_selector" type="select" label="Filter">
543 <option value="diffExpParam" selected="true">Based on tabular file content</option>
544 <option value="geneList">From list of genes</option>
545 </param>
546 <when value="diffExpParam">
547 <param type="data" name="differentialAnalysis" format="tabular" label="Tabular file containing filtering information" optional="false" multiple="false">
548 </param>
549 <!-- Low filter: the command section sends these columns as comparisonNameLow and the low threshold as FCthreshold. -->
550 <param name="comparisonsToIncludeLow" type="select" optional="true" multiple="true" label="Select columns to consider for low filtering (keeping rows with higher value than a low threshold, e.g. FC)" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])">
551 </param>
552
553 <param name="valThresholdLow" type="float" value="0.0" optional="true" label="Low filtering threshold" help="When several columns are selected a conservative rule is applied (see details below)">
554 </param>
555 <!-- High filter: the command section sends these columns as comparisonNameHigh and the high threshold as pvalThreshold. -->
556 <param name="comparisonsToIncludeHigh" type="select" optional="true" multiple="true" label="Select columns to consider for high filtering (keeping rows with lower value than a high threshold, e.g. p-value)" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])">
557 </param>
558
559 <param name="valThresholdHigh" type="float" value="0.0" optional="true" label="High filtering threshold" help="When several columns are selected a conservative rule is applied (see details below)" >
560 </param>
561 </when>
562 <when value="geneList">
563 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in input file"/>
564 </when>
565 </conditional>
566 </when>
567
568 <when value="output">
569 <conditional name="select_filtering">
570 <param name="filtering_stepBis_selector" type="select" label="Filter">
571 <option value="diffExpParam" selected="true">Based on tabular file content</option>
572 <option value="geneList">From list of genes</option>
573 </param>
574 <when value="diffExpParam">
575 <param type="data" name="differentialAnalysis" format="tabular" label="Tabular file containing filtering information" optional="false" multiple="false">
576 </param>
577
578 <param name="comparisonsToIncludeLow" type="select" optional="true" multiple="true" label="Select columns to consider for low filtering (keeping rows with higher value than a low threshold, ae. FC)" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])">
579 </param>
580
581 <param name="valThresholdLow" type="float" value="0.0" optional="true" label="Low filtering threshold" help="When several comparisons are selected a conservative rule is applied (see details below)">
582 </param>
583
584 <param name="comparisonsToIncludeHigh" type="select" optional="true" multiple="true" label="Select columns to consider for high filtering (keeping rows with lower value than a high threshold, ae. p-value)" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])">
585 </param>
586
587 <param name="valThresholdHigh" type="float" value="0.0" optional="true" label="High filtering threshold" help="When several columns are selected a conservative rule is applied (see details below)" >
588 </param>
589 </when>
590 <when value="geneList">
591 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in input file"/>
592 </when>
593 </conditional>
594 </when>
595 <when value="no">
596 </when>
597 </conditional>
598
599 </when>
600 </conditional>
601
602 <section name="advSection" title="Advanced parameters" expanded="false"> <!-- Advanced clustering and plotting options; the section is declared with expanded="false". -->
603
604 <param name="clusterNumber" type="integer" value="5" label="Requested number of genes clusters" help="Use scree plot to adjust the number of genes clusters">
605 <validator type="in_range" min="2" message="Cluster number should be greater than 1"/>
606 </param>
607
608 <param name="conditionClusterNumber" type="integer" value="1" label="Requested number of conditions clusters (1 = no clustering)">
609 <validator type="in_range" min="1" message="Cluster number should be greater than 0"/>
610 </param>
611
612 <param name="dataTransformation" type="select" label="Apply mathematical transformation to data before clustering">
613 <option value="no" selected="true">No</option>
614 <option value="log">Natural Logarithm</option>
615 <option value="log2">Base 2 Logarithm</option>
616 </param>
617
618 <param name="distanceMeasure" type="select" label="Distance measure used for clustering" help="See documentation of 'Dist' R package for more information">
619 <option value="euclidean" selected="true">euclidean</option>
620 <option value="manhattan">manhattan</option>
621 <option value="binary">binary</option>
622 <option value="pearson">pearson</option>
623 <option value="spearman">spearman</option>
624 <option value="kendall">kendall</option>
625 </param>
626
627 <param name="aggloMethod" type="select" label="Agglomeration method used for clustering" help="See documentation of 'hclust' R method for more information"> <!-- The labels "ward1" and "ward2" map to the values "ward.D" and "ward.D2". -->
628 <option value="complete">complete</option>
629 <option value="median">median</option>
630 <option value="centroid">centroid</option>
631 <option value="average">average</option>
632 <option value="single">single</option>
633 <option value="mcquitty">mcquitty</option>
634 <option value="ward.D">ward1</option>
635 <option value="ward.D2" selected="true">ward2</option>
636 </param>
637
638 <conditional name="conditionBarColor"> <!-- Optional colored side bar: when enabled, asks for a factors file, a single factor and a color palette. -->
639 <param name="conditionBarColor_selector" type="select" label="Add side bar color for samples/comparisons">
640 <option value="no" selected="true">No</option>
641 <option value="yes">Yes please</option>
642 </param>
643 <when value="yes">
644 <param type="data" name="factorFileDataBarPlot" format="tabular" label="Factors file" multiple="false" help="Available only for expression data clustering"/>
645 <param name="factorToUse" type="select" optional="false" multiple="false" label="Select factor to use for coloring side bar" refresh_on_change="true" dynamic_options="get_column_names(conditionBarColor['factorFileDataBarPlot'].file_name,0)">
646 <validator type="empty_field" message="You should specify one factor"></validator>
647 </param>
648 <param name="sideBarPalette" type="select" label="Side bar color palette">
649 <option value="Spectral" selected="true">Spectral</option>
650 <option value="Set1">Set1</option>
651 <option value="Set2">Set2</option>
652 <option value="Set3">Set3</option>
653 <option value="RdYlBu">RdYlBu</option>
654 <option value="RdYlGn">RdYlGn</option>
655 <option value="PiYG">PiYG</option>
656 </param>
657 </when>
658 <when value="no">
659 </when>
660 </conditional>
661
662 <param name="maxSampleToPlot" type="integer" value="1000" label="Maximum gene number to plot">
663 <validator type="in_range" min="2" message="The number should be greater than 1"/>
664 </param>
665
666 <conditional name="select_color"> <!-- Optional custom heatmap gradient defined by min, medium and max colors. -->
667 <param type="boolean" name="specifyColors" checked="false" label="Personalized heatmap colors">
668 </param>
669 <when value="true">
670 <param name="featureMin_color" type="color" label="Min value color" value="#ff00ff">
671 </param>
672
673 <param name="featureMedium_color" type="color" label="Medium value color" value="#4455ff">
674 </param>
675
676 <param name="featureMax_color" type="color" label="Max value color" value="#00ffff">
677 </param>
678 </when>
679 <when value="false">
680 </when>
681 </conditional>
682
683 <param type="select" name="imageFormat" display="radio" label="Output format">
684 <option value="png">PNG format</option>
685 <option value="pdf">PDF format</option>
686 </param>
687 <param type="select" name="imagePlotlyFormat" display="radio" label="Html snapshot format">
688 <option value="png">PNG format</option>
689 <option value="svg">SVG format</option>
690 </param>
691 <param name="scaleSnapshot" type="float" value="1.0" label="Scale html snapshots to increase resolution" help="Minimum value is 1.0 (default resolution)" >
692 <validator type="in_range" min="1.0" exclude_min="false" message="Scale should be greater than or equal to 1"/> <!-- The bound 1.0 itself is accepted (exclude_min is false), so the message states "or equal". -->
693 </param>
694 </section>
695
696 </inputs>
697
698
699
700 <outputs> <!-- Datasets declared by the tool; every label starts with the value of ${title}. -->
701 <data format="tabular" name="outputData" label="${title}_ClusteringResults"/> <!-- Tabular clustering results. -->
702
703 <data format="html" name="html_file" label="${title}_HTML.html"/> <!-- HTML report. The plot collection that follows is commented out and therefore not produced. -->
704 <!--
705 <collection name="outputHeatmap" label="${title}_Heatmap" type="list">
706 <discover_datasets pattern="(?P&lt;designation&gt;Heatmap.*)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
707 <discover_datasets pattern="(?P&lt;designation&gt;screePlot.*)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
708 <discover_datasets pattern="(?P&lt;designation&gt;circularPlot.*)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
709 </collection>
710 -->
711 <data format="txt" name="log" label="${title}_Log" /> <!-- Plain-text execution log. -->
712 </outputs>
713
714
715
716 <tests> <!-- NOTE(review): comparisonsToInclude, FCthreshold and pvalThreshold are not declared among the inputs visible in this part of the file; confirm they match parameters declared earlier in the inputs section. -->
717 <test maxseconds="7200"> <!-- Expression data clustering with "input" filtering based on a differential analysis file; only the log output is compared (lines_diff="6"). -->
718 <param name="dataToCluster_selector" value="expression" />
719 <param name="expressionData" value="./NormalizedData.tabular" />
720 <param name="filtering_step_selector" value="input" />
721 <param name="filtering_stepBis_selector" value="diffExpParam" />
722 <param name="differentialAnalysis" value="./LIMMAstatistics.tabular" />
723 <param name="comparisonsToInclude" value="WT*WY14643-KO*WY14643" />
724 <param name="FCthreshold" value="1.2" />
725 <param name="pvalThreshold" value="0.05" />
726 <output name="log" file="./HierarchicalClustering/ExpressionClustering.log" lines_diff="6" />
727 </test>
728 <test maxseconds="7200"> <!-- Fold change clustering with "output" filtering; both the tabular results and the log (lines_diff="6") are compared. -->
729 <param name="dataToCluster_selector" value="foldChange" />
730 <param name="differentialAnalysis" value="./LIMMAstatistics.tabular" />
731 <param name="comparisonsToInclude" value="WT*WY14643+KO*WY14643-WT*Control-KO*Control,WT*WY14643+WT*Control-KO*WY14643-KO*Control" />
732 <param name="filtering_step_selector" value="output" />
733 <param name="filtering_stepBis_selector" value="diffExpParam" />
734 <param name="FCthreshold" value="1.2" />
735 <param name="pvalThreshold" value="0.05" />
736 <output name="outputData" file="./HierarchicalClustering/foldChangeClustering.tabular" />
737 <output name="log" file="./HierarchicalClustering/foldChangeClustering.log" lines_diff="6" />
738 </test>
739 </tests>
740
741
742
743 <help>
744 <![CDATA[
745 **What it does**
746
747 Run hierarchical clustering on gene expression data or differential expression analysis (from arrays and RNA-seq studies) and display the corresponding heatmap.
748
749 -----
750
751 **Parameters**
752
753 \- **Title** to personalize output file names (please avoid special characters).
754
755 \- **Data to cluster**, genes can be clustered based on : expression data, results from differential analysis tool or any tabular file content.
756
757
758 - **Expression data** with samples as columns and genes as rows (header row contains sample names and first column gene identifiers).
759
760 ::
761
762 Conditions 157_(HuGene-2_0-st).CEL 156_(HuGene-2_0-st).CEL 155_(HuGene-2_0-st).CEL 154_(HuGene-2_0-st).CEL
763 DDX11L2 4.500872 4.429759 4.780281 4.996189
764 MIR1302-2 3.415065 3.520472 3.471503 3.567988
765 OR4F5 3.737956 3.011586 3.424494 3.497545
766 VWA1 5.189621 5.129595 4.806793 5.227014
767
768
769 - **Differential expression analysis results** with contrasts statistics (p-val, FDR p-val, FC, log2(FC) and t-statistic) as columns and genes as rows (first and second rows contain comparison definition and first and second columns contain gene identifiers and functional information). Please respect the GIANT-Differential Expression Analysis tool output format.
770
771 ::
772
773 LIMMA comparison WT*Treat WT*Treat WT*Treat WT*Treat WT*Treat
774 Gene Info p-val FDR.p-val FC log2(FC) t-stat
775 ARSD na 0.0057 0.41 0.8389 -0.2534 -5.175
776 TTTY10 na 1.6e-07 0.0074 0.6403 -0.6432 -6.122
777 MIR548AL na 0.072 0.2914 1.711 0.775 10.43
778
779 \- **Comparisons to cluster** when clustering is performed on differential results, log2(FC) values of selected comparisons will be used.
780
781 - **Generic tabular data** with samples as columns and genes as rows (header row contains sample names and first column gene identifiers).
782
783 ::
784
785 Conditions SampleA SampleB SampleC SampleD
786 DDX11L2 4.500872 4.429759 4.780281 4.996189
787 MIR1302-2 3.415065 3.520472 3.471503 3.567988
788 OR4F5 3.737956 3.011586 3.424494 3.497545
789 VWA1 5.189621 5.129595 4.806793 5.227014
790
791 \- **Samples to cluster** when clustering is performed on generic data, the user has to select the columns to consider in clustering (first column, containing gene identifiers, will be automatically selected).
792
793
794 \- **Reorder samples** (only available for expression and generic data clustering).
795
796 - **Based on a factors file**, samples will be sorted in an alphabetical/numerical order for the selected factors. Names in the 1st column of the factors file have to match with the columns names of the data to cluster.
797
798 ::
799
800 Conditions Sex Treatment Reaction
801 154_(HuGene-2_0-st).CEL 1 TreatA Pos
802 156_(HuGene-2_0-st).CEL 0 NoTreat Pos
803 157_(HuGene-2_0-st).CEL 0 TreatB Neg
804 155_(HuGene-2_0-st).CEL 0 NoTreat Neg
805
806 \- **Genes filtering** can be applied before or after clustering step.
807
808 - **Filtering before clustering** makes it possible to restrict clustering to differentially expressed genes using differential analysis results (available for expression data and differential results clustering) or any generic file (available for generic data clustering). As an alternative, a specific gene list file can be directly used for filtering.
809
810 - **Filtering after clustering** will have no effect on clustering or generated heatmaps. This filter is only applied to generated tabular files to keep differentially expressed genes (using differential analysis file or any generic file) or specific user defined genes (using gene list file).
811
812 \- **Filter approaches** : three filtering strategies can be applied before/after clustering depending on the nature of clustered data. These strategies use : differential analysis results (available for expression data and differential results clustering), generic file content (available for generic data clustering) or a gene list file (available for any input data).
813
814 - **From differential analysis results** to filter genes based on fold change and FDR p-val for selected comparisons.
815
816 \- **Differential expression results file** is requested only for expression data clustering. For differential results clustering, the same differential results file selected as "data to cluster" is used. (see "Data to cluster section" for requested format)
817
818 \- **Comparisons to use** are requested only for expression data clustering. For differential results clustering, the same comparisons selected in "data to cluster" section will be used. If several comparisons are selected, genes that satisfy both fold change and FDR p-val thresholds in at least one of these comparisons are kept.
819
820 \- **Fold change threshold** to use for filtering, genes with fold change >= threshold or fold change <= 1/threshold will be kept (set this threshold to 1 if you do not want to filter on fold change).
821
822 \- **FDR p-val threshold** to use for filtering, genes with FDR p-val <= threshold will be kept (set this threshold to 1 if you do not want to filter on FDR p-val).
823
824
825 - **From generic tabular file** to filter genes based on selected columns values.
826
827 \- **Generic tabular file** contains genes in the first column and various information used for filtering in the following columns (same format as clustered generic tabular file).
828
829 \- **Low filtering columns** used to discard rows with values below a given threshold (typically for Fold Change filtering). If several columns are selected, rows satisfying threshold condition in at least one of these columns are kept.
830
831 \- **Low filtering threshold** below which the rows are discarded, the same threshold is applied for all selected columns.
832
833 \- **High filtering columns** used to discard rows with values above a given threshold (typically for p-value filtering). If several columns are selected, rows satisfying threshold condition in at least one of these columns are kept.
834
835 \- **High filtering threshold** above which the rows are discarded, the same threshold is applied for all selected columns.
836
837 - **From list of genes** to focus on pre-identified genes.
838
839 \- **Gene list file** with gene identifiers, as a one-column file without header.
840
841 ::
842
843 DDX11L2
844 VWA1
845 TTTY10
846 ARSD
847
848 -----
849
850 **Advanced parameters**
851
852 \- **Genes cluster number** used by hierarchical clustering (minimum is 2). See generated screeplot to adjust this number before re-running a clustering.
853
854 \- **Samples/comparisons clusters number** used by hierarchical clustering applied on columns/conditions. Set to 1 (ie. no clustering) if you need to conserve input columns order for visualization purposes. Columns clusters information is not included (yet) in output tabular file.
855
856 \- **Mathematical transformation** can be applied to clustered data before clustering and visualization. Data used for the filtering step are not modified by this transformation.
857
858 \- **Distance measure** used to cluster rows and columns.
859
860 \- **Agglomeration method** used to cluster rows and columns.
861
862 \- **Add side bar** to visualize factor values for displayed columns/conditions, represented as a colored side bar in the heatmap.
863
864 - **Factor file** that contains factor information for coloring (same format as the factor file used for input data columns reordering).
865
866 - **Factor to use** to color side bar depending on its values for displayed columns/conditions.
867
868 - **Color palette used** for coloring factor values (see RColorBrewer R package documentation for more information on proposed palettes).
869
870 \- **Maximum gene number** : for readability and running time considerations only, number of displayed rows (genes) in heatmaps/circular plot can be limited. Clustering information in generated tabular file and scree plot are computed from a global clustering considering all genes (excepting those filtered out before clustering). Heatmap and circular plot are displayed for a random gene selection, to avoid such random selection we advise you to use input filtering option before clustering to have a gene number below this limit.
871
872 \- **Personalized heatmap colors** to build color gradient choosing start, middle and end colors.
873
874 \- **Output format** for circular plots only.
875
876 \- **Html snapshot format** for interactive plotly plots.
877
878 \- **Scale html snapshots** to increase resolution of snapshots taken from interactive plotly plots.
879
880 -----
881
882 **Outputs**
883
884 \- **Tabular clustering file** containing cluster information for each gene satisfying filtering steps. If expression or generic data were clustered, a two-column file is generated with gene identifiers and cluster numbers, possibly with additional columns containing information used for filtering. If differential results were clustered, a similar file is returned with an additional column containing cluster numbers and differential statistics corresponding to comparisons used for filtering.
885
886 \- **HTML file** to access interactive version of heatmap and screeplot through PlotLy html pages, circular plot image and tabulated clustering results. As a reminder, when the number of genes to display in heatmap/circular plot exceeds the maximum gene number parameter, a random sampling is performed for plotting efficiency. Thus, clustering displayed on heatmap/circular plot may slightly differ from clustering information contained in tabular file as heatmap/circular plot clustering is done over a subset of genes whereas tabular file contains clustering results performed on all genes.
887
888 \- **LOG file** containing information about execution. Useful especially if tool execution fails. Please attach this log file in any bug report.
889
890 ]]>
891 </help>
892 <citations> <!-- BibTeX entries for the GIANT tool suite, heatmaply, circlize and plotly. -->
893 <citation type="bibtex">@misc{vandel_jimmy_2018_1477870, author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.}, title = {GIANT: Galaxy-based Interactive tools for ANalaysis of Transcriptomic data}, month = nov, year = 2018, doi = {10.5281/zenodo.1477870}, url = {https://doi.org/10.5281/zenodo.1477870}
894 }</citation>
895
896 <citation type="bibtex">@article{,
897 author = {Galili, Tal and O'Callaghan, Alan and
898 Sidi, Jonathan and Sievert, Carson},
899 title = {heatmaply: an R package for creating interactive cluster
900 heatmaps for online publishing},
901 journal = {Bioinformatics},
902 year = {2017},
903 doi = {10.1093/bioinformatics/btx657},
904 url = {http://dx.doi.org/10.1093/bioinformatics/btx657},
905 eprint =
906 {https://academic.oup.com/bioinformatics/article-pdf/doi/10.1093/bioinformatics/btx657/21358327/btx657.pdf}
907 }</citation> <!-- NOTE(review): the heatmaply entry above has an empty BibTeX citation key ("@article{,"); confirm that the consuming BibTeX parser accepts it. -->
908
909 <citation type="bibtex">@article{doi:10.1093/bioinformatics/btu393,
910 author = {Gu, Zuguang and Gu, Lei and Eils, Roland and Schlesner, Matthias and Brors, Benedikt},
911 title = {circlize implements and enhances circular visualization in R },
912 journal = {Bioinformatics},
913 volume = {30},
914 number = {19},
915 pages = {2811-2812},
916 year = {2014},
917 doi = {10.1093/bioinformatics/btu393},
918 URL = {http://dx.doi.org/10.1093/bioinformatics/btu393},
919 eprint = {/oup/backfile/content_public/journal/bioinformatics/30/19/10.1093_bioinformatics_btu393/2/btu393.pdf}
920 }</citation>
921
922 <citation type="bibtex">@online{plotly, author = {Plotly Technologies Inc.}, title = {Collaborative data science}, publisher = {Plotly Technologies Inc.}, address = {Montreal, QC}, year = {2015}, url = {https://plot.ly}
923 }</citation>
924
925
926 </citations>
927
928 </tool>