Mercurial > repos > vandelj > giant_hierarchical_clustering
comparison galaxy/wrappers/ExprHeatmapClustering.xml @ 0:14045c80a222 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author | vandelj |
---|---|
date | Fri, 26 Jun 2020 09:38:23 -0400 |
parents | |
children | 0b09345fa632 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:14045c80a222 |
---|---|
1 <tool name="GIANT-Heatmap and Hierarchical clustering" id="giant_hierarchical_clustering" version="0.5.1"> | |
2 <description>Run hierarchical clustering and plot heatmap from expression data and/or differential expression analysis</description> | |
3 <requirements> | |
4 <requirement type="package" version="4.8.0">r-plotly</requirement> | |
5 <requirement type="package" version="1.12.0">r-dendextend</requirement> | |
6 <requirement type="package" version="0.1_20">r-ggdendro</requirement> | |
7 <requirement type="package" version="3.2.1">r-ggplot2</requirement> | |
8 <requirement type="package" version="0.16.0">r-heatmaply</requirement> | |
9 <requirement type="package" version="0.4.8">r-circlize</requirement> | |
10 <requirement type="package" version="1.18.1">bioconductor-complexheatmap</requirement> | |
11 <requirement type="package" version="2.2.2">pandoc</requirement> | |
12 </requirements> | |
13 <code file='../../src/General_functions.py'/> | |
14 <stdio> <!-- Fail the job when R reports an error on stdout/stderr or when the wrapper exits with a code between 1 and 10 --> | |
15 <regex match="Execution halted" | |
16 source="both" | |
17 level="fatal" | |
18 description="Execution halted, please contact tool developer or administrators." /> | |
19 <regex match="Error in" | |
20 source="both" | |
21 level="fatal" | |
22 description="An error occurred during R execution, please contact tool developer." /> | |
23 <exit_code range="10" level="fatal" description="Missing file during html report, see log file for more information." /> | |
24 <exit_code range="1:9" level="fatal" description="Error in R execution, see log file for more information." /> | |
25 </stdio> | |
26 <command> <![CDATA[ | |
27 | |
28 #if ($dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData") and $dataToCluster.expressionData: | |
29 | |
30 ##start by selecting specific input data columns depending on user request (columns are matched on header names, the first column is always kept) | |
31 #if $dataToCluster.dataToCluster_selector=="genericData" and $dataToCluster.columnToKeep: | |
32 awk -v columns="$dataToCluster.columnToKeep" 'BEGIN{FS="\t";OFS="";ORS="";split(columns,columnsTab,",")} FNR==1{for(iColumn=1;iColumn<=length(columnsTab);iColumn++)for(iField=2;iField<=NF;iField++){if(\$iField==columnsTab[iColumn])colsToSelect[iColumn]=iField}} {line=\$1;for(iColumn=1;iColumn<=length(columnsTab);iColumn++)line=line"\t"\$colsToSelect[iColumn];print line"\n";}' '$dataToCluster.expressionData' > ./selectedExpressionData; | |
33 #else | |
34 cp '$dataToCluster.expressionData' ./selectedExpressionData; | |
35 #end if | |
36 | |
37 ##reorder columns of input data based on factors file | |
38 #if $dataToCluster.reorder_sample.reordering_selector=="factorFile" and $dataToCluster.reorder_sample.factorFileData and $dataToCluster.reorder_sample.factorToUse: | |
39 awk -v factors="$dataToCluster.reorder_sample.factorToUse" 'BEGIN{FS="\t";OFS="";ORS="";split(factors,factorsTab,",")} FNR==1{for(iFactor=1;iFactor<=length(factorsTab);iFactor++)for(iField=2;iField<=NF;iField++){if(\$iField==factorsTab[iFactor])colsToSelect[iFactor]=iField}} FNR>1{line=\$1;for(iFactor=1;iFactor<=length(factorsTab);iFactor++)line=line"\t"\$colsToSelect[iFactor];print line"\n";}' '$dataToCluster.reorder_sample.factorFileData' > ./orderingFactor; | |
40 | |
41 sort -V -k2 ./orderingFactor > ./orderingSample; | |
42 ##FILENAME==ARGV[n] tells which input file is being read and works with any awk (ARGIND is only defined by gawk) | |
43 awk 'BEGIN{FS="\t";OFS="";ORS="";factorNumber=0} FILENAME==ARGV[1]{sampleOrdered[FNR]=\$1;factorNumber=FNR} FILENAME==ARGV[2] && FNR==1{for(iElemt=1;iElemt<=factorNumber;iElemt++)for(iPosit=2;iPosit<=NF;iPosit++)if(\$iPosit==sampleOrdered[iElemt])positOrdered[iElemt]=iPosit} FILENAME==ARGV[2]{line=\$1;for(iElemt=1;iElemt<=factorNumber;iElemt++)if(iElemt in positOrdered)line=line"\t"\$positOrdered[iElemt];print line"\n"}' ./orderingSample ./selectedExpressionData > ./orderedExpressionData; | |
44 | |
45 ##check if some input data columns were lost during the process (compares header field counts before and after reordering) | |
46 awk 'FILENAME==ARGV[1] && FNR==1{colNumbA=NF} FILENAME==ARGV[2] && FNR==1{colNumbB=NF} END{if(colNumbA!=colNumbB) print "[WARNING] "colNumbA-colNumbB" input data columns was removed during reordering due to missing information in factor file!\n"}' ./selectedExpressionData ./orderedExpressionData >> '$log'; | |
47 | |
48 #if $advSection.conditionClusterNumber!="1": | |
49 printf "[WARNING]Sample clustering option is selected, sample reordering will not be preserved!\n" >> '$log'; | |
50 #end if | |
51 #else: | |
52 cp ./selectedExpressionData ./orderedExpressionData; | |
53 #end if | |
54 #end if | |
55 | |
56 | |
57 ##generate common file name for differential analysis results depending on input data nature | |
58 #if ($dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData") and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam" and $dataToCluster.filtering_step.select_filtering.differentialAnalysis: | |
59 cp '${dataToCluster.filtering_step.select_filtering.differentialAnalysis}' ./filteredDifferentialAnalysis; | |
60 #end if | |
61 #if $dataToCluster.dataToCluster_selector=="foldChange" and $dataToCluster.differentialAnalysis: | |
62 cp '$dataToCluster.differentialAnalysis' ./filteredDifferentialAnalysis; | |
63 #end if | |
64 | |
65 | |
66 ##generate factor information to use for barplot (keeps the first column and the column whose header equals the selected factor) | |
67 #if $advSection.conditionBarColor.conditionBarColor_selector=="yes" and $advSection.conditionBarColor.factorFileDataBarPlot and $advSection.conditionBarColor.factorToUse: | |
68 awk -v factor="$advSection.conditionBarColor.factorToUse" 'BEGIN{FS="\t";OFS="";ORS=""} NR==1{for(i=2;i<=NF;i++)if(\$i==factor)colToKeep=i} {print \$1"\t"\$colToKeep"\n"}' '$advSection.conditionBarColor.factorFileDataBarPlot' > ./barPlotFactor; | |
69 #end if | |
70 | |
71 Rscript '$__tool_directory__/../../src/heatMapClustering.R' --log '$log' --outputFile '$outputData' --format '$advSection.imageFormat' --clusterNumber '$advSection.clusterNumber' --maxRows '$advSection.maxSampleToPlot' --sampleClusterNumber '$advSection.conditionClusterNumber' --dataTransformation '$advSection.dataTransformation' --distanceMeasure '$advSection.distanceMeasure' --aggloMethod '$advSection.aggloMethod' | |
72 #if $advSection.select_color.specifyColors=="true": | |
73 --personalColors '$advSection.select_color.featureMin_color,$advSection.select_color.featureMedium_color,$advSection.select_color.featureMax_color' | |
74 #end if | |
75 #if $advSection.conditionBarColor.conditionBarColor_selector=="yes" and $advSection.conditionBarColor.factorFileDataBarPlot and $advSection.conditionBarColor.factorToUse: | |
76 --factorInfo './barPlotFactor' | |
77 --sideBarColorPalette '$advSection.conditionBarColor.sideBarPalette' | |
78 #end if | |
79 #if $dataToCluster.dataToCluster_selector=="genericData": | |
80 --genericData | |
81 #end if | |
82 #if $dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData": | |
83 --expressionFile './orderedExpressionData' | |
84 #if $dataToCluster.filtering_step.filtering_step_selector!="no": | |
85 --filterInputOutput '$dataToCluster.filtering_step.filtering_step_selector' | |
86 #if $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam": | |
87 --diffAnalyseFile './filteredDifferentialAnalysis' | |
88 #if $dataToCluster.dataToCluster_selector=="expression": | |
89 --comparisonName '$dataToCluster.filtering_step.select_filtering.comparisonsToInclude' | |
90 --FCthreshold '$dataToCluster.filtering_step.select_filtering.FCthreshold' | |
91 --pvalThreshold '$dataToCluster.filtering_step.select_filtering.pvalThreshold' | |
92 #else: | |
93 #if $dataToCluster.filtering_step.select_filtering.comparisonsToIncludeLow and $dataToCluster.filtering_step.select_filtering.valThresholdLow: | |
94 --comparisonNameLow '$dataToCluster.filtering_step.select_filtering.comparisonsToIncludeLow' | |
95 --FCthreshold '$dataToCluster.filtering_step.select_filtering.valThresholdLow' | |
96 #end if | |
97 #if $dataToCluster.filtering_step.select_filtering.comparisonsToIncludeHigh and $dataToCluster.filtering_step.select_filtering.valThresholdHigh: | |
98 --comparisonNameHigh '$dataToCluster.filtering_step.select_filtering.comparisonsToIncludeHigh' | |
99 --pvalThreshold '$dataToCluster.filtering_step.select_filtering.valThresholdHigh' | |
100 #end if | |
101 #end if | |
102 #else: | |
103 --geneListFiltering '$dataToCluster.filtering_step.select_filtering.geneListFile' | |
104 #end if | |
105 #end if | |
106 #else | |
107 --diffAnalyseFile './filteredDifferentialAnalysis' | |
108 --comparisonName '$dataToCluster.comparisonsToInclude' | |
109 #if $dataToCluster.filtering_step.filtering_step_selector!="no": | |
110 --filterInputOutput '$dataToCluster.filtering_step.filtering_step_selector' | |
111 #if $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam": | |
112 --FCthreshold '$dataToCluster.filtering_step.select_filtering.FCthreshold' | |
113 --pvalThreshold '$dataToCluster.filtering_step.select_filtering.pvalThreshold' | |
114 #else: | |
115 --geneListFiltering '$dataToCluster.filtering_step.select_filtering.geneListFile' | |
116 #end if | |
117 #end if | |
118 #end if | |
119 ; | |
120 ret_code=\$?; ##exit status of the Rscript call, propagated unchanged when non-zero | |
121 if [ \$ret_code != 0 ]; then | |
122 exit \$ret_code; | |
123 else | |
124 bash '$scriptTransfer'; | |
125 ret_code=\$?; | |
126 if [ \$ret_code != 0 ]; then | |
127 exit \$ret_code; | |
128 fi | |
129 fi; | |
130 printf "[INFO]End of tool script" >> '$log'; | |
131 ]]> | |
132 </command> | |
133 | |
134 | |
135 | |
136 <configfiles> | |
137 <configfile name="scriptTableToHtml"> | |
138 <![CDATA[ | |
139 printf "<!DOCTYPE html> | |
140 <html> | |
141 <head> | |
142 <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\"> | |
143 <link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css\"> | |
144 <script type=\"text/javascript\" language=\"javascript\" src=\"https://code.jquery.com/jquery-1.12.4.js\"> | |
145 </script> | |
146 <script type=\"text/javascript\" language=\"javascript\" src=\"https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js\"> | |
147 </script> | |
148 <script type=\"text/javascript\" class=\"init\"> | |
149 \\$(document).ready(function() { | |
150 \\$(\'\#example\').DataTable( { | |
151 \"columnDefs\": [ { | |
152 \"visible\": false, | |
153 \"targets\": -1 | |
154 } ] | |
155 } ); | |
156 } ); | |
157 </script> | |
158 </head> | |
159 <body style=\"background-color:white;\"> | |
160 <table id=\"example\" class=\"display\" cellspacing=\"0\"> | |
161 " > ${html_file.extra_files_path}/outputClustering.html | |
162 ##the page header written above loads jQuery and DataTables from CDNs and hides the last table column; everything below appends the table built from the tabular output file, starting with colored column groups (5 columns per group when differential results are shown, 1 column otherwise) | |
163 printf "<colgroup>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
164 #if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"): | |
165 printf "<col span=\"2\" style=\"background-color:rgb(224,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html | |
166 printf "<col span=\"1\" style=\"background-color:rgb(250,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html | |
167 awk 'BEGIN{odd=1;FS="\t"} NR==1{for(i=4;i<=NF;i=i+5){if(odd==1){odd=0;printf "<col span=\"5\" style=\"background-color:rgb(224,238,255)\">\n"}else{odd=1;printf "<col span=\"5\" style=\"background-color:rgb(255,221,224)\">\n"}}}' $outputData >> ${html_file.extra_files_path}/outputClustering.html | |
168 #else | |
169 printf "<col span=\"1\" style=\"background-color:rgb(224,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html | |
170 printf "<col span=\"1\" style=\"background-color:rgb(250,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html | |
171 awk 'BEGIN{odd=1;FS="\t"} NR==1{for(i=3;i<=NF;i++){if(odd==1){odd=0;printf "<col span=\"1\" style=\"background-color:rgb(224,238,255)\">\n"}else{odd=1;printf "<col span=\"1\" style=\"background-color:rgb(255,221,224)\">\n"}}}' $outputData >> ${html_file.extra_files_path}/outputClustering.html | |
172 #end if | |
173 | |
174 printf "</colgroup>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
175 printf "<thead>\n<tr>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
176 ##table header: with differential results, a first row of names taken every 5th field of line 1 (each spanning 5 columns) and a second row taken from line 2; otherwise a single row taken from line 1 | |
177 #if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"): | |
178 printf "<th rowspan=\"2\">Gene</th>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
179 printf "<th rowspan=\"2\">Info</th>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
180 printf "<th rowspan=\"2\">Cluster</th>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
181 awk 'BEGIN{FS="\t"} NR==1{for(i=4;i<=NF;i=i+5)printf "<th colspan=\"5\">"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html | |
182 printf "<th></th>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
183 printf "</tr>\n<tr>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
184 awk 'BEGIN{FS="\t"} NR==2{for(i=4;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html | |
185 #else | |
186 printf "<th rowspan=\"1\">Gene</th>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
187 printf "<th rowspan=\"1\">Cluster</th>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
188 awk 'BEGIN{FS="\t"} NR==1{for(i=3;i<=NF;i++)printf "<th colspan=\"1\">"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html | |
189 #end if | |
190 ##every row ends with one extra empty cell; it is the last column, which the DataTable configuration above hides | |
191 printf "<th></th>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
192 printf "</tr>\n</thead>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
193 printf "<tfoot>\n<tr>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
194 ##table footer repeats the column names (line 2 of the output file with differential results, line 1 otherwise) | |
195 #if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"): | |
196 awk 'BEGIN{FS="\t"} NR==2{for(i=1;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html | |
197 #else | |
198 awk 'BEGIN{FS="\t"} NR==1{for(i=1;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html | |
199 #end if | |
200 | |
201 printf "<th></th>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
202 printf "</tr>\n</tfoot>\n">> ${html_file.extra_files_path}/outputClustering.html | |
203 printf "<tbody>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
204 ##table body: one row per line following the header line(s); NOTE(review): field values are written as-is, without HTML escaping | |
205 #if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"): | |
206 awk 'BEGIN{FS="\t"} NR>2{printf "<tr>\n";for(i=1;i<=NF;i++){printf "<th>"\$i"</th>\n"};printf "<th></th>\n";printf "</tr>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html | |
207 #else | |
208 awk 'BEGIN{FS="\t"} NR>1{printf "<tr>\n";for(i=1;i<=NF;i++){printf "<th>"\$i"</th>\n"};printf "<th></th>\n";printf "</tr>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html | |
209 #end if | |
210 | |
211 printf "</tbody>\n" >> ${html_file.extra_files_path}/outputClustering.html | |
212 | |
213 printf "</table> | |
214 </body> | |
215 </html>" >> ${html_file.extra_files_path}/outputClustering.html | |
216 | |
217 ]]> | |
218 </configfile> | |
219 | |
220 <configfile name="scriptTransfer"> | |
221 <![CDATA[ | |
222 | |
223 mkdir -p '$html_file.extra_files_path' | |
224 | |
225 | |
226 ##create HTML file for clustering output table | |
227 source '$scriptTableToHtml' | |
228 | |
229 ##check outputClustering.html is here (exit code 10 is reported as a missing report file) | |
230 if ! [ -e '${html_file.extra_files_path}/outputClustering.html' ]; then | |
231 printf "[ERROR]outputClustering.html is missing.\n" >> '$log'; | |
232 exit 10 | |
233 fi | |
234 | |
235 ##write header of html file | |
236 printf "<!DOCTYPE html>\n<html>\n<body>" > '$html_file' | |
237 | |
238 | |
239 ##first add reference of the clustering output table | |
240 printf "<h3>Clustering tabular</h3>\n" >> '$html_file' | |
241 printf "<a href=\"outputClustering.html\">Clustering results</a>\n" >> '$html_file' | |
242 | |
243 | |
244 ##manage heatmap file | |
245 | |
246 | |
247 if [ -e ./plotLyDir/Heatmap.html ]; then | |
248 | |
249 printf "<h3>Heatmap plot</h3>\n" >> '$html_file' | |
250 | |
251 ##modify HTML so that it points to the script folder under its final name (PlotLy_Heatmap_scripts) | |
252 sed -i "s/Heatmap_files/PlotLy_Heatmap_scripts/g" ./plotLyDir/Heatmap.html | |
253 | |
254 ##copy HTML page into the report folder | |
255 cp ./plotLyDir/Heatmap.html '${html_file.extra_files_path}/Heatmap.html' | |
256 | |
257 ##add HTML link | |
258 printf "<a href=\"Heatmap.html\">Heatmap</a>\n" >> '$html_file' | |
259 | |
260 #if $advSection.imagePlotlyFormat=="svg": | |
261 ##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work) | |
262 ( cd ./plotLyDir/Heatmap_files/plotly-main-*/ && | |
263 awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js && | |
264 awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js && | |
265 rm ./plotly-latest.minTemp.js ) || printf "[WARNING]Unable to switch Heatmap snapshot format to svg.\n" >> '$log' | |
266 ##the subshell keeps the working directory unchanged even when the plotly folder cannot be entered | |
267 #end if | |
268 | |
269 #if $advSection.scaleSnapshot!="1.0": | |
270 ##before copying scripts folder modify scale parameter (not proud of solution but seems to work) | |
271 ( cd ./plotLyDir/Heatmap_files/plotly-main-*/ && | |
272 awk '{gsub("h=t\\.scale\\|\\|1","h=$advSection.scaleSnapshot",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js && | |
273 mv -f ./plotly-latest.minTemp.js ./plotly-latest.min.js ) || printf "[WARNING]Unable to modify Heatmap snapshot scale.\n" >> '$log' | |
274 ##the subshell keeps the working directory unchanged even when the plotly folder cannot be entered | |
275 #end if | |
276 | |
277 ##now copy scripts folder | |
278 cp -r ./plotLyDir/Heatmap_files '$html_file.extra_files_path' | |
279 mv '${html_file.extra_files_path}/Heatmap_files' '${html_file.extra_files_path}/PlotLy_Heatmap_scripts' | |
280 | |
281 else | |
282 printf "[ERROR]Heatmap.html is missing.\n" >> '$log'; | |
283 exit 10 | |
284 fi | |
285 | |
286 | |
287 | |
288 | |
289 ##manage screePlot files | |
290 | |
291 | |
292 if [ -e ./plotLyDir/screePlot.html ]; then | |
293 | |
294 printf "<h3>Scree plot</h3>\n" >> '$html_file' | |
295 | |
296 ##modify HTML so that it points to the script folder under its final name (PlotLy_screePlot_scripts) | |
297 sed -i "s/screePlot_files/PlotLy_screePlot_scripts/g" ./plotLyDir/screePlot.html | |
298 | |
299 ##copy HTML page into the report folder | |
300 cp ./plotLyDir/screePlot.html '${html_file.extra_files_path}/screePlot.html' | |
301 | |
302 ##add HTML link | |
303 printf "<a href=\"screePlot.html\">Scree plot</a>\n" >> '$html_file' | |
304 | |
305 #if $advSection.imagePlotlyFormat=="svg": | |
306 ##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work) | |
307 ( cd ./plotLyDir/screePlot_files/plotly-main-*/ && | |
308 awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js && | |
309 awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js && | |
310 rm ./plotly-latest.minTemp.js ) || printf "[WARNING]Unable to switch screePlot snapshot format to svg.\n" >> '$log' | |
311 ##the subshell keeps the working directory unchanged even when the plotly folder cannot be entered | |
312 #end if | |
313 | |
314 #if $advSection.scaleSnapshot!="1.0": | |
315 ##before copying scripts folder modify scale parameter (not proud of solution but seems to work) | |
316 ( cd ./plotLyDir/screePlot_files/plotly-main-*/ && | |
317 awk '{gsub("h=t\\.scale\\|\\|1","h=$advSection.scaleSnapshot",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js && | |
318 mv -f ./plotly-latest.minTemp.js ./plotly-latest.min.js ) || printf "[WARNING]Unable to modify screePlot snapshot scale.\n" >> '$log' | |
319 ##the subshell keeps the working directory unchanged even when the plotly folder cannot be entered | |
320 #end if | |
321 | |
322 ##now copy scripts folder | |
323 cp -r ./plotLyDir/screePlot_files '$html_file.extra_files_path' | |
324 mv '${html_file.extra_files_path}/screePlot_files' '${html_file.extra_files_path}/PlotLy_screePlot_scripts' | |
325 | |
326 else | |
327 printf "[WARNING]screePlot.html is missing, probably due to limited number of genes.\n" >> '$log'; | |
328 fi | |
329 | |
330 | |
331 ##manage circular files | |
332 | |
333 | |
334 if [ -e './plotDir/circularPlot.${advSection.imageFormat}' ]; then | |
335 | |
336 cp './plotDir/circularPlot.${advSection.imageFormat}' '${html_file.extra_files_path}/circularPlot.${advSection.imageFormat}' | |
337 | |
338 printf "<h3>Circular plot</h3>\n" >> '$html_file' | |
339 | |
340 ##add HTML link | |
341 printf "<a href=\"circularPlot.${advSection.imageFormat}\">Circular plot</a>\n" >> '$html_file' | |
342 | |
343 else | |
344 printf "[WARNING]circularPlot file is missing, probably due to limited number of genes.\n" >> '$log'; | |
345 fi | |
346 | |
347 | |
348 | |
349 ##create footer of HTML file (closes the tags opened when the report header was written) | |
350 printf "</body>\n</html>" >> '$html_file' | |
351 | |
352 ]]> | |
353 </configfile> | |
354 </configfiles> | |
355 | |
356 | |
357 | |
358 <inputs> | |
359 <param type="text" name="title" value="Heatmap_toPersonalize" label="Title for output"/> | |
360 | |
361 <conditional name="dataToCluster"> | |
362 <param name="dataToCluster_selector" type="select" label="Data to cluster"> | |
363 <option value="expression" selected="true">Expression data</option> | |
364 <option value="foldChange">Differential expression analysis results</option> | |
365 <option value="genericData">Generic data table</option> | |
366 </param> | |
367 <when value="expression"> | |
368 | |
369 <param type="data" name="expressionData" format="tabular" label="Normalized expression tabular file" multiple="false"/> | |
370 | |
371 <conditional name="reorder_sample"> | |
372 <param name="reordering_selector" type="select" label="Reorder samples"> | |
373 <option value="no" selected="true">No reordering</option> | |
374 <option value="factorFile">Reorder sample based on a factors file</option> | |
375 </param> | |
376 <when value="factorFile"> | |
377 <param type="data" name="factorFileData" format="tabular" label="Factors file" multiple="false" help="Be sure the conditions clusters number is set to 1 in advanced parameters."/> | |
378 <param name="factorToUse" type="select" optional="false" multiple="true" label="Select factor(s) to use for reordering" refresh_on_change="true" dynamic_options="get_column_names(reorder_sample['factorFileData'].file_name,0)"> | |
379 <validator type="empty_field" message="You should specify at least one factor"></validator> | |
380 </param> | |
381 </when> | |
382 <when value="no"> | |
383 </when> | |
384 </conditional> | |
385 | |
386 <conditional name="filtering_step"> | |
387 <param name="filtering_step_selector" type="select" label="Probes/genes filtering"> | |
388 <option value="no" selected="true">No filtering</option> | |
389 <option value="input">Filter input probes/genes before clustering</option> | |
390 <option value="output">Filter probes/genes after clustering (for tabular output)</option> | |
391 </param> | |
392 <when value="input"> | |
393 <conditional name="select_filtering"> | |
394 <param name="filtering_stepBis_selector" type="select" label="Filter"> | |
395 <option value="diffExpParam" selected="true">Based on differential expression results (FC and p-val)</option> | |
396 <option value="geneList">From list of genes</option> | |
397 </param> | |
398 <when value="diffExpParam"> | |
399 <param type="data" name="differentialAnalysis" format="tabular" label="Differential analysis tabular file (as given by LIMMA diff.exp. tool)" optional="false" multiple="false"> | |
400 </param> | |
401 | |
402 <param name="comparisonsToInclude" type="select" optional="false" multiple="true" label="Select comparisons to use for filtering" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0,1],5)"> | |
403 <validator type="empty_field" message="You should specify one factor"></validator> | |
404 </param> | |
405 | |
406 <param name="FCthreshold" type="float" value="2" label="Fold change threshold for input (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)" > | |
407 <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater than 1"/> | |
408 </param> | |
409 <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for input" help="When several comparisons are selected a conservative rule is applied (see details below)" > | |
410 <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/> | |
411 </param> | |
412 </when> | |
413 <when value="geneList"> | |
414 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in expression file"/> | |
415 </when> | |
416 </conditional> | |
417 </when> | |
418 | |
419 <when value="output"> | |
420 <conditional name="select_filtering"> | |
421 <param name="filtering_stepBis_selector" type="select" label="Filter"> | |
422 <option value="diffExpParam" selected="true">Based on differential expression results (FC and p-val)</option> | |
423 <option value="geneList">From list of genes</option> | |
424 </param> | |
425 <when value="diffExpParam"> | |
426 <param type="data" name="differentialAnalysis" format="tabular" label="Differential analysis tabular file (as given by LIMMA diff.exp. tool)" optional="false" multiple="false"> | |
427 </param> | |
428 | |
429 <param name="comparisonsToInclude" type="select" optional="false" multiple="true" label="Select comparisons to use for filtering" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0,1],5)"> | |
430 <validator type="empty_field" message="You should specify one factor"></validator> | |
431 </param> | |
432 | |
433 <param name="FCthreshold" type="float" value="2" label="Fold change threshold for output (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)" > | |
434 <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater than 1"/> | |
435 </param> | |
436 <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for output" help="When several comparisons are selected a conservative rule is applied (see details below)"> | |
437 <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/> | |
438 </param> | |
439 </when> | |
440 <when value="geneList"> | |
441 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in expression file"/> | |
442 </when> | |
443 </conditional> | |
444 </when> | |
445 <when value="no"> | |
446 </when> | |
447 </conditional> | |
448 | |
449 </when> | |
450 | |
451 <when value="foldChange"> | |
452 | |
453 <param type="data" name="differentialAnalysis" format="tabular" label="Differential analysis tabular file (as given by LIMMA diff.exp. tool)" optional="false" multiple="false"> | |
454 </param> | |
455 | |
456 <param name="comparisonsToInclude" type="select" optional="false" multiple="true" label="Select comparisons to cluster" refresh_on_change="true" dynamic_options="get_column_names_filteredList(dataToCluster['differentialAnalysis'].file_name,[0,1],5)"> | |
457 <validator type="empty_field" message="You should specify one factor"></validator> | |
458 </param> | |
459 | |
460 <conditional name="filtering_step"> | |
461 <param name="filtering_step_selector" type="select" label="Probes/genes filtering"> | |
462 <option value="no" selected="true">No filtering</option> | |
463 <option value="input">Filter input probes/genes before clustering</option> | |
464 <option value="output">Filter probes/genes only in tabular output file</option> | |
465 </param> | |
466 <when value="input"> | |
467 <conditional name="select_filtering"> | |
468 <param name="filtering_stepBis_selector" type="select" label="Filter"> | |
469 <option value="diffExpParam" selected="true">Based on differential expression results (FC and p-val)</option> | |
470 <option value="geneList">From list of genes</option> | |
471 </param> | |
472 <when value="diffExpParam"> | |
473 <param name="FCthreshold" type="float" value="2" label="Fold change threshold for input (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)" > | |
474 <validator type="in_range" min="1" exclude_min="false" message="FC threshold should be greater than 1"/> | |
475 </param> | |
476 <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for input" help="When several comparisons are selected a conservative rule is applied (see details below)" > | |
477 <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/> | |
478 </param> | |
479 </when> | |
480 <when value="geneList"> | |
481 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in diff. exp. analysis file"/> | |
482 </when> | |
483 </conditional> | |
484 </when> | |
485 | |
486 <when value="output"> | |
487 <conditional name="select_filtering"> | |
488 <param name="filtering_stepBis_selector" type="select" label="Filter"> | |
489 <option value="diffExpParam" selected="true">Based on diff. exp. parameters (FC and p-val)</option> | |
490 <option value="geneList">From list of genes</option> | |
491 </param> | |
492 <when value="diffExpParam"> | |
493 <param name="FCthreshold" type="float" value="2" label="Fold change threshold for output (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)"> | |
494 <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater than 1"/> | |
495 </param> | |
496 <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for output" help="When several comparisons are selected a conservative rule is applied (see details below)"> | |
497 <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/> | |
498 </param> | |
499 </when> | |
500 <when value="geneList"> | |
501 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in diff. exp. analysis file"/> | |
502 </when> | |
503 </conditional> | |
504 </when> | |
505 <when value="no"> | |
506 </when> | |
507 </conditional> | |
508 | |
509 </when> | |
510 | |
511 <when value="genericData"> | |
512 | |
513 <param type="data" name="expressionData" format="tabular" label="Generic tabular file" multiple="false"/> | |
514 | |
515 <param name="columnToKeep" type="select" optional="false" multiple="true" label="Select column to cluster" refresh_on_change="true" dynamic_options="get_column_names_filteredList(dataToCluster['expressionData'].file_name,[0])"> <!-- Columns of the generic table to cluster; choices are computed from the selected file by get_column_names_filteredList --> | |
516 <validator type="empty_field" message="You should select at least one column"></validator> | |
517 </param> | |
518 | |
519 <conditional name="reorder_sample"> <!-- optional column reordering driven by one or more factors of a factors file --> | |
520 <param name="reordering_selector" type="select" label="Reorder columns"> | |
521 <option value="no" selected="true">No reordering</option> | |
522 <option value="factorFile">Reorder columns based on a factors file</option> | |
523 </param> | |
524 <when value="factorFile"> | |
525 <param name="factorFileData" type="data" format="tabular" label="Factors file" multiple="false" help="Be sure the conditions clusters number is set to 1 in advanced parameters."/> | |
526 <param name="factorToUse" type="select" optional="false" multiple="true" label="Select factor(s) to use for reordering" refresh_on_change="true" dynamic_options="get_column_names(reorder_sample['factorFileData'].file_name,0)"> | |
527 <validator type="empty_field" message="You should specify at least one factor"></validator> | |
528 </param> | |
529 </when> | |
530 <when value="no"> | |
531 </when> | |
532 </conditional> | |
533 | |
534 <conditional name="filtering_step"> | |
535 <param name="filtering_step_selector" type="select" label="Probes/genes filtering"> | |
536 <option value="no" selected="true">No filtering</option> | |
537 <option value="input">Filter input probes/genes before clustering</option> | |
538 <option value="output">Filter probes/genes after clustering (for tabular output)</option> | |
539 </param> | |
540 <when value="input"> | |
541 <conditional name="select_filtering"> | |
542 <param name="filtering_stepBis_selector" type="select" label="Filter"> | |
543 <option value="diffExpParam" selected="true">Based on tabular file content</option> | |
544 <option value="geneList">From list of genes</option> | |
545 </param> | |
546 <when value="diffExpParam"> | |
547 <param type="data" name="differentialAnalysis" format="tabular" label="Tabular file containing filtering information" optional="false" multiple="false"> | |
548 </param> | |
549 <!-- Low filter: rows are kept when their value is higher than the low threshold --> | |
550 <param name="comparisonsToIncludeLow" type="select" optional="true" multiple="true" label="Select columns to consider for low filtering (keeping rows with higher value than a low threshold, e.g. FC)" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])"> | |
551 </param> | |
552 | |
553 <param name="valThresholdLow" type="float" value="0.0" optional="true" label="Low filtering threshold" help="When several columns are selected a conservative rule is applied (see details below)"> | |
554 </param> | |
555 <!-- High filter: rows are kept when their value is lower than the high threshold --> | |
556 <param name="comparisonsToIncludeHigh" type="select" optional="true" multiple="true" label="Select columns to consider for high filtering (keeping rows with lower value than a high threshold, e.g. p-value)" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])"> | |
557 </param> | |
558 | |
559 <param name="valThresholdHigh" type="float" value="0.0" optional="true" label="High filtering threshold" help="When several columns are selected a conservative rule is applied (see details below)" > | |
560 </param> | |
561 </when> | |
562 <when value="geneList"> | |
563 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in input file"/> | |
564 </when> | |
565 </conditional> | |
566 </when> | |
567 <!-- Same controls as the "input" branch, offered for filtering after clustering (tabular output) --> | |
568 <when value="output"> | |
569 <conditional name="select_filtering"> | |
570 <param name="filtering_stepBis_selector" type="select" label="Filter"> | |
571 <option value="diffExpParam" selected="true">Based on tabular file content</option> | |
572 <option value="geneList">From list of genes</option> | |
573 </param> | |
574 <when value="diffExpParam"> | |
575 <param type="data" name="differentialAnalysis" format="tabular" label="Tabular file containing filtering information" optional="false" multiple="false"> | |
576 </param> | |
577 | |
578 <param name="comparisonsToIncludeLow" type="select" optional="true" multiple="true" label="Select columns to consider for low filtering (keeping rows with higher value than a low threshold, e.g. FC)" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])"> | |
579 </param> | |
580 | |
581 <param name="valThresholdLow" type="float" value="0.0" optional="true" label="Low filtering threshold" help="When several columns are selected a conservative rule is applied (see details below)"> | |
582 </param> | |
583 | |
584 <param name="comparisonsToIncludeHigh" type="select" optional="true" multiple="true" label="Select columns to consider for high filtering (keeping rows with lower value than a high threshold, e.g. p-value)" refresh_on_change="true" dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])"> | |
585 </param> | |
586 | |
587 <param name="valThresholdHigh" type="float" value="0.0" optional="true" label="High filtering threshold" help="When several columns are selected a conservative rule is applied (see details below)" > | |
588 </param> | |
589 </when> | |
590 <when value="geneList"> | |
591 <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in input file"/> | |
592 </when> | |
593 </conditional> | |
594 </when> | |
595 <when value="no"> | |
596 </when> | |
597 </conditional> | |
598 | |
599 </when> | |
600 </conditional> | |
601 | |
602 <section name="advSection" title="Advanced parameters" expanded="false"> | |
603 <!-- Clustering settings: cluster counts, optional log transformation, distance measure and agglomeration method --> | |
604 <param name="clusterNumber" type="integer" value="5" label="Requested number of genes clusters" help="Use scree plot to adjust the number of genes clusters"> | |
605 <validator type="in_range" min="2" message="Cluster number should be greater than 1"/> | |
606 </param> | |
607 | |
608 <param name="conditionClusterNumber" type="integer" value="1" label="Requested number of conditions clusters (1 = no clustering)"> | |
609 <validator type="in_range" min="1" message="Cluster number should be greater than 0"/> | |
610 </param> | |
611 | |
612 <param name="dataTransformation" type="select" label="Apply mathematical transformation to data before clustering"> | |
613 <option value="no" selected="true">No</option> | |
614 <option value="log">Natural Logarithm</option> | |
615 <option value="log2">Base 2 Logarithm</option> | |
616 </param> | |
617 | |
618 <param name="distanceMeasure" type="select" label="Distance measure used for clustering" help="See documentation of 'Dist' R package for more information"> | |
619 <option value="euclidean" selected="true">euclidean</option> | |
620 <option value="manhattan">manhattan</option> | |
621 <option value="binary">binary</option> | |
622 <option value="pearson">pearson</option> | |
623 <option value="spearman">spearman</option> | |
624 <option value="kendall">kendall</option> | |
625 </param> | |
626 | |
627 <param name="aggloMethod" type="select" label="Agglomeration method used for clustering" help="See documentation of 'hclust' R method for more information"> | |
628 <option value="complete">complete</option> | |
629 <option value="median">median</option> | |
630 <option value="centroid">centroid</option> | |
631 <option value="average">average</option> | |
632 <option value="single">single</option> | |
633 <option value="mcquitty">mcquitty</option> | |
634 <option value="ward.D">ward1</option> | |
635 <option value="ward.D2" selected="true">ward2</option> | |
636 </param> | |
637 <!-- Optional coloured side bar driven by one factor of a factors file --> | |
638 <conditional name="conditionBarColor"> | |
639 <param name="conditionBarColor_selector" type="select" label="Add side bar color for samples/comparisons"> | |
640 <option value="no" selected="true">No</option> | |
641 <option value="yes">Yes please</option> | |
642 </param> | |
643 <when value="yes"> | |
644 <param name="factorFileDataBarPlot" type="data" format="tabular" label="Factors file" multiple="false" help="Available only for expression data clustering"/> | |
645 <param name="factorToUse" type="select" optional="false" multiple="false" label="Select factor to use for coloring side bar" refresh_on_change="true" dynamic_options="get_column_names(conditionBarColor['factorFileDataBarPlot'].file_name,0)"> | |
646 <validator type="empty_field" message="You should specify one factor"></validator> | |
647 </param> | |
648 <param name="sideBarPalette" type="select" label="Side bar color palette"> | |
649 <option value="Spectral" selected="true">Spectral</option> | |
650 <option value="Set1">Set1</option> | |
651 <option value="Set2">Set2</option> | |
652 <option value="Set3">Set3</option> | |
653 <option value="RdYlBu">RdYlBu</option> | |
654 <option value="RdYlGn">RdYlGn</option> | |
655 <option value="PiYG">PiYG</option> | |
656 </param> | |
657 </when> | |
658 <when value="no"> | |
659 </when> | |
660 </conditional> | |
661 <!-- Upper bound on the number of genes drawn in the plots --> | |
662 <param name="maxSampleToPlot" type="integer" value="1000" label="Maximum gene number to plot"> | |
663 <validator type="in_range" min="2" message="The number should be greater than 1"/> | |
664 </param> | |
665 <!-- Optional user-defined min / medium / max colours for the heatmap gradient --> | |
666 <conditional name="select_color"> | |
667 <param name="specifyColors" type="boolean" checked="false" label="Personalized heatmap colors"> | |
668 </param> | |
669 <when value="true"> | |
670 <param name="featureMin_color" type="color" label="Min value color" value="#ff00ff"> | |
671 </param> | |
672 | |
673 <param name="featureMedium_color" type="color" label="Medium value color" value="#4455ff"> | |
674 </param> | |
675 | |
676 <param name="featureMax_color" type="color" label="Max value color" value="#00ffff"> | |
677 </param> | |
678 </when> | |
679 <when value="false"> | |
680 </when> | |
681 </conditional> | |
682 <!-- Image formats and snapshot scaling --> | |
683 <param name="imageFormat" type="select" display="radio" label="Output format"> | |
684 <option value="png">PNG format</option> | |
685 <option value="pdf">PDF format</option> | |
686 </param> | |
687 <param name="imagePlotlyFormat" type="select" display="radio" label="Html snapshot format"> | |
688 <option value="png">PNG format</option> | |
689 <option value="svg">SVG format</option> | |
690 </param> | |
691 <param name="scaleSnapshot" type="float" value="1.0" label="Scale html snapshots to increase resolution" help="Minimum value is 1.0 (default resolution)" > | |
692 <validator type="in_range" min="1.0" exclude_min="false" message="Scale should be greater than or equal to 1"/> <!-- bound is inclusive: 1.0 is accepted --> | |
693 </param> | |
694 </section> | |
695 | |
696 </inputs> | |
697 | |
698 | |
699 | |
700 <outputs> | |
701 <data format="tabular" name="outputData" label="${title}_ClusteringResults"/> | |
702 <!-- Three datasets are declared: tabular results (above), HTML report and log (below). The plot collection is commented out, i.e. disabled. --> | |
703 <data format="html" name="html_file" label="${title}_HTML.html"/> | |
704 <!-- | |
705 <collection name="outputHeatmap" label="${title}_Heatmap" type="list"> | |
706 <discover_datasets pattern="(?P<designation>Heatmap.*)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/> | |
707 <discover_datasets pattern="(?P<designation>screePlot.*)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/> | |
708 <discover_datasets pattern="(?P<designation>circularPlot.*)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/> | |
709 </collection> | |
710 --> | |
711 <data format="txt" name="log" label="${title}_Log" /> | |
712 </outputs> | |
713 | |
714 | |
715 | |
716 <tests> | |
717 <test maxseconds="7200"> <!-- expression data, input filtering on diff. exp. parameters; only the log is compared (6 differing lines tolerated) --> | |
718 <param name="dataToCluster_selector" value="expression" /> | |
719 <param name="expressionData" value="./NormalizedData.tabular" /> | |
720 <param name="filtering_step_selector" value="input" /> | |
721 <param name="filtering_stepBis_selector" value="diffExpParam" /> | |
722 <param name="differentialAnalysis" value="./LIMMAstatistics.tabular" /> | |
723 <param name="comparisonsToInclude" value="WT*WY14643-KO*WY14643" /> | |
724 <param name="FCthreshold" value="1.2" /> | |
725 <param name="pvalThreshold" value="0.05" /> | |
726 <output name="log" file="./HierarchicalClustering/ExpressionClustering.log" lines_diff="6" /> | |
727 </test> | |
728 <test maxseconds="7200"> <!-- fold change data on two comparisons, output filtering; tabular result and log are both compared --> | |
729 <param name="dataToCluster_selector" value="foldChange" /> | |
730 <param name="differentialAnalysis" value="./LIMMAstatistics.tabular" /> | |
731 <param name="comparisonsToInclude" value="WT*WY14643+KO*WY14643-WT*Control-KO*Control,WT*WY14643+WT*Control-KO*WY14643-KO*Control" /> | |
732 <param name="filtering_step_selector" value="output" /> | |
733 <param name="filtering_stepBis_selector" value="diffExpParam" /> | |
734 <param name="FCthreshold" value="1.2" /> | |
735 <param name="pvalThreshold" value="0.05" /> | |
736 <output name="outputData" file="./HierarchicalClustering/foldChangeClustering.tabular" /> | |
737 <output name="log" file="./HierarchicalClustering/foldChangeClustering.log" lines_diff="6" /> | |
738 </test> | |
739 </tests> | |
740 | |
741 | |
742 | |
743 <help> | |
744 <![CDATA[ | |
745 **What it does** | |
746 | |
747 Run hierarchical clustering on gene expression data or differential expression analysis (from arrays and RNA-seq studies) and display the corresponding heatmap. | |
748 | |
749 ----- | |
750 | |
751 **Parameters** | |
752 | |
753 \- **Title** to personalize output file names (please avoid special characters). | |
754 | |
755 \- **Data to cluster**, genes can be clustered based on : expression data, results from differential analysis tool or any tabular file content. | |
756 | |
757 | |
758 - **Expression data** with samples as columns and genes as rows (header row contains sample names and first column gene identifiers). | |
759 | |
760 :: | |
761 | |
762 Conditions 157_(HuGene-2_0-st).CEL 156_(HuGene-2_0-st).CEL 155_(HuGene-2_0-st).CEL 154_(HuGene-2_0-st).CEL | |
763 DDX11L2 4.500872 4.429759 4.780281 4.996189 | |
764 MIR1302-2 3.415065 3.520472 3.471503 3.567988 | |
765 OR4F5 3.737956 3.011586 3.424494 3.497545 | |
766 VWA1 5.189621 5.129595 4.806793 5.227014 | |
767 | |
768 | |
769 - **Differential expression analysis results** with contrasts statistics (p-val, FDR p-val, FC, log2(FC) and t-statistic) as columns and genes as rows (first and second rows contain comparison definition and first and second columns contain gene identifiers and functional information). Please respect the GIANT-Differential Expression Analysis tool output format. | |
770 | |
771 :: | |
772 | |
773 LIMMA comparison WT*Treat WT*Treat WT*Treat WT*Treat WT*Treat | |
774 Gene Info p-val FDR.p-val FC log2(FC) t-stat | |
775 ARSD na 0.0057 0.41 0.8389 -0.2534 -5.175 | |
776 TTTY10 na 1.6e-07 0.0074 0.6403 -0.6432 -6.122 | |
777 MIR548AL na 0.072 0.2914 1.711 0.775 10.43 | |
778 | |
779 \- **Comparisons to cluster** when clustering is performed on differential results, log2(FC) values of selected comparisons will be used. | |
780 | |
781 - **Generic tabular data** with samples as columns and genes as rows (header row contains sample names and first column gene identifiers). | |
782 | |
783 :: | |
784 | |
785 Conditions SampleA SampleB SampleC SampleD | |
786 DDX11L2 4.500872 4.429759 4.780281 4.996189 | |
787 MIR1302-2 3.415065 3.520472 3.471503 3.567988 | |
788 OR4F5 3.737956 3.011586 3.424494 3.497545 | |
789 VWA1 5.189621 5.129595 4.806793 5.227014 | |
790 | |
791 \- **Samples to cluster** when clustering is performed on generic data, the user has to select the columns to consider in clustering (first column, containing gene identifiers, will be automatically selected). | |
792 | |
793 | |
794 \- **Reorder samples** (only available for expression and generic data clustering). | |
795 | |
796 - **Based on a factors file**, samples will be sorted in an alphabetical/numerical order for the selected factors. Names in the 1st column of the factors file have to match with the columns names of the data to cluster. | |
797 | |
798 :: | |
799 | |
800 Conditions Sex Treatment Reaction | |
801 154_(HuGene-2_0-st).CEL 1 TreatA Pos | |
802 156_(HuGene-2_0-st).CEL 0 NoTreat Pos | |
803 157_(HuGene-2_0-st).CEL 0 TreatB Neg | |
804 155_(HuGene-2_0-st).CEL 0 NoTreat Neg | |
805 | |
806 \- **Genes filtering** can be applied before or after clustering step. | |
807 | |
808 - **Filtering before clustering** allows to restrict clustering to differentially expressed genes using differential analysis results (available for expression data and differential results clustering) or any generic file (available for generic data clustering). As an alternative, a specific gene list file can be directly used for filtering. | |
809 | |
810 - **Filtering after clustering** will have no effect on clustering or generated heatmaps. This filter is only applied to generated tabular files to keep differentially expressed genes (using differential analysis file or any generic file) or specific user defined genes (using gene list file). | |
811 | |
812 \- **Filter approaches** : three filtering strategies can be applied before/after clustering depending on the nature of clustered data. These strategies use : differential analysis results (available for expression data and differential results clustering), generic file content (available for generic data clustering) or a gene list file (available for any input data). | |
813 | |
814 - **From differential analysis results** to filter genes based on fold change and FDR p-val for selected comparisons. | |
815 | |
816 \- **Differential expression results file** is requested only for expression data clustering. For differential results clustering, the same differential results file selected as "data to cluster" is used. (see "Data to cluster section" for requested format) | |
817 | |
818 \- **Comparisons to use** are requested only for expression data clustering. For differential results clustering, the same comparisons selected in "data to cluster" section will be used. If several comparisons are selected, genes that satisfy both fold change and FDR p-val thresholds in at least one of these comparisons are kept. | |
819 | |
820 \- **Fold change threshold** to use for filtering, genes with fold change >= threshold or fold change <= 1/threshold will be kept (set this threshold to 1 if you do not want to filter on fold change). | |
821 | |
822 \- **FDR p-val threshold** to use for filtering, genes with FDR p-val <= threshold will be kept (set this threshold to 1 if you do not want to filter on FDR p-val). | |
823 | |
824 | |
825 - **From generic tabular file** to filter genes based on selected columns values. | |
826 | |
827 \- **Generic tabular file** contains genes in the first column and various information used for filtering in the following columns (same format as clustered generic tabular file). | |
828 | |
829 \- **Low filtering columns** used to discard rows with values below a given threshold (typically for Fold Change filtering). If several columns are selected, rows satisfying threshold condition in at least one of these columns are kept. | |
830 | |
831 \- **Low filtering threshold** below which the rows are discarded, the same threshold is applied for all selected columns. | |
832 | |
833 \- **High filtering columns** used to discard rows with values above a given threshold (typically for p-value filtering). If several columns are selected, rows satisfying threshold condition in at least one of these columns are kept. | |
834 | |
835 \- **High filtering threshold** above which the rows are discarded, the same threshold is applied for all selected columns. | |
836 | |
837 - **From list of genes** to focus on pre-identified genes. | |
838 | |
839 \- **Gene list file** with genes identifiers as one column file without header. | |
840 | |
841 :: | |
842 | |
843 DDX11L2 | |
844 VWA1 | |
845 TTTY10 | |
846 ARSD | |
847 | |
848 ----- | |
849 | |
850 **Advanced parameters** | |
851 | |
852 \- **Genes cluster number** used by hierarchical clustering (minimum is 2). See generated screeplot to adjust this number before re-running a clustering. | |
853 | |
854 \- **Samples/comparisons clusters number** used by hierarchical clustering applied on columns/conditions. Set to 1 (ie. no clustering) if you need to conserve input columns order for visualization purposes. Columns clusters information is not included (yet) in output tabular file. | |
855 | |
856 \- **Mathematical transformation** can be applied to clustered data before clustering and visualization. Data used for the filtering step are not modified by this transformation. | |
857 | |
858 \- **Distance measure** used to cluster rows and columns. | |
859 | |
860 \- **Agglomeration method** used to cluster rows and columns. | |
861 | |
862 \- **Add side bar** to visualize factor values for displayed columns/conditions, represented as a colored side bar in the heatmap. | |
863 | |
864 - **Factor file** that contains factor information for coloring (same format as the factor file used for input data columns reordering). | |
865 | |
866 - **Factor to use** to color side bar depending on its values for displayed columns/conditions. | |
867 | |
868 - **Color palette used** for coloring factor values (see RColorBrewer R package documentation for more information on proposed palettes). | |
869 | |
870 \- **Maximum gene number** : for readability and running time considerations only, number of displayed rows (genes) in heatmaps/circular plot can be limited. Clustering information in generated tabular file and scree plot are computed from a global clustering considering all genes (excepting those filtered out before clustering). Heatmap and circular plot are displayed for a random gene selection, to avoid such random selection we advise you to use input filtering option before clustering to have a gene number below this limit. | |
871 | |
872 \- **Personalized heatmap colors** to build color gradient choosing start, middle and end colors. | |
873 | |
874 \- **Output format** for circular plots only. | |
875 | |
876 \- **Html snapshot format** for interactive plotly plots. | |
877 | |
878 \- **Scale html snapshots** to increase resolution of snapshots taken from interactive plotly plots. | |
879 | |
880 ----- | |
881 | |
882 **Outputs** | |
883 | |
884 \- **Tabular clustering file** containing cluster information for each gene satisfying filtering steps. If expression or generic data was clustered, a two-column file is generated with gene identifiers and cluster numbers with possibly additional columns containing information used for filtering. If differential results were clustered, a similar file is returned with an additional column containing cluster numbers and differential statistics corresponding to comparisons used for filtering. | |
885 | |
886 \- **HTML file** to access interactive version of heatmap and screeplot through PlotLy html pages, circular plot image and tabulated clustering results. As a reminder, when the number of genes to display in heatmap/circular plot exceeds the maximum gene number parameter, a random sampling is performed for plotting efficiency. Thus, clustering displayed on heatmap/circular plot may slightly differ from clustering information contained in tabular file as heatmap/circular plot clustering is done over a subset of genes whereas tabular file contains clustering results performed on all genes. | |
887 | |
888 \- **LOG file** containing information about execution. Useful especially if tool execution fails. Please attach this log file in any bug report. | |
889 | |
890 ]]> | |
891 </help> | |
892 <citations> | |
893 <citation type="bibtex">@misc{vandel_jimmy_2018_1477870, author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.}, title = {GIANT: Galaxy-based Interactive tools for ANalysis of Transcriptomic data}, month = nov, year = 2018, doi = {10.5281/zenodo.1477870}, url = {https://doi.org/10.5281/zenodo.1477870} | |
894 }</citation> | |
895 <!-- heatmaply; the entry previously had an empty BibTeX key, so an explicit one is given --> | |
896 <citation type="bibtex">@article{galili2017heatmaply, | |
897 author = {Galili, Tal and O'Callaghan, Alan and | |
898 Sidi, Jonathan and Sievert, Carson}, | |
899 title = {heatmaply: an R package for creating interactive cluster | |
900 heatmaps for online publishing}, | |
901 journal = {Bioinformatics}, | |
902 year = {2017}, | |
903 doi = {10.1093/bioinformatics/btx657}, | |
904 url = {http://dx.doi.org/10.1093/bioinformatics/btx657}, | |
905 eprint = | |
906 {https://academic.oup.com/bioinformatics/article-pdf/doi/10.1093/bioinformatics/btx657/21358327/btx657.pdf} | |
907 }</citation> | |
908 <!-- circlize --> | |
909 <citation type="bibtex">@article{doi:10.1093/bioinformatics/btu393, | |
910 author = {Gu, Zuguang and Gu, Lei and Eils, Roland and Schlesner, Matthias and Brors, Benedikt}, | |
911 title = {circlize implements and enhances circular visualization in R }, | |
912 journal = {Bioinformatics}, | |
913 volume = {30}, | |
914 number = {19}, | |
915 pages = {2811-2812}, | |
916 year = {2014}, | |
917 doi = {10.1093/bioinformatics/btu393}, | |
918 URL = {http://dx.doi.org/10.1093/bioinformatics/btu393}, | |
919 eprint = {/oup/backfile/content_public/journal/bioinformatics/30/19/10.1093_bioinformatics_btu393/2/btu393.pdf} | |
920 }</citation> | |
921 <!-- plotly --> | |
922 <citation type="bibtex">@online{plotly, author = {Plotly Technologies Inc.}, title = {Collaborative data science}, publisher = {Plotly Technologies Inc.}, address = {Montreal, QC}, year = {2015}, url = {https://plot.ly} | |
923 }</citation> | |
924 | |
925 | |
926 </citations> | |
927 | |
928 </tool> |