comparison galaxy/wrappers/ArrayNormalization.xml @ 0:708f43bda2b6 draft

"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author vandelj
date Fri, 26 Jun 2020 09:35:11 -0400
parents
children 2762a2622e0d
comparison
equal deleted inserted replaced
-1:000000000000 0:708f43bda2b6
1 <tool name="GIANT-Normalization with APT Summarize" id="giant_aptsummarize" version="0.1.1">
2 <description>Apply Affymetrix Power Tool summarize function to .CEL collection</description>
3 <requirements>
4 <requirement type="package" version="2.10.1">apt-probeset-summarize</requirement>
5 </requirements>
6 <stdio>
<!-- Failure detection rules. Only the rules listed here are applied, so every non-zero exit code has to be covered explicitly. -->
7 <regex match="Execution halted"
8 source="both"
9 level="fatal"
10 description="Execution halted, please contact tool developer or administrators." />
<!-- 10 is the code returned by the post-treatment scripts defined in the configfiles section. -->
11 <exit_code range="10" level="fatal" description="Error in post-treatments, see log file for more information." />
<!-- 1 to 9 are forwarded from the apt command by the command script. -->
12 <exit_code range="1:9" level="fatal" description="Error during apt command execution, see log file for more information." />
<!-- Codes above 10 (e.g. 127 for command not found, 128+n for signals) matched no rule before and were reported as success. -->
<exit_code range="11:" level="fatal" description="Unexpected exit code (apt command failure or crash), see log file for more information." />
13 </stdio>
14
15
16
17 <command> <![CDATA[
## Overview of the generated shell command:
##  1. create ./apt_output/ and, for user supplied array files, copy the .cdf under its original name
##  2. run apt-probeset-summarize with the selected analysis (-a), the array library files and the .CEL list,
##     appending stdout and stderr to the log dataset
##  3. if apt failed, exit with its return code; otherwise run the post-treatment scripts
##     (scriptFormat, then scriptAnnotation when annotation is requested)
## A failing post-treatment script now stops the command with the script exit code; previously its status
## was discarded and the final printf made the whole command return 0.
18 ##ONLY FOR LOCAL TEST PURPOSE, COMMENT OTHERWISE
19 ##set $pathToApt='/mnt/galaxy/home/galaxy/Software/ThermoFischer/apt-1.20.6-x86_64-intel-linux/bin'
20
21 mkdir ./apt_output/;
22
23 #if $mainCondition.arraySelection=="other":
24 #if $CDFfile:
25 ##.cdf file should keep its original name
26 cp $CDFfile ./${CDFfile.name};
27 #end if
28 #end if
29
30
31 ##run APT command
32
33 ##ONLY FOR LOCAL TEST PURPOSE COMMENT IF NOT NECESSARY
34 ##${pathToApt}/apt-probeset-summarize -a $commandLine
35 ##AND UNCOMMENT THE LINE BELOW
36 apt-probeset-summarize -a $commandLine
37
## library files selected manually by the user
38 #if $mainCondition.arraySelection=="other":
39 #if $PGFfile and $CLFfile:
40 --use-pgf-names
41 -p $PGFfile
42 -c $CLFfile
43 #end if
44 #if $CDFfile:
45 -d ./${CDFfile.name}
46 #end if
47 #if $MPSfile and $normLevel=="core":
48 -m $MPSfile
49 #end if
50 #if $BGPfile:
51 -b $BGPfile
52 #end if
53
54 #else:
55
## library files taken from the selected data table entry; the string "null" marks a missing file
56 #if $mainCondition.arrayID.fields.pathPGF!="null" and $mainCondition.arrayID.fields.pathCLF!="null":
57 --use-pgf-names
58 -p $mainCondition.arrayID.fields.pathPGF
59 -c $mainCondition.arrayID.fields.pathCLF
60 #else:
61 #if $mainCondition.arrayID.fields.pathCDF!="null":
62 -d $mainCondition.arrayID.fields.pathCDF
63 #end if
64 #end if
65 #if $normLevel=="core" and $mainCondition.arrayID.fields.pathMPS!="null":
66 -m $mainCondition.arrayID.fields.pathMPS
67 #end if
68 #if $mainCondition.arrayID.fields.pathBGP!="null":
69 -b $mainCondition.arrayID.fields.pathBGP
70 #end if
71 #end if
72
73 -o ./apt_output/
74
## the comma separated input list is turned into space separated arguments
75 \$(echo '$inputData' | tr "," " ") >> $log 2>&1;
76
## keep the apt return code before any other command overwrites it
77 ret_code=\$?;
78
79 printf "[INFO]Normalized files: "$inputData >> $log;
80
81 if [ \$ret_code != 0 ]; then
82 exit \$ret_code;
83 else
84
85 ##launch post-treatment scripts and stop with their exit code as soon as one of them fails
86
87 bash $scriptFormat || exit \$?;
88 #if $annotationCondition.addAnnotation=="true":
89 echo "[INFO]Annotation file used:" >> $log;
90 #if $mainCondition.arraySelection=="other":
91 bash $scriptAnnotation $mainCondition.annotationFile || exit \$?;
92 echo $mainCondition.annotationFile.name >> $log;
93 #else:
94 #if $normLevel=="core":
95 bash $scriptAnnotation $mainCondition.arrayID.fields.pathAnnotTrans || exit \$?;
96 #else:
97 bash $scriptAnnotation $mainCondition.arrayID.fields.pathAnnotProbe || exit \$?;
98 #end if
99 cat $mainCondition.arrayID.fields.versionInfo >> $log;
100 #end if
101 #end if
102 fi;
103 printf "[INFO]End of tool script" >> $log;
104 ]]>
105 </command>
106
107
108
109 <configfiles>
110 <configfile name="scriptFormat">
111 <![CDATA[
## Post-treatment script: rebuilds the header of the apt summary file with the input dataset names
## and writes the resulting matrix to the output dataset.
## One dataset name is written per line, in input order, to ./tempColumnName.txt.
112 #for $inputDataset in $inputData
113 echo '${inputDataset.name}' >> ./tempColumnName.txt
114 #end for
## First file: store the names. Second file: skip lines whose first field matches the # pattern,
## replace the first remaining line by "Conditions" followed by the stored names, print the others unchanged.
## ARGIND is a gawk extension.
115 awk ' BEGIN{firstLine=1;OFS="";ORS=""} ARGIND==1{tab[FNR]=$1;nbCol=FNR} ARGIND==2 && !($1 ~ /\\#/){if(firstLine==0){print "\n"\$0}else{print "Conditions";for(i=1;i<=nbCol;i++){print "\t"tab[i]};firstLine=0}}' ./tempColumnName.txt ./apt_output/*summary.txt > $outputData
116
## The redirection above always creates the output file, so an existence test can never fail;
## test for a non-empty file instead so that an awk failure or a missing summary file is reported.
117 if [ ! -s $outputData ]; then
118 printf "[ERROR]Formating results failed" >> $log;
119 exit 10
120 fi
121 ]]>
122 </configfile>
123
124 <configfile name="scriptAnnotation">
125 <![CDATA[
## Post-treatment script: replaces probe set identifiers of the normalized matrix by their annotation.
## Script argument 1 is the annotation file (first column: probe set identifier, second column: annotation).
## Only the awk command matching the selected merging method is written in the generated script; each one
## has a variant dropping unannotated rows (keepAnnotated true) and a variant printing them unchanged.
## The result is built in ./tempData and then moved over the output dataset.
## ARGIND and arrays of arrays are gawk extensions.
126 #if $annotationCondition.addAnnotation=="true":
127 #if $annotationCondition.mergingMethod=="none":
128
## no merging: annotated rows are renamed annotation_(identifier)
129 #if $annotationCondition.keepAnnotated=="true":
130 awk 'BEGIN{OFS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2 && FNR==1{print \$0} ARGIND==2 && FNR>1{if(\$1 in dico){\$1=dico[\$1]"_("\$1")";print \$0}}' \$1 $outputData > ./tempData
131 #else:
132 awk 'BEGIN{OFS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2 && FNR==1{print \$0} ARGIND==2 && FNR>1{if(\$1 in dico){\$1=dico[\$1]"_("\$1")"};print \$0}' \$1 $outputData > ./tempData
133 #end if
134
135 #end if
136 #if $annotationCondition.mergingMethod=="mean":
137
## mean: per annotation, sum each column and divide by the number of rows sharing that annotation
138 #if $annotationCondition.keepAnnotated=="true":
139 awk 'BEGIN{OFS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2 && FNR==1{print \$0;nbCol=NF} ARGIND==2 && FNR>1{if(\$1 in dico){cpt[dico[\$1]]++;for(i=2;i<=nbCol;i++)sum[dico[\$1]][i]+=\$i}} END{for(iName in cpt){line=iName;for(iCol=2;iCol<=nbCol;iCol++){line=line"\t"sum[iName][iCol]/cpt[iName]};print line}}' \$1 $outputData > ./tempData
140 #else:
141 awk 'BEGIN{OFS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2 && FNR==1{print \$0;nbCol=NF} ARGIND==2 && FNR>1{if(\$1 in dico){cpt[dico[\$1]]++;for(i=2;i<=nbCol;i++)sum[dico[\$1]][i]+=\$i}else{print \$0}} END{for(iName in cpt){line=iName;for(iCol=2;iCol<=nbCol;iCol++){line=line"\t"sum[iName][iCol]/cpt[iName]};print line}}' \$1 $outputData > ./tempData
142 #end if
143
144 #end if
145
146 #if $annotationCondition.mergingMethod=="higherVar":
147
## higherVar: keep, per annotation, the row with the largest sum of squared deviations from its own mean.
## The first row seen for an annotation is always stored, so an annotation whose rows all have a zero
## deviation (always the case with a single data column) is no longer dropped from the output.
148 #if $annotationCondition.keepAnnotated=="true":
149 awk 'BEGIN{OFS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2 && FNR==1{print \$0;nbCol=NF} ARGIND==2 && FNR>1{if(\$1 in dico){mean=0;for(i=2;i<=nbCol;i++){mean+=\$i};mean=mean/(nbCol-1);curVar=0;for(i=2;i<=nbCol;i++){curVar+=(mean-\$i)^2};if(!(dico[\$1] in var) || curVar>var[dico[\$1]]){var[dico[\$1]]=curVar;for(i=2;i<=nbCol;i++){bestVar[dico[\$1]][i]=\$i}}}} END{for(iName in bestVar){line=iName;for(iCol=2;iCol<=nbCol;iCol++){line=line"\t"bestVar[iName][iCol]};print line}}' \$1 $outputData > ./tempData
150 #else:
151 awk 'BEGIN{OFS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2 && FNR==1{print \$0;nbCol=NF} ARGIND==2 && FNR>1{if(\$1 in dico){mean=0;for(i=2;i<=nbCol;i++){mean+=\$i};mean=mean/(nbCol-1);curVar=0;for(i=2;i<=nbCol;i++){curVar+=(mean-\$i)^2};if(!(dico[\$1] in var) || curVar>var[dico[\$1]]){var[dico[\$1]]=curVar;for(i=2;i<=nbCol;i++){bestVar[dico[\$1]][i]=\$i}}}else{print \$0}} END{for(iName in bestVar){line=iName;for(iCol=2;iCol<=nbCol;iCol++){line=line"\t"bestVar[iName][iCol]};print line}}' \$1 $outputData > ./tempData
152 #end if
153
154 #end if
155
156 #if $annotationCondition.mergingMethod=="lowerVar":
157
## lowerVar: keep, per annotation, the row with the smallest sum of squared deviations from its own mean.
## "Not stored yet" is tested with the in operator instead of comparing the stored value to 0, so a row
## with a true zero deviation is no longer overwritten by a later, more variable row.
158 #if $annotationCondition.keepAnnotated=="true":
159 awk 'BEGIN{OFS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2 && FNR==1{print \$0;nbCol=NF} ARGIND==2 && FNR>1{if(\$1 in dico){mean=0;for(i=2;i<=nbCol;i++){mean+=\$i};mean=mean/(nbCol-1);curVar=0;for(i=2;i<=nbCol;i++){curVar+=(mean-\$i)^2};if(!(dico[\$1] in var) || curVar<var[dico[\$1]]){var[dico[\$1]]=curVar;for(i=2;i<=nbCol;i++){bestVar[dico[\$1]][i]=\$i}}}} END{for(iName in bestVar){line=iName;for(iCol=2;iCol<=nbCol;iCol++){line=line"\t"bestVar[iName][iCol]};print line}}' \$1 $outputData > ./tempData
160 #else:
161 awk 'BEGIN{OFS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2 && FNR==1{print \$0;nbCol=NF} ARGIND==2 && FNR>1{if(\$1 in dico){mean=0;for(i=2;i<=nbCol;i++){mean+=\$i};mean=mean/(nbCol-1);curVar=0;for(i=2;i<=nbCol;i++){curVar+=(mean-\$i)^2};if(!(dico[\$1] in var) || curVar<var[dico[\$1]]){var[dico[\$1]]=curVar;for(i=2;i<=nbCol;i++){bestVar[dico[\$1]][i]=\$i}}}else{print \$0}} END{for(iName in bestVar){line=iName;for(iCol=2;iCol<=nbCol;iCol++){line=line"\t"bestVar[iName][iCol]};print line}}' \$1 $outputData > ./tempData
162 #end if
163
164 #end if
165
## The redirections above always create ./tempData, even when awk fails; require a non-empty file
## so that a failed merge does not silently replace the output with an empty file.
166 if [ -s ./tempData ]; then
167 mv -f ./tempData $outputData
168 else
169 printf "[ERROR]Annotation merge failed" >> $log;
170 exit 10
171 fi
172
173 #end if
174
175 ]]>
176 </configfile>
177 </configfiles>
178
179
180
181 <inputs>
<!-- Prefix used in the labels of both output datasets. -->
182 <param type="text" name="title" value="APT_toPersonalize" label="Title for output"/>
183
<!-- One or several .CEL datasets; their names become the column headers of the output matrix. -->
184 <param type="data" name="inputData" format="cel" label=".cel collection file" optional="false" multiple="true">
185 <validator type="empty_dataset" message="At least one data file should be selected"></validator>
186 </param>
187
<!-- Option values are passed as they are to the -a option of apt-probeset-summarize. -->
188 <param type="select" name="commandLine" display="radio" label="Normalization to perform" help="For more details go to APT webpage">
189 <option value="rma-gc-scale" selected="true">gc correction + scale intensity + rma</option>
190 <option value="scale-intensities,rma-bg,quant-norm.sketch=0.usepm=true.bioc=true,pm-only,med-polish">scale intensity + rma</option>
191 <option value="gc-correction,rma-bg,quant-norm.sketch=0.usepm=true.bioc=true,pm-only,med-polish">gc correction + rma</option>
192 <option value="rma">rma</option>
193 </param>
194
<!-- "core" adds the .mps file (-m) to the apt command when one is available, and selects the transcript annotation for pre-loaded arrays. -->
195 <param type="select" name="normLevel" display="radio" label="Normalization level" help="'Core genes' option is not available for all arrays">
196 <option value="core">Core genes</option>
197 <option value="probeset">Probe set</option>
198 </param>
199
<!-- Source of the array library files: an entry of the aptTool data table ("common") or datasets chosen by the user ("other"). -->
200 <conditional name="mainCondition">
201
202 <param name="arraySelection" type="select" label="Select GeneChip array configuration files">
203 <option value="common">pre-loaded array files</option>
204 <option value="other">array files selection</option>
205 </param>
206
207 <when value="common">
<!-- The command reads the pathPGF, pathCLF, pathCDF, pathMPS, pathBGP, pathAnnotTrans, pathAnnotProbe and versionInfo fields of the selected entry. -->
208 <param name="arrayID" label="Name" type="select">
209 <options from_data_table="aptTool">
210 </options>
211 </param>
212 </when>
213
214 <when value="other">
215
<!-- All library files are optional here; the command only adds the options whose file is provided. -->
216 <param type="data" format="data" name="PGFfile" label=".pgf indicates which probes are grouped together into a probeset*" optional="true" multiple="false">
217 </param>
218
219 <param type="data" format="data" name="CLFfile" label=".clf indicates where the probes are located in the CEL file*" optional="true" multiple="false">
220 </param>
221
222 <param type="data" format="data" name="CDFfile" label=".cdf contains both .pgf and .clf informations* (requested only if .pgf and .clf files are not available)" optional="true" multiple="false">
223 </param>
224
225 <param type="data" format="data" name="MPSfile" label=".mps defines a probeset as a collection of existing probesets* (requested for normalization at gene level)" optional="true" multiple="false" help="If .mps is not selected/available a 'probeset level' normalization will be automatically performed">
226 </param>
227
228 <param type="data" format="data" name="BGPfile" label=".bgp indicates which probes are to be used for computing background* (requested only if GC driven background correction is applied)" optional="true" multiple="false" help="*Files should be downloaded from corresponding array specific Affymetrix webpage to insure format compatibility">
229 </param>
230
<!-- Passed as argument to the annotation script when "Add gene annotation" is enabled. -->
231 <param type="data" name="annotationFile" format="tabular" label="tabular file containing available probesets annotation (requested if annotation options are selected)" optional="true" multiple="false" >
232 </param>
233
234 </when>
235 </conditional>
236
<!-- Annotation options; they drive which awk command is written in the scriptAnnotation configfile. -->
237 <conditional name="annotationCondition">
238 <param type="boolean" name="addAnnotation" checked="false" label="Add gene annotation">
239 </param>
240 <when value="true">
<!-- When true, rows without a match in the annotation file are left out of the output. -->
241 <param type="boolean" name="keepAnnotated" checked="false" label="Discard probe set without gene annotation">
242 </param>
243
244 <param type="select" name="mergingMethod" display="radio" label="Merging approach for probe set with same gene annotation">
245 <option value="none">No merging</option>
246 <option value="mean" selected="true">Mean between probes [recommended]</option>
247 <option value="higherVar">Keep probe with higher variance</option>
248 <option value="lowerVar">Keep probe with lower variance</option>
249 </param>
250 </when>
251 <when value="false">
252 </when>
253 </conditional>
254 </inputs>
255
256
257
258 <outputs>
<!-- Normalized (and optionally annotated) matrix written by the post-treatment scripts. -->
259 <data format="tabular" name="outputData" label="${title}_NormalizedData"/>
<!-- Execution log: receives the apt output and the [INFO] and [ERROR] messages of the command and scripts. -->
260 <data format="txt" name="log" label="${title}_Log" />
261 </outputs>
262
263
264
265 <tests>
<!-- Single functional test: three .CEL files, user supplied .cdf and annotation file, core level, mean merging with unannotated rows kept. -->
266 <test maxseconds="3600">
267 <param name="inputData" value="./CELfiles/GSM205766.CEL,./CELfiles/GSM205767.CEL,./CELfiles/GSM205768.CEL" />
268 <param name="commandLine" value="rma-gc-scale" />
269 <param name="normLevel" value="core" />
270 <param name="arraySelection" value="other" />
271 <param name="CDFfile" value="./Mouse430_2.cdf" />
272 <param name="annotationFile" value="./formatedAnnotation.csv" />
273 <param name="addAnnotation" value="true" />
274 <param name="keepAnnotated" value="false" />
275 <param name="mergingMethod" value="mean" />
<!-- The matrix must match the reference file; the log comparison tolerates two differing lines. -->
276 <output name="outputData" file="./APT-summarize/expressionOutput.csv" />
277 <output name="log" file="./APT-summarize/output.log" lines_diff="2" />
278 </test>
279 </tests>
280
281
282
283
284 <help>
285 <![CDATA[
286 **What it does**
287
288 To normalize expression data from Affymetrix GeneChip arrays through Affymetrix Power Tools software.
289
290 -----
291
292 **Parameters**
293
294 \- **Title** to personalize output file names (please avoid special characters).
295
296
297 \- **.CEL files** you want to normalize (you can select multiple .CEL files or unique collection).
298
299
300 \- **Analysis to perform** : as this tool runs the 'apt-probeset-summarize' command line from Affymetrix Power Tool software, this field represents the desired analysis pathway (-a parameter). Analysis pathways are summarized through predefined pathway aliases, with the full description of all run steps given below:
301
302 - gc correction + scale intensity + rma: "gc-correction,scale-intensities,rma-bg,quant-norm.sketch=0.usepm=true.bioc=true,pm-only,med-polish"
303
304 - scale intensity + rma: "scale-intensities,rma-bg,quant-norm.sketch=0.usepm=true.bioc=true,pm-only,med-polish"
305
306 - gc correction + rma: "gc-correction,rma-bg,quant-norm.sketch=0.usepm=true.bioc=true,pm-only,med-polish"
307
308 - rma: "rma-bg,quant-norm.sketch=0.usepm=true.bioc=true,pm-only,med-polish"
309
310 Please refer to `APT tool web page`__ to get more details about analysis steps.
311
312 \- **Normalization level** to choose when GeneChip array kind allows such distinction (i.e. when a .mps file exists in Affymetrix webpage). 'Probe set level' is the first level where probes for same exon are merged in probe sets. 'Gene level' [recommended] is an upper level where several probe sets are merged in meta probe sets. Even if a meta probe set can be seen as a good single gene expression indicator, several meta probe sets can still share the same gene annotation.
313
314 \- **GeneChip array configuration files** according to .CEL files. Configuration files for given arrays can be pre-loaded by administrators of the used Galaxy instance and listed in the aptTool.loc file. In this case you just have to select the array kind according to .CEL files, thus pre-loaded configuration files will be used automatically for normalization. If configuration files were not pre-loaded, you have to choose "array files selection" and then select manually configuration files for the corresponding array. Configuration files (.pgf, .clf, .cdf, .mps, .bgp) are available in library archive files from the Affymetrix webpage of the array (as an example, configuration files for HTA2.0 array are found in the 'Main Analysis Files' in the 'Support file' section of `this page`__). Not all of these files are mandatory for all normalizations. For basic normalization, both .pgf and .clf files or only .cdf file are mandatory. .mps file is requested for gene level normalization if it is available (see Normalization level parameter). .bgp file is requested for analysis pathway with specific background computation. Annotation file is a simple two column tabular file, with first column containing probe set names and second corresponding annotations; it should not contain any column header. This file can be generated from Affymetrix annotation file available in the same section as array configuration files in Affymetrix webpages, please keep a trace of used annotation version to ensure reproducibility.
315
316 Extract of annotation file::
317
318 TC01000020.hg.1 SAMD11
319 TC01000021.hg.1 KLHL17
320 TC01000022.hg.1 PLEKHN1
321 TC01000023.hg.1 ISG15
322 TC01000024.hg.1 AGRN
323
324 \- **Gene annotation** : add gene annotation when available (for pre-loaded arrays, annotation file path should be filled in the aptTool.loc file, for other arrays, annotation file should be selected manually). You can choose to keep or discard in normalized file (meta) probe sets without match in annotation file and select merging policy for probe sets sharing same gene annotation.
325
326 -----
327
328 **Advanced parameters**
329
330 \- **Additional parameter** to give to 'apt-probeset-summarize' command line except -a, -o, -d, -p, -c, -m, -b parameters already filled by previous fields.
331
332 -----
333
334 **Outputs**
335
336 \- **tabular file** containing log2 transformed normalized data as tab delimited matrix. First row contains .CEL names and first column contains (meta) probe set names and/or annotated gene names::
337
338 Conditions 157_(HuGene-2_0-st).CEL 156_(HuGene-2_0-st).CEL 155_(HuGene-2_0-st).CEL 154_(HuGene-2_0-st).CEL
339 DDX11L2 4.500872 4.429759 4.780281 4.996189
340 MIR1302-2 3.415065 3.520472 3.471503 3.567988
341 OR4F5 3.737956 3.011586 3.424494 3.497545
342 VWA1 5.189621 5.129595 4.806793 5.227014
343
344 \- **LOG file** containing information about execution. Useful especially if tool execution fails. Please attach this log file to any bug report.
345
346 .. __: http://media.affymetrix.com/support/developer/powertools/changelog/apt-probeset-summarize.html#intro
347
348 .. __: https://www.thermofisher.com/order/catalog/product/902233#/902233
349
350 ]]>
351 </help>
352
353
354 <citations>
355 <citation type="bibtex">@misc{vandel_jimmy_2018_1477870, author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.}, title = {GIANT: Galaxy-based Interactive tools for ANalaysis of Transcriptomic data}, month = nov, year = 2018, doi = {10.5281/zenodo.1477870}, url = {https://doi.org/10.5281/zenodo.1477870}
356 }</citation>
357
358 <citation type="bibtex">@online{apt, author = {Thermo Fisher Scientific}, title = {Affymetrix Power Tools}, publisher = {Life Technologies Corp.}, address = {USA, MA}, version = {2.10.2}, year = {2018}, url = {https://www.affymetrix.com/support/developer/powertools/changelog/index.html}
359 }</citation>
360 </citations>
361 </tool>