Mercurial > repos > galaxyp > pyprophet_export
comparison pyprophet_export.xml @ 0:2bc6bbf651b9 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyprophet commit a83d231286a8df67483df46e76b4b3a2ef90b251"
author | galaxyp |
---|---|
date | Tue, 25 Feb 2020 18:23:48 -0500 |
parents | |
children | 102d940d365c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc6bbf651b9 |
---|---|
1 <tool id="pyprophet_export" name="PyProphet export" version="@VERSION@.0"> | |
2 <description> | |
3 Export tabular files, optional swath2stats export | |
4 </description> | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 </macros> | |
8 <expand macro="requirements"> | |
9 <requirement type="package" version="1.16.0">bioconductor-swath2stats</requirement> | |
10 <requirement type="package" version="0.8.4">r-dplyr</requirement> | |
11 <requirement type="package" version="1.12.8">r-data.table</requirement> | |
12 <requirement type="package" version="2.3">r-gridextra</requirement> | |
13 </expand> | |
14 <command detect_errors="aggressive"> | |
15 <![CDATA[ | |
16 ln -s '$input' ./input.osw && | |
17 pyprophet export | |
18 --in=./input.osw | |
19 --format=$conditional_output.format | |
20 | |
21 #if $conditional_output.format=='legacy_split': | |
22 $conditional_output.transition_quant | |
23 --max_transition_pep=$conditional_output.max_transition_pep | |
24 --ipf=$conditional_output.ipf | |
25 --ipf_max_peptidoform_pep=$conditional_output.ipf_max_peptidoform_pep | |
26 --max_rs_peakgroup_qvalue=$conditional_output.max_rs_peakgroup_qvalue | |
27 --max_global_peptide_qvalue=$conditional_output.max_global_peptide_qvalue | |
28 --max_global_protein_qvalue=$conditional_output.max_global_protein_qvalue | |
29 | |
30 #elif $conditional_output.format=='legacy_merged': | |
31 $conditional_output.transition_quant | |
32 --max_transition_pep=$conditional_output.max_transition_pep | |
33 --ipf=$conditional_output.ipf | |
34 --ipf_max_peptidoform_pep=$conditional_output.ipf_max_peptidoform_pep | |
35 --max_rs_peakgroup_qvalue=$conditional_output.max_rs_peakgroup_qvalue | |
36 --max_global_peptide_qvalue=$conditional_output.max_global_peptide_qvalue | |
37 --max_global_protein_qvalue=$conditional_output.max_global_protein_qvalue | |
38 | |
39 #elif $conditional_output.format=='matrix': | |
40 --ipf=$conditional_output.ipf | |
41 --ipf_max_peptidoform_pep=$conditional_output.ipf_max_peptidoform_pep | |
42 --max_rs_peakgroup_qvalue=$conditional_output.max_rs_peakgroup_qvalue | |
43 --max_global_peptide_qvalue=$conditional_output.max_global_peptide_qvalue | |
44 --max_global_protein_qvalue=$conditional_output.max_global_protein_qvalue | |
45 #end if | |
46 $peptide_error | |
47 $protein_error | |
48 --out=./output.tsv | |
49 | |
50 #if $conditional_swath2stats.swath2stats=='yes_swath2stats': | |
51 && cat '${swath2stats}' | |
52 && Rscript '${swath2stats}' | |
53 #end if | |
54 | |
55 #if $conditional_output.format=='score_plots': | |
56 && mv *score_plots.pdf '$score_plots' | |
57 #else: | |
58 && mv output.tsv '$export_file' | |
59 #end if | |
60 | |
61 | |
62 ]]> | |
63 </command> | |
64 <configfiles> | |
65 <configfile name="swath2stats"><![CDATA[ | |
66 | |
67 #if $conditional_swath2stats.swath2stats=='yes_swath2stats': | |
68 | |
69 library("SWATH2stats") | |
70 library("data.table") | |
71 library("dplyr") | |
72 library(gridExtra) | |
73 | |
74 ########################### Input ############################################## | |
75 | |
76 ## read in pyprophet export file | |
77 data_me <- data.frame(fread('output.tsv', sep='\t', header=TRUE)) | |
78 | |
79 ## read in study design template | |
80 study_design <- data.frame(fread('$conditional_swath2stats.study_design', sep='\t', header=TRUE)) | |
81 | |
82 ## merge both files on filename column | |
83 data.annotated <- sample_annotation(data_me, study_design, column.file = "filename") | |
84 | |
85 | |
86 ########################### QC plots and tabular files ######################### | |
87 | |
88 ## remove decoys when generating plots | |
89 data.annotated.nodecoy <- subset(data.annotated, decoy==FALSE) | |
90 | |
91 pdf("summary.pdf", fonts = "Times", pointsize = 12) | |
92 plot(0,type='n',axes=FALSE,ann=FALSE) | |
93 title(main="Summarized plots and tables from pyprophet export file") | |
94 | |
95 ## Look at Numbers of peptides and proteins per run | |
96 grid.table(count_analytes(data.annotated.nodecoy), rows= NULL) | |
97 | |
98 ## Correlation of the intensities | |
99 correlation_int <- plot_correlation_between_samples(data.annotated.nodecoy, column.values = 'Intensity') | |
100 | |
101 ## Plot the correlation of the delta_rt, which is the deviation of the retention time from the expected retention time | |
102 correlation_rt <- plot_correlation_between_samples(data.annotated.nodecoy, column.values = 'delta_rt') | |
103 | |
104 ## Plot the variation of the signal across replicates | |
105 variation <- plot_variation(data.annotated.nodecoy) | |
106 plot(0,type='n',axes=FALSE,ann=FALSE) | |
107 grid.table(variation[[2]]) | |
108 | |
109 ## Plot the total variation versus variation within replicates | |
110 variation_total <- plot_variation_vs_total(data.annotated.nodecoy) | |
111 | |
112 ## Calculate the summed signal per peptide and protein across samples | |
113 peptide_signal <- write_matrix_peptides(data.annotated.nodecoy) | |
114 protein_signal <- write_matrix_proteins(data.annotated.nodecoy) | |
115 | |
116 | |
117 #if str($conditional_swath2stats.conditional_fdr_replica.calc_fdr_replica) =="calc_fdr_replica_yes": | |
118 | |
119 ## Estimate the overall FDR across runs using a target decoy strategy | |
120 fdr_target_decoy <- assess_fdr_overall(data.annotated, n.range = $conditional_swath2stats.conditional_fdr_replica.n_range, FFT = $conditional_swath2stats.conditional_fdr_replica.fft, output = 'Rconsole') | |
121 print(fdr_target_decoy) | |
122 dev.off() | |
123 #else | |
124 dev.off() | |
125 #end if | |
126 | |
127 ############################# Filtering ######################################## | |
128 ## Start from the annotated data (decoys still included); each enabled filter below narrows data.filtered further | |
129 data.filtered = data.annotated | |
130 | |
131 #if str($conditional_swath2stats.conditional_fdr_replica.calc_fdr_replica) =="calc_fdr_replica_yes": | |
132 ## NOTE(review): the cut-off below is computed on data_me (the raw export), not on data.annotated - confirm this is intended | |
133 ## Based on the FDR estimation, the data can be filtered with a stricter m-score threshold until the overall protein FDR reaches the user-supplied fdr_target. | |
134 ## Determine the m-score cut-off required to reach a protein FDR of fdr_target (using the user-supplied FFT) | |
135 cutoff_mscore = mscore4protfdr(data_me, FFT = $conditional_swath2stats.conditional_fdr_replica.fft, fdr_target = $conditional_swath2stats.conditional_fdr_replica.fdr_target) | |
136 print(cutoff_mscore) | |
137 ## Keep values that pass the computed m-score cut-off in at least n_replica measurements of one condition | |
138 data.filtered <- filter_mscore_condition(data.filtered, cutoff_mscore, n.replica = $conditional_swath2stats.conditional_fdr_replica.n_replica) | |
139 #end if | |
140 | |
141 #if str($conditional_swath2stats.conditional_max_pep.filter_max_pep) == "filter_max_pep_yes": | |
142 ## Keep only the n_peptides_max peptides showing the strongest signal per protein | |
143 data.filtered <- filter_on_max_peptides(data.filtered, n_peptides = $conditional_swath2stats.conditional_max_pep.n_peptides_max) | |
144 #end if | |
145 | |
146 | |
147 #if str($conditional_swath2stats.conditional_min_pep.filter_min_pep) == "filter_min_pep_yes": | |
148 ## Keep only proteins that are supported by at least n_peptides_min peptides | |
149 data.filtered <- filter_on_min_peptides(data.filtered, n_peptides = $conditional_swath2stats.conditional_min_pep.n_peptides_min) | |
150 #end if | |
151 | |
152 ########################### Output ############################################ | |
153 ## Convert the data into a transition-level format (one row per transition measured). | |
154 data.transition <- disaggregate(data.filtered) | |
155 | |
156 ## Convert the data into the format required by MSstats. | |
157 MSstats.input <- convert4MSstats(data.transition) | |
158 | |
159 ### Transitions which were found at different RT / multiple scans are combined by summarizing the Intensities | |
160 Test = MSstats.input %>% group_by(ProteinName, PeptideSequence, PrecursorCharge, FragmentIon, ProductCharge, IsotopeLabelType, BioReplicate, Condition, Run) %>% summarise(Intensity = sum(Intensity)) | |
161 | |
162 Test = Test[, c("ProteinName", "PeptideSequence", "PrecursorCharge", "FragmentIon", "ProductCharge", "IsotopeLabelType", "Intensity", "BioReplicate", "Condition", "Run")] | |
163 | |
164 write.table(Test, file="$msstats_input", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
165 write.table(peptide_signal, file="$peptide_signal", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
166 write.table(protein_signal, file="$protein_signal", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
167 | |
168 #end if | |
169 | |
170 ]]></configfile> | |
171 </configfiles> | |
172 <inputs> | |
173 <param name="input" type="data" format="osw" label="Input file" help="This file needs to be in OSW format (--in)" /> | |
174 <conditional name="conditional_output"> | |
175 <param argument="format" type="select" label="Export format, either matrix, legacy_split, legacy_merged (mProphet/PyProphet) or score_plots format" > | |
176 <option value="legacy_split" selected="True">legacy_split</option> | |
177 <option value="legacy_merged">legacy_merged</option> | |
178 <option value="matrix">matrix</option> | |
179 <option value="score_plots">score_plots</option> | |
180 </param> | |
181 <when value="legacy_split"> | |
182 | |
183 <param name="transition_quant" type="boolean" truevalue="--transition_quantification" falsevalue="--no-transition_quantification" checked="True" label="Report aggregated transition-level quantification" help="(--transition_quantification / --no-transition_quantification)" /> | |
184 <param argument="max_transition_pep" type="float" value="0.7" label="Maximum PEP to retain scored transitions for quantification (requires transition-level scoring)" /> | |
185 <param argument="ipf" type="select" display="radio" label="Should IPF results be reported if present? 'peptidoform': Report results on peptidoform-level, 'augmented': Augment OpenSWATH results with IPF scores, 'disable': Ignore IPF results" > | |
186 <option value="peptidoform" selected="True" >peptidoform </option> | |
187 <option value="augmented">augmented</option> | |
188 <option value="disable">disable</option> | |
189 </param> | |
190 <param argument="ipf_max_peptidoform_pep" type="float" value="0.4" label="IPF: Filter results to maximum run-specific peptidoform-level PEP" /> | |
191 <param argument="max_rs_peakgroup_qvalue" type="float" value="0.05" label="Filter results to maximum run-specific peak group-level q-value" /> | |
192 <param argument="max_global_peptide_qvalue" type="float" value="0.01" label="Filter results to maximum global peptide-level q-value" /> | |
193 <param argument="max_global_protein_qvalue" type="float" value="0.01" label="Filter results to maximum global protein-level q-value" /> | |
194 </when> | |
195 <when value="legacy_merged"> | |
196 | |
197 <param name="transition_quant" type="boolean" truevalue="--transition_quantification" falsevalue="--no-transition_quantification" checked="True" label="Report aggregated transition-level quantification" help="(--transition_quantification / --no-transition_quantification)" /> | |
198 <param argument="max_transition_pep" type="float" value="0.7" label="Maximum PEP to retain scored transitions for quantification (requires transition-level scoring)" /> | |
199 <param argument="ipf" type="select" display="radio" label="Should IPF results be reported if present? 'peptidoform': Report results on peptidoform-level, 'augmented': Augment OpenSWATH results with IPF scores, 'disable': Ignore IPF results" > | |
200 <option value="peptidoform" selected="True">peptidoform </option> | |
201 <option value="augmented">augmented</option> | |
202 <option value="disable">disable</option> | |
203 </param> | |
204 <param argument="ipf_max_peptidoform_pep" type="float" value="0.4" label="IPF: Filter results to maximum run-specific peptidoform-level PEP" /> | |
205 <param argument="max_rs_peakgroup_qvalue" type="float" value="0.05" label="Filter results to maximum run-specific peak group-level q-value" /> | |
206 <param argument="max_global_peptide_qvalue" type="float" value="0.01" label="Filter results to maximum global peptide-level q-value" /> | |
207 <param argument="max_global_protein_qvalue" type="float" value="0.01" label="Filter results to maximum global protein-level q-value" /> | |
208 </when> | |
209 <when value="matrix"> | |
210 | |
211 <param argument="ipf" type="select" display="radio" label="Should IPF results be reported if present? 'peptidoform': Report results on peptidoform-level, 'augmented': Augment OpenSWATH results with IPF scores, 'disable': Ignore IPF results" > | |
212 <option value="peptidoform" selected="True">peptidoform </option> | |
213 <option value="augmented">augmented</option> | |
214 <option value="disable">disable</option> | |
215 </param> | |
216 <param argument="ipf_max_peptidoform_pep" type="float" value="0.4" label="IPF: Filter results to maximum run-specific peptidoform-level PEP" /> | |
217 <param argument="max_rs_peakgroup_qvalue" type="float" value="0.05" label="Filter results to maximum run-specific peak group-level q-value" /> | |
218 <param argument="max_global_peptide_qvalue" type="float" value="0.01" label="Filter results to maximum global peptide-level q-value" /> | |
219 <param argument="max_global_protein_qvalue" type="float" value="0.01" label="Filter results to maximum global protein-level q-value" /> | |
220 </when> | |
221 <when value="score_plots"/> | |
222 </conditional> | |
223 <param name="peptide_error" type="boolean" truevalue="--peptide" falsevalue="--no-peptide" checked="True" label="Append peptide-level error-rate estimates if available" help="(--peptide / --no-peptide)" /> | |
224 <param name="protein_error" type="boolean" truevalue="--protein" falsevalue="--no-protein" checked="True" label="Append protein-level error-rate estimates if available" help="(--protein / --no-protein)" /> | |
225 <conditional name="conditional_swath2stats"> | |
226 <param name="swath2stats" type="select" label="Use swath2stats to export file for statistics" > | |
227 <option value="yes_swath2stats" selected="True">yes</option> | |
228 <option value="no_swath2stats">no</option> | |
229 </param> | |
230 <when value="yes_swath2stats"> | |
231 <param name="study_design" type="data" format="tabular" label="Study design tabular file" help="Needs to have columns with Filename, Condition, BioReplicate, Run" /> | |
232 <conditional name="conditional_fdr_replica"> | |
233 <param name="calc_fdr_replica" type="select" label="Filter for fdr and number of replicates" > | |
234 <option value="calc_fdr_replica_yes" selected="True">Yes</option> | |
235 <option value="calc_fdr_replica_no">No</option> | |
236 </param> | |
237 <when value="calc_fdr_replica_yes"> | |
238 <param name="fft" type="float" value="0.5" label="FFT. Ratio of false positives to true negatives, q-values from pyProphet stats output" help="As an approximation, the q-values of multiple runs are averaged and supplied as argument FFT. Numeric from 0 to 1."/> | |
239 <param name="n_range" type="float" value="10" label="Option to set the number of magnitude for which the m_score threshold is decreased" /> | |
240 <param name="fdr_target" type="float" value="0.05" label="FDR target." help="An m_score cutoff achieving an FDR smaller than fdr_target will be selected. Calculated as FDR = decoys*FFT/targets" /> | |
241 <param name="n_replica" type="integer" value="2" label="Number Replicates." help="Number of measurements within at least one condition that have to pass the mscore threshold for this transition." /> | |
242 </when> | |
243 <when value="calc_fdr_replica_no"/> | |
244 </conditional> | |
245 <conditional name="conditional_max_pep"> | |
246 <param name="filter_max_pep" type="select" label="Filter for a maximum number of peptides per protein" > | |
247 <option value="filter_max_pep_yes" selected="True">Yes</option> | |
248 <option value="filter_max_pep_no">No</option> | |
249 </param> | |
250 <when value="filter_max_pep_yes"> | |
251 <param name="n_peptides_max" type="integer" value="10" label="Maximum number of peptides per protein." help="Maximum number of highest intense peptides to filter the data on." /> | |
252 </when> | |
253 <when value="filter_max_pep_no"/> | |
254 </conditional> | |
255 <conditional name="conditional_min_pep"> | |
256 <param name="filter_min_pep" type="select" label="Filter for proteins that are supported by a minimum number of peptides" > | |
257 <option value="filter_min_pep_yes" selected="True">Yes</option> | |
258 <option value="filter_min_pep_no">No</option> | |
259 </param> | |
260 <when value="filter_min_pep_yes"> | |
261 <param name="n_peptides_min" type="integer" value="2" label="Minimum number of peptides per protein" help="Number of minimal number of peptide IDs associated with a protein ID in order to be kept in the dataset." /> | |
262 </when> | |
263 <when value="filter_min_pep_no"/> | |
264 </conditional> | |
265 </when> | |
266 <when value="no_swath2stats"/> | |
267 </conditional> | |
268 </inputs> | |
269 <outputs> | |
270 <data name="export_file" format="tabular" label="${tool.name} on ${on_string}: export.tabular" > | |
271 <filter>conditional_output['format'] != 'score_plots'</filter> | |
272 </data> | |
273 <data name="score_plots" format="pdf" label="${tool.name} on ${on_string}: score_plots.pdf" > | |
274 <filter>conditional_output['format'] == 'score_plots'</filter> | |
275 </data> | |
276 <data name="summary" format="pdf" from_work_dir="summary.pdf" label = "${tool.name} on ${on_string}: summary.pdf"> | |
277 <filter>conditional_swath2stats['swath2stats'] == 'yes_swath2stats'</filter> | |
278 </data> | |
279 <data name="peptide_signal" format="tabular" label="${tool.name} on ${on_string}: peptide_signal.tabular" from_work_dir="peptide_signal.tabular" > | |
280 <filter>conditional_swath2stats['swath2stats'] == 'yes_swath2stats'</filter> | |
281 </data> | |
282 <data name="protein_signal" format="tabular" label="${tool.name} on ${on_string}: protein_signal.tabular" from_work_dir="protein_signal.tabular" > | |
283 <filter>conditional_swath2stats['swath2stats'] == 'yes_swath2stats'</filter> | |
284 </data> | |
285 <data name="msstats_input" format="tabular" label="${tool.name} on ${on_string}: msstats_input.tabular" from_work_dir="msstats_input.tabular" > | |
286 <filter>conditional_swath2stats['swath2stats'] == 'yes_swath2stats'</filter> | |
287 </data> | |
288 </outputs> | |
289 <tests> | |
290 <test expect_num_outputs="1"> | |
291 <param name="input" value="protein2.osw" ftype="osw" /> | |
292 <param name="format" value="legacy_merged" /> | |
293 <param name="max_global_peptide_qvalue" value="0.2" /> | |
294 <conditional name="conditional_swath2stats"> | |
295 <param name="swath2stats" value="no_swath2stats"/> | |
296 </conditional> | |
297 <output name="export_file" file="output.tabular" /> | |
298 </test> | |
299 <test expect_num_outputs="1"> | |
300 <param name="input" value="protein2.osw" ftype="osw" /> | |
301 <param name="format" value="score_plots" /> | |
302 <conditional name="conditional_swath2stats"> | |
303 <param name="swath2stats" value="no_swath2stats"/> | |
304 </conditional> | |
305 <output name="score_plots" file="score_plots.pdf" /> | |
306 </test> | |
307 <test expect_failure="true"> | |
308 <param name="input" value="protein2.osw" ftype="osw" /> | |
309 <param name="format" value="legacy_merged" /> | |
310 <conditional name="conditional_swath2stats"> | |
311 <param name="study_design" value="study_design.tabular" ftype="tabular" /> | |
312 <conditional name="conditional_fdr_replica"> | |
313 <param name="calc_fdr_replica" value="calc_fdr_replica_no"/> | |
314 </conditional> | |
315 <conditional name="conditional_max_pep"> | |
316 <param name="filter_max_pep" value="filter_max_pep_no" /> | |
317 </conditional> | |
318 <conditional name="conditional_min_pep"> | |
319 <param name="filter_min_pep" value="filter_min_pep_no" /> | |
320 </conditional> | |
321 </conditional> | |
322 <assert_stderr> | |
323 <has_text text="replacement has 1 row, data has 0" /> | |
324 </assert_stderr> | |
325 </test> | |
326 </tests> | |
327 <help> | |
328 <![CDATA[ | |
329 **What it does** | |
330 | |
331 PyProphet: Semi-supervised learning and scoring of OpenSWATH results. | |
332 | |
333 Export tabular (tsv) tables. | |
334 | |
335 Optional SWATH2stats output. SWATH2stats is intended to transform SWATH data from the OpenSWATH software into a format readable by other statistics packages while performing filtering, annotation and FDR estimation. | |
336 | |
337 **Study design file for SWATH2stats** | |
338 | |
339 - Tabular file with columns that are named: Filename, Condition, BioReplicate, Run. | |
340 - The Filename should be identical to, or a part of, the original filename used in the OpenSWATH workflow | |
341 - The Condition should be a label for the experimental group of the sample (e.g. healthy, diseased) | |
342 - The BioReplicate corresponds to the biological replicate | |
343 - The Run is the number of the run in which the sample was measured | |
344 | |
345 :: | |
346 | |
347 Filename Condition BioReplicate Run | |
348 healthy1.mzml healthy 1 1 | |
349 healthy2.mzml healthy 2 2 | |
350 diseased1.mzml diseased 3 3 | |
351 ... | |
352 ... | |
353 | |
354 | |
355 PyProphet is a Python re-implementation of the mProphet algorithm (Reiter 2010 Nature Methods) optimized for SWATH-MS data acquired by data-independent acquisition (DIA). The algorithm was originally published in (Teleman 2014 Bioinformatics) and has since been extended to support new data types and analysis modes (Rosenberger 2017, Nature Biotechnology and Nature Methods). | |
356 | |
357 For more information, visit @link@ | |
358 | |
359 ]]> | |
360 </help> | |
361 <expand macro="citations"> | |
362 <citation type="doi">10.1371/journal.pone.0153160</citation> | |
363 </expand> | |
364 </tool> |