comparison pyprophet_export.xml @ 0:2bc6bbf651b9 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyprophet commit a83d231286a8df67483df46e76b4b3a2ef90b251"
author galaxyp
date Tue, 25 Feb 2020 18:23:48 -0500
parents
children 102d940d365c
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc6bbf651b9
1 <tool id="pyprophet_export" name="PyProphet export" version="@VERSION@.0">
2 <description>
3 Export tabular files, optional swath2stats export
4 </description>
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8 <expand macro="requirements">
9 <requirement type="package" version="1.16.0">bioconductor-swath2stats</requirement>
10 <requirement type="package" version="0.8.4">r-dplyr</requirement>
11 <requirement type="package" version="1.12.8">r-data.table</requirement>
12 <requirement type="package" version="2.3">r-gridextra</requirement>
13 </expand>
<command detect_errors="aggressive">
<![CDATA[
## Keep the chosen export format as a plain string so the checks below
## can use ordinary string comparisons.
#set $export_format = str($conditional_output.format)

## Link the input dataset into the working directory as input.osw and
## run the export on that link.
ln -s '$input' ./input.osw &&
pyprophet export
--in=./input.osw
--format=$export_format

## Transition-level options are only defined for the two legacy formats.
#if $export_format in ('legacy_split', 'legacy_merged'):
$conditional_output.transition_quant
--max_transition_pep=$conditional_output.max_transition_pep
#end if

## IPF and q-value filters are defined for every format except score_plots.
#if $export_format in ('legacy_split', 'legacy_merged', 'matrix'):
--ipf=$conditional_output.ipf
--ipf_max_peptidoform_pep=$conditional_output.ipf_max_peptidoform_pep
--max_rs_peakgroup_qvalue=$conditional_output.max_rs_peakgroup_qvalue
--max_global_peptide_qvalue=$conditional_output.max_global_peptide_qvalue
--max_global_protein_qvalue=$conditional_output.max_global_protein_qvalue
#end if
$peptide_error
$protein_error
--out=./output.tsv

## Optional SWATH2stats step: echo the generated R script into the job
## output, then execute it.
#if $conditional_swath2stats.swath2stats=='yes_swath2stats':
&& cat '${swath2stats}'
&& Rscript '${swath2stats}'
#end if

## Move the result to the matching Galaxy output dataset.
#if $export_format == 'score_plots':
&& mv *score_plots.pdf '$score_plots'
#else:
&& mv output.tsv '$export_file'
#end if
]]>
</command>
64 <configfiles>
<configfile name="swath2stats"><![CDATA[

#if $conditional_swath2stats.swath2stats=='yes_swath2stats':

library("SWATH2stats")
library("data.table")
library("dplyr")
library("gridExtra")

## Start an empty page on the current graphics device; used as a canvas
## for the title and for the grid tables.
start_blank_page <- function() {
  plot(0, type = "n", axes = FALSE, ann = FALSE)
}

########################### Input ##############################################

## pyprophet export table written by the preceding command
export_table <- data.frame(fread("output.tsv", sep = "\t", header = TRUE))

## study design table supplied by the user
design_table <- data.frame(fread('$conditional_swath2stats.study_design', sep = "\t", header = TRUE))

## annotate the export with the study design, matched on the filename column
annotated <- sample_annotation(export_table, design_table, column.file = "filename")


########################### QC plots and tabular files #########################

## decoys are excluded from every plot and signal matrix
targets_only <- subset(annotated, decoy == FALSE)

pdf("summary.pdf", fonts = "Times", pointsize = 12)
start_blank_page()
title(main = "Summarized plots and tables from pyprophet export file")

## numbers of peptides and proteins per run
grid.table(count_analytes(targets_only), rows = NULL)

## correlation of the intensities between samples
intensity_correlation <- plot_correlation_between_samples(targets_only, column.values = "Intensity")

## correlation of delta_rt, the deviation of the retention time from the
## expected retention time
rt_correlation <- plot_correlation_between_samples(targets_only, column.values = "delta_rt")

## variation of the signal across replicates; the second element of the
## result is rendered as a table on its own page
replicate_variation <- plot_variation(targets_only)
start_blank_page()
grid.table(replicate_variation[[2]])

## total variation versus variation within replicates
total_variation <- plot_variation_vs_total(targets_only)

## summed signal per peptide and per protein across samples
peptide_signal <- write_matrix_peptides(targets_only)
protein_signal <- write_matrix_proteins(targets_only)


#if str($conditional_swath2stats.conditional_fdr_replica.calc_fdr_replica) =="calc_fdr_replica_yes":

## overall FDR across runs, estimated with a target decoy strategy
## (decoys are needed here, so the full annotated table is used)
fdr_overview <- assess_fdr_overall(annotated, n.range = $conditional_swath2stats.conditional_fdr_replica.n_range, FFT = $conditional_swath2stats.conditional_fdr_replica.fft, output = "Rconsole")
print(fdr_overview)
#end if

## the summary PDF is complete in either case
dev.off()

############################# Filtering ########################################

## every filter below is optional; start from the unfiltered annotated table
filtered <- annotated

#if str($conditional_swath2stats.conditional_fdr_replica.calc_fdr_replica) =="calc_fdr_replica_yes":

## m-score cutoff required to reach the requested protein FDR target
mscore_cutoff <- mscore4protfdr(export_table, FFT = $conditional_swath2stats.conditional_fdr_replica.fft, fdr_target = $conditional_swath2stats.conditional_fdr_replica.fdr_target)
print(mscore_cutoff)

## keep values that pass the cutoff in at least n_replica measurements
## of one condition
filtered <- filter_mscore_condition(filtered, mscore_cutoff, n.replica = $conditional_swath2stats.conditional_fdr_replica.n_replica)
#end if

#if str($conditional_swath2stats.conditional_max_pep.filter_max_pep) == "filter_max_pep_yes":
## keep only the requested number of strongest peptides per protein
filtered <- filter_on_max_peptides(filtered, n_peptides = $conditional_swath2stats.conditional_max_pep.n_peptides_max)
#end if


#if str($conditional_swath2stats.conditional_min_pep.filter_min_pep) == "filter_min_pep_yes":
## keep only proteins supported by the requested minimum number of peptides
filtered <- filter_on_min_peptides(filtered, n_peptides = $conditional_swath2stats.conditional_min_pep.n_peptides_min)
#end if

########################### Output ############################################

## transition-level format: one row per measured transition
transition_table <- disaggregate(filtered)

## layout expected by MSstats
msstats_long <- convert4MSstats(transition_table)

## transitions found at different RT / in multiple scans are combined by
## summing their intensities
msstats_summed <- msstats_long %>%
  group_by(
    ProteinName, PeptideSequence, PrecursorCharge, FragmentIon,
    ProductCharge, IsotopeLabelType, BioReplicate, Condition, Run
  ) %>%
  summarise(Intensity = sum(Intensity))

## column order of the written MSstats table
msstats_columns <- c(
  "ProteinName", "PeptideSequence", "PrecursorCharge", "FragmentIon",
  "ProductCharge", "IsotopeLabelType", "Intensity", "BioReplicate",
  "Condition", "Run"
)
msstats_summed <- msstats_summed[, msstats_columns]

write.table(msstats_summed, file = "$msstats_input", quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t")
write.table(peptide_signal, file = "$peptide_signal", quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t")
write.table(protein_signal, file = "$protein_signal", quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t")

#end if

]]></configfile>
171 </configfiles>
172 <inputs>
173 <param name="input" type="data" format="osw" label="Input file" help="This file needs to be in OSW format (--in)" />
174 <conditional name="conditional_output">
175 <param argument="format" type="select" label="Export format, either matrix, legacy_split, legacy_merged (mProphet/PyProphet) or score_plots format" >
176 <option value="legacy_split" selected="True">legacy_split</option>
177 <option value="legacy_merged">legacy_merged</option>
178 <option value="matrix">matrix</option>
179 <option value="score_plots">score_plots</option>
180 </param>
181 <when value="legacy_split">
182
183 <param name="transition_quant" type="boolean" truevalue="--transition_quantification" falsevalue="--no-transition_quantification" checked="True" label="Report aggregated transition-level quantification" help="(--transition_quantification / --no-transition_quantification)" />
184 <param argument="max_transition_pep" type="float" value="0.7" label="Maximum PEP to retain scored transitions for quantification (requires transition-level scoring)" />
185 <param argument="ipf" type="select" display="radio" label="Should IPF results be reported if present? 'peptidoform': Report results on peptidoform-level, 'augmented': Augment OpenSWATH results with IPF scores, 'disable': Ignore IPF results" >
186 <option value="peptidoform" selected="True" >peptidoform </option>
187 <option value="augmented">augmented</option>
188 <option value="disable">disable</option>
189 </param>
190 <param argument="ipf_max_peptidoform_pep" type="float" value="0.4" label="IPF: Filter results to maximum run-specific peptidoform-level PEP" />
191 <param argument="max_rs_peakgroup_qvalue" type="float" value="0.05" label="Filter results to maximum run-specific peak group-level q-value" />
192 <param argument="max_global_peptide_qvalue" type="float" value="0.01" label="Filter results to maximum global peptide-level q-value" />
193 <param argument="max_global_protein_qvalue" type="float" value="0.01" label="Filter results to maximum global protein-level q-value" />
194 </when>
195 <when value="legacy_merged">
196
197 <param name="transition_quant" type="boolean" truevalue="--transition_quantification" falsevalue="--no-transition_quantification" checked="True" label="Report aggregated transition-level quantification" help="(--transition_quantification / --no-transition_quantification)" />
198 <param argument="max_transition_pep" type="float" value="0.7" label="Maximum PEP to retain scored transitions for quantification (requires transition-level scoring)" />
199 <param argument="ipf" type="select" display="radio" label="Should IPF results be reported if present? 'peptidoform': Report results on peptidoform-level, 'augmented': Augment OpenSWATH results with IPF scores, 'disable': Ignore IPF results" >
200 <option value="peptidoform" selected="True">peptidoform </option>
201 <option value="augmented">augmented</option>
202 <option value="disable">disable</option>
203 </param>
204 <param argument="ipf_max_peptidoform_pep" type="float" value="0.4" label="IPF: Filter results to maximum run-specific peptidoform-level PEP" />
205 <param argument="max_rs_peakgroup_qvalue" type="float" value="0.05" label="Filter results to maximum run-specific peak group-level q-value" />
206 <param argument="max_global_peptide_qvalue" type="float" value="0.01" label="Filter results to maximum global peptide-level q-value" />
207 <param argument="max_global_protein_qvalue" type="float" value="0.01" label="Filter results to maximum global protein-level q-value" />
208 </when>
209 <when value="matrix">
210
211 <param argument="ipf" type="select" display="radio" label="Should IPF results be reported if present? 'peptidoform': Report results on peptidoform-level, 'augmented': Augment OpenSWATH results with IPF scores, 'disable': Ignore IPF results" >
212 <option value="peptidoform" selected="True">peptidoform </option>
213 <option value="augmented">augmented</option>
214 <option value="disable">disable</option>
215 </param>
216 <param argument="ipf_max_peptidoform_pep" type="float" value="0.4" label="IPF: Filter results to maximum run-specific peptidoform-level PEP" />
217 <param argument="max_rs_peakgroup_qvalue" type="float" value="0.05" label="Filter results to maximum run-specific peak group-level q-value" />
218 <param argument="max_global_peptide_qvalue" type="float" value="0.01" label="Filter results to maximum global peptide-level q-value" />
219 <param argument="max_global_protein_qvalue" type="float" value="0.01" label="Filter results to maximum global protein-level q-value" />
220 </when>
221 <when value="score_plots"/>
222 </conditional>
223 <param name="peptide_error" type="boolean" truevalue="--peptide" falsevalue="--no-peptide" checked="True" label="Append peptide-level error-rate estimates if available" help="(--peptide / --no-peptide)" />
224 <param name="protein_error" type="boolean" truevalue="--protein" falsevalue="--no-protein" checked="True" label="Append protein-level error-rate estimates if available" help="(--protein / --no-protein)" />
225 <conditional name="conditional_swath2stats">
226 <param name="swath2stats" type="select" label="Use swath2stats to export file for statistics" >
227 <option value="yes_swath2stats" selected="True">yes</option>
228 <option value="no_swath2stats">no</option>
229 </param>
230 <when value="yes_swath2stats">
231 <param name="study_design" type="data" format="tabular" label="Study design tabular file" help="Needs to have columns with Filename, Condition, BioReplicate, Run" />
232 <conditional name="conditional_fdr_replica">
233 <param name="calc_fdr_replica" type="select" label="Filter for fdr and number of replicates" >
234 <option value="calc_fdr_replica_yes" selected="True">Yes</option>
235 <option value="calc_fdr_replica_no">No</option>
236 </param>
237 <when value="calc_fdr_replica_yes">
238 <param name="fft" type="float" value="0.5" label="FFT. Ratio of false positives to true negatives, q-values from pyProphet stats output" help="As an approximation, the q-values of multiple runs are averaged and supplied as argument FFT. Numeric from 0 to 1."/>
239 <param name="n_range" type="float" value="10" label="Option to set the number of orders of magnitude by which the m_score threshold is decreased" />
240 <param name="fdr_target" type="float" value="0.05" label="FDR target." help="An m_score cutoff achieving an FDR smaller than fdr_target will be selected. Calculated as FDR = decoys*FFT/targets" />
241 <param name="n_replica" type="integer" value="2" label="Number Replicates." help="Number of measurements within at least one condition that have to pass the mscore threshold for this transition." />
242 </when>
243 <when value="calc_fdr_replica_no"/>
244 </conditional>
245 <conditional name="conditional_max_pep">
246 <param name="filter_max_pep" type="select" label="Filter for a maximum number of peptides per protein" >
247 <option value="filter_max_pep_yes" selected="True">Yes</option>
248 <option value="filter_max_pep_no">No</option>
249 </param>
250 <when value="filter_max_pep_yes">
251 <param name="n_peptides_max" type="integer" value="10" label="Maximum number of peptides per protein." help="Maximum number of highest intense peptides to filter the data on." />
252 </when>
253 <when value="filter_max_pep_no"/>
254 </conditional>
255 <conditional name="conditional_min_pep">
256 <param name="filter_min_pep" type="select" label="Filter for proteins that are supported by a minimum number of peptides" >
257 <option value="filter_min_pep_yes" selected="True">Yes</option>
258 <option value="filter_min_pep_no">No</option>
259 </param>
260 <when value="filter_min_pep_yes">
261 <param name="n_peptides_min" type="integer" value="2" label="Minimum number of peptides per protein" help="Minimal number of peptide IDs associated with a protein ID in order to be kept in the dataset." />
262 </when>
263 <when value="filter_min_pep_no"/>
264 </conditional>
265 </when>
266 <when value="no_swath2stats"/>
267 </conditional>
268 </inputs>
269 <outputs>
270 <data name="export_file" format="tabular" label="${tool.name} on ${on_string}: export.tabular" >
271 <filter>conditional_output['format'] != 'score_plots'</filter>
272 </data>
273 <data name="score_plots" format="pdf" label="${tool.name} on ${on_string}: score_plots.pdf" >
274 <filter>conditional_output['format'] == 'score_plots'</filter>
275 </data>
276 <data name="summary" format="pdf" from_work_dir="summary.pdf" label = "${tool.name} on ${on_string}: summary.pdf">
277 <filter>conditional_swath2stats['swath2stats'] == 'yes_swath2stats'</filter>
278 </data>
279 <data name="peptide_signal" format="tabular" label="${tool.name} on ${on_string}: peptide_signal.tabular" from_work_dir="peptide_signal.tabular" >
280 <filter>conditional_swath2stats['swath2stats'] == 'yes_swath2stats'</filter>
281 </data>
282 <data name="protein_signal" format="tabular" label="${tool.name} on ${on_string}: protein_signal.tabular" from_work_dir="protein_signal.tabular" >
283 <filter>conditional_swath2stats['swath2stats'] == 'yes_swath2stats'</filter>
284 </data>
285 <data name="msstats_input" format="tabular" label="${tool.name} on ${on_string}: msstats_input.tabular" from_work_dir="msstats_input.tabular" >
286 <filter>conditional_swath2stats['swath2stats'] == 'yes_swath2stats'</filter>
287 </data>
288 </outputs>
289 <tests>
290 <test expect_num_outputs="1">
291 <param name="input" value="protein2.osw" ftype="osw" />
292 <param name="format" value="legacy_merged" />
293 <param name="max_global_peptide_qvalue" value="0.2" />
294 <conditional name="conditional_swath2stats">
295 <param name="swath2stats" value="no_swath2stats"/>
296 </conditional>
297 <output name="export_file" file="output.tabular" />
298 </test>
299 <test expect_num_outputs="1">
300 <param name="input" value="protein2.osw" ftype="osw" />
301 <param name="format" value="score_plots" />
302 <conditional name="conditional_swath2stats">
303 <param name="swath2stats" value="no_swath2stats"/>
304 </conditional>
305 <output name="score_plots" file="score_plots.pdf" />
306 </test>
307 <test expect_failure="true">
308 <param name="input" value="protein2.osw" ftype="osw" />
309 <param name="format" value="legacy_merged" />
310 <conditional name="conditional_swath2stats">
311 <param name="study_design" value="study_design.tabular" ftype="tabular" />
312 <conditional name="conditional_fdr_replica">
313 <param name="calc_fdr_replica" value="calc_fdr_replica_no"/>
314 </conditional>
315 <conditional name="conditional_max_pep">
316 <param name="filter_max_pep" value="filter_max_pep_no" />
317 </conditional>
318 <conditional name="conditional_min_pep">
319 <param name="filter_min_pep" value="filter_min_pep_no" />
320 </conditional>
321 </conditional>
322 <assert_stderr>
323 <has_text text="replacement has 1 row, data has 0" />
324 </assert_stderr>
325 </test>
326 </tests>
327 <help>
328 <![CDATA[
329 **What it does**
330
331 PyProphet: Semi-supervised learning and scoring of OpenSWATH results.
332
333 Export tabular (tsv) tables.
334
335 Optional SWATH2stats output. SWATH2stats is intended to transform SWATH data from the OpenSWATH software into a format readable by other statistics packages while performing filtering, annotation and FDR estimation.
336
337 **Study design file for SWATH2stats**
338
339 - Tabular file with columns that are named: Filename, Condition, BioReplicate, Run.
340 - The Filename should be part of, or identical to, the original filenames used in the OpenSWATH workflow
341 - The Condition should be a label for the experimental group the sample belongs to (e.g. healthy, diseased)
342 - The BioReplicate corresponds to the biological replicate
343 - The Run is the number of the run in which the sample was measured
344
345 ::
346
347 Filename Condition BioReplicate Run
348 healthy1.mzml healthy 1 1
349 healthy2.mzml healthy 2 2
350 diseased1.mzml diseased 3 3
351 ...
352 ...
353
354
355 PyProphet is a Python re-implementation of the mProphet algorithm (Reiter 2010 Nature Methods) optimized for SWATH-MS data acquired by data-independent acquisition (DIA). The algorithm was originally published in (Teleman 2014 Bioinformatics) and has since been extended to support new data types and analysis modes (Rosenberger 2017, Nature biotechnology and Nature methods).
356
357 For more information, visit @link@
358
359 ]]>
360 </help>
361 <expand macro="citations">
362 <citation type="doi">10.1371/journal.pone.0153160</citation>
363 </expand>
364 </tool>