comparison encyclopedia_quantify.xml @ 0:4081e4faa4ab draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/encyclopedia/tools/encyclopedia commit d94002fc79f552c8a64ffca86298396b1568df97"
author galaxyp
date Mon, 14 Sep 2020 17:06:06 +0000
parents
children 1c5cbf8f79ce
comparison
equal deleted inserted replaced
-1:000000000000 0:4081e4faa4ab
1 <tool id="encyclopedia_quantify" name="EncyclopeDIA Quantify" version="@VERSION@.0">
2 <description>samples from Data-Independent Acquisition (DIA) MS/MS Data</description> <!-- One-line summary that follows the tool name "EncyclopeDIA Quantify". -->
3 <macros> <!-- Every <expand macro="..."/> and @TOKEN@ used in this file is presumably defined in the imported macros.xml, which is not visible here. -->
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" /> <!-- Presumably declares the software packages the command needs; confirm in macros.xml. -->
7 <command detect_errors="aggressive"><![CDATA[
8 @SEARCH2LIB_CMDS@
9 ]]></command> <!-- The entire command line is the @SEARCH2LIB_CMDS@ token, which is not defined in this file (presumably in macros.xml). NOTE(review): the from_work_dir paths in <outputs> presumably match the file names that command writes; verify against the macro. -->
10 <inputs> <!-- Scan files, a required library, FASTA inputs and an options section (all expanded from macros), plus two parameters declared inline. -->
11 <expand macro="scan_inputs"/>
12 <expand macro="lib_input" optional="false" libhelp="Use a Chromatogram elib from SearchToLib"/> <!-- The library is mandatory for this tool (optional="false"). -->
13 <expand macro="fasta_input"/>
14 <expand macro="target_fasta"/>
15 <expand macro="options_section"/>
16 <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="true" label="align between files" help="retention-time alignment of peptides should be enabled when quantifying samples"/> <!-- Enabled by default; the peptides and proteins output filters refer to this parameter as "a". -->
17 <param name="select_outputs" type="select" label="Select outputs" multiple="true"> <!-- Multi-select: each option value is tested by the <filter> of the matching output. Selected by default: log, elib, results, peptides, proteins. -->
18 <option value="log" selected="true">log</option>
19 <option value="elib" selected="true">elib</option>
20 <option value="features" selected="false">concatenated_features.txt</option>
21 <option value="results" selected="true">concatenated_results.txt</option>
22 <option value="decoy" selected="false">concatenated_decoy.txt</option>
23 <option value="rt_plots" selected="false">Retention Time Plots</option>
24 <option value="rt_tables" selected="false">Retention Time Tables</option>
25 <option value="peptides" selected="true">peptides.txt (requires align between files)</option>
26 <option value="proteins" selected="true">proteins.txt (requires align between files)</option>
27 </param>
28 </inputs>
29 <outputs> <!-- Each output is created only when its <filter> expression is true, i.e. when its key is among the values chosen in select_outputs. -->
30 <data name="log" format="txt" label="${tool.name} ${on_string} log" from_work_dir="search2lib.log">
31 <filter>'log' in select_outputs</filter>
32 </data>
33 <data name="elib" format="elib" label="${tool.name} ${on_string} elib" from_work_dir="chromatogram_library.elib">
34 <filter>'elib' in select_outputs</filter>
35 </data>
36 <data name="features" format="tabular" label="${tool.name} ${on_string} concatenated_features.txt" from_work_dir="inputs/chromatogram_library_concatenated_features.txt">
37 <filter>'features' in select_outputs</filter>
38 <actions> <!-- Sets the column_names metadata of the tabular dataset to the fixed header list below. -->
39 <action name="column_names" type="metadata" default="id,TD,ScanNr,topN,rank,peakZScore,peakCalibratedScore,deltaSn,avgIdotp,midIdotp,peakScore,peakWeightedScore,NCI,CIMassErrMean,CIMassErrVar,precursorMassErrMean,precursorMassErrVar,peakSimilarity,sampledTimes,midTime,spectraNorm,pepLength,charge2,charge3,precursorMz,sequence,protein" />
40 </actions>
41 </data>
42 <data name="results" format="tabular" label="${tool.name} ${on_string} concatenated_results.txt" from_work_dir="inputs/chromatogram_library_concatenated_results.txt">
43 <filter>'results' in select_outputs</filter>
44 <actions>
45 <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" />
46 </actions>
47 </data>
48 <data name="decoy" format="tabular" label="${tool.name} ${on_string} concatenated_decoy.txt" from_work_dir="inputs/chromatogram_library_concatenated_decoy.txt">
49 <filter>'decoy' in select_outputs</filter>
50 <actions> <!-- Same column names as the results output. -->
51 <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" />
52 </actions>
53 </data>
54 <collection name="rt_plots" type="list" label="${tool.name} - ${on_string}: Retention Time Plots">
55 <filter>library and 'rt_plots' in select_outputs</filter> <!-- Also requires the library input to be set; the lib_input macro is expanded with optional="false" in this tool. -->
56 <discover_datasets pattern="(?P&lt;designation&gt;.+\.pdf)" ext="pdf" directory="inputs"/> <!-- Collects files from the "inputs" directory whose names match the pattern; the text captured as "designation" names each list element. -->
57 </collection>
58 <collection name="rt_tables" type="list" label="${tool.name} - ${on_string}: Retention Time Tables">
59 <filter>library and 'rt_tables' in select_outputs</filter>
60 <discover_datasets pattern="(?P&lt;designation&gt;.+\.mzML\..+\.rt_fit\.txt)" ext="tabular" directory="inputs"/> <!-- Same discovery scheme as rt_plots, for names containing ".mzML." and ".rt_fit.txt". -->
61 </collection>
62 <data name="peptides" format="tabular" label="${tool.name} ${on_string} peptides.txt" from_work_dir="chromatogram_library.elib.peptides.txt">
63 <filter>a and 'peptides' in select_outputs</filter> <!-- "a" is the "align between files" boolean (argument -a); the output is skipped when alignment is disabled. -->
64 <actions> <!-- Names only the first three columns; the help text lists further per-file intensity columns after them. -->
65 <action name="column_names" type="metadata" default="Peptide,Protein,numFragments" />
66 </actions>
67 </data>
68 <data name="proteins" format="tabular" label="${tool.name} ${on_string} proteins.txt" from_work_dir="chromatogram_library.elib.proteins.txt">
69 <filter>a and 'proteins' in select_outputs</filter> <!-- Same alignment requirement as the peptides output. -->
70 <actions>
71 <action name="column_names" type="metadata" default="Protein,NumPeptides,PeptideSequences" />
72 </actions>
73 </data>
74 </outputs>
75 <tests> <!-- A single test case that leaves "a" and select_outputs at their defaults and checks two of the default outputs. -->
76 <test>
77 <param name="scan_inputs" ftype="mzml" value="BCS_hela_wide_500_900_1.mzML,BCS_hela_wide_500_900_2.mzML"/> <!-- Two scan files, passed as a comma-separated list. -->
78 <param name="library" ftype="elib" value="BCS_hela.elib"/>
79 <param name="fasta" ftype="fasta" value="uniprot_human.fasta"/>
80 <output name="results" ftype="tabular">
81 <assert_contents> <!-- Passes if the given text occurs anywhere in the results output. -->
82 <has_text text="GIEQAVQSHAVAEEEAR"/>
83 </assert_contents>
84 </output>
85 <output name="peptides" ftype="tabular">
86 <assert_contents> <!-- Passes if the given text occurs anywhere in the peptides output. -->
87 <has_text text="AYPLADAHLTK"/>
88 </assert_contents>
89 </output>
90 </test>
91 </tests>
92 <help><![CDATA[
93
94 **EncyclopeDIA Quantify**
95
96 @ENCYCLOPEDIA_WIKI@
97
98 EncyclopeDIA Quantify retention-time aligns peptides from the chromatogram library and produces quantitation results.
99
100
101 **Inputs**
102
103 - Spectrum files in mzML format
104 - A chromatogram library that can be generated by SearchToLib
105 - A protein database in FASTA format
106
107 @MSCONVERT_HELP@
108
109 **Outputs**
110
111 - A log file
112 - A Chromatogram Library (.elib)
113 - The identified features in tabular format
114 Feature values of scans that are used by Percolator to determine matches.
115 - The identified Peptide Spectral Match results in tabular format
116 Columns: PSMId, score, q-value, posterior_error_prob, peptide, proteinIds
117 - The identified peptides in tabular format
118 Per peptide: the normalized intensity for each scan file.
119 Columns: Peptide, Protein, numFragments, intensity_in_file1, intensity_in_file2, ...
120 - The identified proteins in tabular format
121 Per protein: the normalized intensity for each scan file.
122 Columns: Protein, NumPeptides, PeptideSequences, intensity_in_file1, intensity_in_file2, ...
123
124
125 **Typical DIA SearchToLib Workflow**
126
127 Two sets of Mass Spec MS/MS DIA data are collected for the experiment. In addition to collecting wide-window DIA experiments on each quantitative replicate, a pool containing peptides from every condition is measured using several staggered narrow-window DIA experiments.
128
129 1. SearchToLib is first run with the pooled narrow-window mzML files to create a combined DIA elib chromatogram library.
130 If a spectral library argument is provided, for example from **Prosit**, SearchToLib uses EncyclopeDIA to search each input spectrum mzML file.
131 Otherwise, SearchToLib uses Walnut, a FASTA database search engine for DIA data that uses PECAN-style scoring.
132
133
134 * Prosit_ generates a predicted spectrum library of fragmentation patterns and retention times for every +2H and +3H tryptic peptide in a FASTA database, with up to one missed cleavage.
135
136
137 2. EncyclopeDIA Quantify is then run on the wide-window quantitative replicate mzML files using that chromatogram library, with the *align between files* option, to produce quantification results.
138
139 .. image:: SearchToLib_Workflow.png
140 :width: 810
141 :height: 580
142
143 .. _Prosit: https://www.proteomicsdb.org/prosit
144
145 ]]></help>
146 <expand macro="citations" /> <!-- Presumably inserts the tool's <citations> block from macros.xml; confirm there. -->
147 </tool>