Mercurial > repos > galaxyp > encyclopedia_searchtolib
comparison encyclopedia_searchtolib.xml @ 0:62a718b76f62 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/encyclopedia/tools/encyclopedia commit d94002fc79f552c8a64ffca86298396b1568df97"
author | galaxyp |
---|---|
date | Mon, 14 Sep 2020 17:06:51 +0000 |
parents | |
children | 36880dfd9fa7 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:62a718b76f62 |
---|---|
1 <tool id="encyclopedia_searchtolib" name="SearchToLib" version="@VERSION@.0"> | |
2 <description>Build a Chromatogram Library from Data-Independent Acquisition (DIA) MS/MS Data</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
<!-- The entire command line is supplied by the @SEARCH2LIB_CMDS@ token. Its definition is not in this file; presumably it comes from the imported macros.xml (confirm there). -->
<!-- NOTE(review): detect_errors="aggressive" selects Galaxy's strictest built-in error detection; check the Galaxy tool schema for the exact checks it performs. -->
7 <command detect_errors="aggressive"><![CDATA[ | |
8 @SEARCH2LIB_CMDS@ | |
9 ]]></command> | |
10 <inputs> | |
<!-- Scan files, optional spectral library, FASTA inputs and the options section all come from macros expanded below. -->
11 <expand macro="scan_inputs"/> | |
12 <expand macro="lib_input" optional="true" libhelp="Use a Prosit dlib spectral library to make a chromatogram elib using EncyclopeDIA, or else leave blank to make a Chromatogram library from just the fasta using Walnut"/> | |
13 <expand macro="fasta_input"/> | |
14 <expand macro="target_fasta"/> | |
15 <expand macro="options_section"/> | |
<!-- The -a flag is referenced as "a" by the peptides/proteins output filters. -->
16 <param argument="-a" type="boolean" truevalue="true" falsevalue="false" checked="false" label="align between files" help="retention-time alignment of peptides is generally not needed when building a library from narrow-window spectrums"/> | |
<!-- Each option value below is tested by a matching <filter> in the outputs section; only log and elib are selected by default. -->
17 <param name="select_outputs" type="select" label="Select outputs" multiple="true"> | |
18 <option value="log" selected="true">log</option> | |
19 <option value="elib" selected="true">elib</option> | |
20 <option value="features" selected="false">concatenated_features.txt</option> | |
21 <option value="results" selected="false">concatenated_results.txt</option> | |
22 <option value="decoy" selected="false">concatenated_decoy.txt</option> | |
23 <option value="rt_plots" selected="false">Retention Time Plots (requires library)</option> | |
24 <option value="rt_tables" selected="false">Retention Time Tables (requires library)</option> | |
25 <option value="peptides" selected="false">peptides.txt (requires align between files)</option> | |
26 <option value="proteins" selected="false">proteins.txt (requires align between files)</option> | |
27 </param> | |
28 </inputs> | |
29 <outputs> | |
<!-- Every output is optional: each <filter> tests whether its key was chosen in the select_outputs parameter. -->
30 <data name="log" format="txt" label="${tool.name} ${on_string} log" from_work_dir="search2lib.log"> | |
31 <filter>'log' in select_outputs</filter> | |
32 </data> | |
33 <data name="elib" format="elib" label="${tool.name} ${on_string} elib" from_work_dir="chromatogram_library.elib"> | |
34 <filter>'elib' in select_outputs</filter> | |
35 </data> | |
36 <data name="features" format="tabular" label="${tool.name} ${on_string} concatenated_features.txt" from_work_dir="inputs/chromatogram_library_concatenated_features.txt"> | |
37 <filter>'features' in select_outputs</filter> | |
38 <actions> | |
39 <action name="column_names" type="metadata" default="id,TD,ScanNr,topN,rank,peakZScore,peakCalibratedScore,deltaSn,avgIdotp,midIdotp,peakScore,peakWeightedScore,NCI,CIMassErrMean,CIMassErrVar,precursorMassErrMean,precursorMassErrVar,peakSimilarity,sampledTimes,midTime,spectraNorm,pepLength,charge2,charge3,precursorMz,sequence,protein" /> | |
40 </actions> | |
41 </data> | |
42 <data name="results" format="tabular" label="${tool.name} ${on_string} concatenated_results.txt" from_work_dir="inputs/chromatogram_library_concatenated_results.txt"> | |
43 <filter>'results' in select_outputs</filter> | |
44 <actions> | |
45 <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" /> | |
46 </actions> | |
47 </data> | |
48 <data name="decoy" format="tabular" label="${tool.name} ${on_string} concatenated_decoy.txt" from_work_dir="inputs/chromatogram_library_concatenated_decoy.txt"> | |
49 <filter>'decoy' in select_outputs</filter> | |
50 <actions> | |
51 <action name="column_names" type="metadata" default="PSMId,score,q-value,posterior_error_prob,peptide,proteinIds" /> | |
52 </actions> | |
53 </data> | |
<!-- The retention time collections are only produced when a library input was supplied. -->
<!-- The regex named group is written with &lt; and &gt; entities because a literal less-than sign is not allowed inside an XML attribute value. -->
54 <collection name="rt_plots" type="list" label="${tool.name} - ${on_string}: Retention Time Plots"> | |
55 <filter>library and 'rt_plots' in select_outputs</filter> | |
56 <discover_datasets pattern="(?P&lt;designation&gt;.+\.pdf)" ext="pdf" directory="inputs"/> | |
57 </collection> | |
58 <collection name="rt_tables" type="list" label="${tool.name} - ${on_string}: Retention Time Tables"> | |
59 <filter>library and 'rt_tables' in select_outputs</filter> | |
60 <discover_datasets pattern="(?P&lt;designation&gt;.+\.mzML\..*\.rt_fit\.txt)" ext="tabular" directory="inputs"/> | |
61 </collection> | |
<!-- peptides and proteins additionally require the "align between files" flag (parameter a). -->
62 <data name="peptides" format="tabular" label="${tool.name} ${on_string} peptides.txt" from_work_dir="chromatogram_library.elib.peptides.txt"> | |
63 <filter>a and 'peptides' in select_outputs</filter> | |
64 <actions> | |
65 <action name="column_names" type="metadata" default="Peptide,Protein,numFragments" /> | |
66 </actions> | |
67 </data> | |
68 <data name="proteins" format="tabular" label="${tool.name} ${on_string} proteins.txt" from_work_dir="chromatogram_library.elib.proteins.txt"> | |
69 <filter>a and 'proteins' in select_outputs</filter> | |
70 <actions> | |
71 <action name="column_names" type="metadata" default="Protein,NumPeptides,PeptideSequences" /> | |
72 </actions> | |
73 </data> | |
74 </outputs> | |
75 <tests> | |
<!-- Test 1: two narrow-window mzML scans searched with a dlib spectral library supplied. Only the results output is asserted (it must contain peptide QDSAAVGFDYK). -->
76 <test> | |
77 <param name="scan_inputs" ftype="mzml" value="BCS_hela_narrow_3_1.mzML,BCS_hela_narrow_3_2.mzML"/> | |
78 <param name="library" ftype="dlib" value="small_pan_human_library.dlib"/> | |
79 <param name="fasta" ftype="fasta" value="uniprot_human.fasta"/> | |
80 <param name="select_outputs" value="log,elib,features,results"/> | |
81 <output name="results" ftype="tabular"> | |
82 <assert_contents> | |
83 <has_text text="QDSAAVGFDYK"/> | |
84 </assert_contents> | |
85 </output> | |
86 </test> | |
<!-- Test 2: same scans and FASTA but no library parameter; the same peptide is expected in the results output. -->
87 <test> | |
88 <param name="scan_inputs" ftype="mzml" value="BCS_hela_narrow_3_1.mzML,BCS_hela_narrow_3_2.mzML"/> | |
89 <param name="fasta" ftype="fasta" value="uniprot_human.fasta"/> | |
90 <param name="select_outputs" value="log,elib,features,results"/> | |
91 <output name="results" ftype="tabular"> | |
92 <assert_contents> | |
93 <has_text text="QDSAAVGFDYK"/> | |
94 </assert_contents> | |
95 </output> | |
96 </test> | |
97 </tests> | |
98 <help><![CDATA[ | |
99 **SearchToLIB** | |
100 | |
101 @ENCYCLOPEDIA_WIKI@ | |
102 | |
103 SearchToLIB uses the EncyclopeDIA algorithm, or the Walnut (Pecan) algorithm, to search Data-Independent Acquisition (DIA) MS/MS spectrum files and creates a DIA elib chromatogram library for EncyclopeDIA DIA quantitation search. | |
104 | |
105 | |
106 **Inputs** | |
107 | |
108 - Spectrum files in mzML format | |
109 - A protein database in fasta format | |
110 - An optional DDA Spectral library (.dlib) that can be generated by Prosit | |
111 - *SearchToLIB uses EncyclopeDIA if the Prosit dlib is provided, otherwise it uses Walnut with just a fasta.* | |
112 | |
113 @MSCONVERT_HELP@ | |
114 | |
115 **Outputs** | |
116 | |
117 - A log file | |
118 - A Chromatogram Library (.elib) | |
119 - The identified features in tabular format | |
120 Feature values of scans that are used by percolator to determine matches. | |
121 - The identified Peptide Spectral Match results in tabular format | |
122 Columns: PSMId, score, q-value, posterior_error_prob, peptide, proteinIds | |
123 - The identified peptides in tabular format | |
124 Per peptide: the normalized intensity for each scan file. | |
125 Columns: Peptide, Protein, numFragments, intensity_in_file1, intensity_in_file2, ... | |
126 - The identified proteins in tabular format | |
127 Per protein: the normalized intensity for each scan file. | |
128 Columns: Protein, NumPeptides, PeptideSequences, intensity_in_file1, intensity_in_file2, ... | |
129 | |
130 **Typical DIA Workflow** | |
131 | |
132 Two sets of Mass Spec MS/MS DIA data are collected for the experiment. In addition to collecting wide-window DIA experiments on each quantitative replicate, a pool containing peptides from every condition is measured using several staggered narrow-window DIA experiments. | |
133 | |
134 1. SearchToLib is first run with the pooled narrow-window mzML files to create a combined DIA elib chromatogram library. | |
135 If a Spectral library argument is provided, for example from **Prosit**, SearchToLIB uses EncyclopeDIA to search each input spectrum mzML file. | |
136 Otherwise, SearchToLIB uses Walnut, a FASTA database search engine for DIA data that uses PECAN-style scoring. | |
137 | |
138 | |
139 * Prosit_ generates a predicted spectrum library of fragmentation patterns and retention times for every +2H and +3H tryptic peptide in a FASTA database, with up to one missed cleavage. | |
140 | |
141 | |
142 2. EncyclopeDIA Quantify is then run on the wide-window quantitative replicate mzML files using that chromatogram library to produce quantification results. | |
143 | |
144 .. image:: SearchToLib_Workflow.png | |
145 :width: 810 | |
146 :height: 580 | |
147 | |
148 .. _Prosit: https://www.proteomicsdb.org/prosit | |
149 | |
150 ]]></help> | |
151 <expand macro="citations" /> | |
152 </tool> |