comparison metanovo.xml @ 0:9025f297a511 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/metanovo commit 97229d4157cf21c8a55433cafdc477d76e0f1c89"
author galaxyp
date Tue, 29 Mar 2022 16:54:19 +0000
parents
children 6066b729f9aa
comparison
equal deleted inserted replaced
-1:000000000000 0:9025f297a511
<tool id="metanovo" name="MetaNovo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
    <description>Produce targeted databases for mass spectrometry analysis.</description>
    <!--
        Tokens substituted throughout this wrapper:
          TOOL_VERSION    - version of the metanovo package that is requested below
          VERSION_SUFFIX  - wrapper revision appended to the tool version as "+galaxyN"
          SUBSTITUTION_RX - character class of everything that is replaced by "_" when
                            dataset names are turned into file names
        macros_modifications.xml presumably supplies the fixed_modifications and
        variable_modifications macros expanded in the inputs section (verify).
    -->
    <macros>
        <token name="@TOOL_VERSION@">1.9.4</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <token name="@SUBSTITUTION_RX@">[^\w\-\.]</token>
        <import>macros_modifications.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">metanovo</requirement>
    </requirements>
<command detect_errors="exit_code">
<![CDATA[
    ## The regular-expression module is needed to turn element identifiers into safe file names.
    #import re
    #set $mgf_dir = 'mgf_files'
    #set $fasta_dir = 'fasta_file'
    #set $fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))

    ## Stage the inputs under the directory names that the generated config file points to.
    mkdir '$mgf_dir' &&
    mkdir '$fasta_dir' &&
    ln -s '$input_fasta' '$fasta_dir/$fasta_name' &&

    #if $input_type.type == "collection"
        ## Link every element of the collection under its own sanitized name.
        #for $mgf in $input_type.input_mgf_collection
            #set $mgf_name = re.sub('@SUBSTITUTION_RX@', '_', str($mgf.element_identifier))
            ln -s '$mgf' '$mgf_dir/$mgf_name' &&
        #end for
    #else
        ## Parameters nested in a conditional are addressed through the conditional's name.
        #set $mgf_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_type.input_mgf.element_identifier))
        ln -s '$input_type.input_mgf' '$mgf_dir/$mgf_name' &&
    #end if

    ## Copy the rendered configfile into the working directory and run MetaNovo with it.
    cat '$metanovo_config' > config.sh &&
    metanovo.sh config.sh
]]>
</command>
37
<configfiles>
    <!--
        Shell-style KEY=value settings. The command section copies the rendered file to
        config.sh and hands it to metanovo.sh. Lines starting with "##" are Cheetah
        comments and do not appear in the rendered file.
    -->
    <configfile name="metanovo_config"><![CDATA[#slurp
#import re
## Input and output locations (must match the directories created by the command section).
MGF_FOLDER=mgf_files
#set $fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))
FASTA_FILE=fasta_file/'${fasta_name}'
OUTPUT_FOLDER=.
## Processing control.
CHUNKSIZE=${processing_control.CHUNKSIZE}
THREAD_LIMIT=${processing_control.THREAD_LIMIT}
JVM_Xmx='${processing_control.JVM_Xmx}'
JVM_Xms='${processing_control.JVM_Xms}'
## MetaNovo parameters.
mn_specificity='${metanovo_parameters.mn_specificity}'
mn_enzymes='${metanovo_parameters.mn_enzymes}'
mn_max_missed_cleavages=${metanovo_parameters.mn_max_missed_cleavages}
## De novo engine switches (fixed by this wrapper, not exposed as inputs).
dg_pepnovo=0
dg_pnovo=0
dg_novor=0
dg_directag=1
## Spectrum matching.
prec_tol=${spectrum_matching_parameters.prec_tol}
prec_ppm=${spectrum_matching_parameters.prec_ppm}
frag_tol=${spectrum_matching_parameters.frag_tol}
frag_ppm=${spectrum_matching_parameters.frag_ppm}
digestion=${spectrum_matching_parameters.digestion}
enzyme='${spectrum_matching_parameters.enzyme}'
specificity=${spectrum_matching_parameters.specificity}
mc='${spectrum_matching_parameters.mc}'
fixed_mods="${spectrum_matching_parameters.fixed_mods}"
variable_mods="${spectrum_matching_parameters.variable_mods}"
min_charge=${spectrum_matching_parameters.min_charge}
max_charge=${spectrum_matching_parameters.max_charge}
fi='${spectrum_matching_parameters.fi}'
ri='${spectrum_matching_parameters.ri}'
min_isotope='${spectrum_matching_parameters.min_isotope}'
max_isotope='${spectrum_matching_parameters.max_isotope}'
## Spectrum annotation.
annotation_level=${spectrum_annotation.annotation_level}
annotation_high_resolution=${spectrum_annotation.annotation_high_resolution}
## Sequence matching.
sequence_index_type=${sequence_matching.sequence_index_type}
sequence_matching_type=${sequence_matching.sequence_matching_type}
sequence_matching_x=${sequence_matching.sequence_matching_x}
## Import filters.
import_peptide_length_min=${import_filters.import_peptide_length_min}
import_peptide_length_max=${import_filters.import_peptide_length_max}
import_precursor_mz_ppm=${import_filters.import_precursor_mz_ppm}
exclude_unknown_ptms=${import_filters.exclude_unknown_ptms}
## PTM localization.
ptm_score=${ptm_localization.ptm_score}
score_neutral_losses=${ptm_localization.score_neutral_losses}
ptm_sequence_matching_type=${ptm_localization.ptm_sequence_matching_type}
ptm_alignment=${ptm_localization.ptm_alignment}
## Gene annotation.
useGeneMapping=${gene_annotation.useGeneMapping}
updateGeneMapping=${gene_annotation.updateGeneMapping}
## Protein inference.
simplify_groups=${protein_inference.simplify_groups}
simplify_score=${protein_inference.simplify_score}
simplify_enzymaticity=${protein_inference.simplify_enzymaticity}
simplify_evidence=${protein_inference.simplify_evidence}
simplify_uncharacterized=${protein_inference.simplify_uncharacterized}
## Validation levels.
psm_fdr=${validation_levels.psm_fdr}
peptide_fdr=${validation_levels.peptide_fdr}
protein_fdr=${validation_levels.protein_fdr}
group_psms=${validation_levels.group_psms}
group_peptides=${validation_levels.group_peptides}
merge_subgroups=${validation_levels.merge_subgroups}
## Fraction analysis.
protein_fraction_mw_confidence='${fraction_analysis.protein_fraction_mw_confidence}'
## PepNovo settings (fixed by this wrapper).
pepnovo_hitlist_length=1
pepnovo_estimate_charge=1
pepnovo_correct_prec_mass=1
pepnovo_discard_spectra=1
pepnovo_fragmentation_model='CID_IT_TRYP'
pepnovo_generate_blast=0
## DirecTag settings.
directag_tic_cutoff=${directag.directag_tic_cutoff}
directag_max_peak_count=${directag.directag_max_peak_count}
directag_intensity_classes=${directag.directag_intensity_classes}
directag_adjust_precursor=${directag.directag_adjust_precursor}
directag_min_adjustment='${directag.directag_min_adjustment}'
directag_max_adjustment='${directag.directag_max_adjustment}'
directag_adjustment_step='${directag.directag_adjustment_step}'
directag_charge_states='${directag.directag_charge_states}'
directag_ms_charge_state='${directag.directag_ms_charge_state}'
directag_duplicate_spectra='${directag.directag_duplicate_spectra}'
directag_deisotoping='${directag.directag_deisotoping}'
directag_isotope_tolerance='${directag.directag_isotope_tolerance}'
directag_complement_tolerance='${directag.directag_complement_tolerance}'
directag_tag_length='${directag.directag_tag_length}'
directag_max_var_mods='${directag.directag_max_var_mods}'
directag_max_tag_count='${directag.directag_max_tag_count}'
directag_intensity_weight='${directag.directag_intensity_weight}'
directag_fidelity_weight='${directag.directag_fidelity_weight}'
directag_complement_weight='${directag.directag_complement_weight}'
## Novor settings (fixed by this wrapper).
novor_fragmentation=HCD
novor_mass_analyzer=Trap
]]></configfile>
</configfiles>
128
129 <inputs>
<!--
    MGF spectra are supplied either as one dataset or as a collection whose
    elements are all processed by a single job. The command section dereferences
    the chosen input unconditionally, so neither input may be left empty.
-->
<conditional name="input_type">
    <param name="type" type="select" label="MGF Input Type" help="Submit either a single file, or a collection of files.">
        <option value="single" selected="true">Single file</option>
        <option value="collection">Collection</option>
    </param>
    <when value="single">
        <param name="input_mgf" type="data" format="mgf" label="MGF File"/>
    </when>
    <when value="collection">
        <!-- format restricts the selectable collections to those holding MGF datasets. -->
        <param name="input_mgf_collection" type="data_collection" format="mgf" label="MGF Collection"/>
    </when>
</conditional>

<!-- Protein database that the spectra are searched against. -->
<param name="input_fasta" type="data" format="fasta" label="FASTA File"/>
144
<!-- Resource settings; each value is substituted into the metanovo_config configfile. -->
<section name="processing_control" title="Processing Control" expanded="False">
    <param name="CHUNKSIZE" type="integer" value="100000" optional="true" label="Size to split fasta for parallel processing"/>
    <param name="THREAD_LIMIT" type="integer" value="2" optional="true" label="How many threads to use per node"/>
    <param name="JVM_Xmx" type="text" value="10000M" optional="true" label="Maximum memory allocated to each Java thread"/>
    <param name="JVM_Xms" type="text" value="1024M" optional="true" label="Minimum memory allocated to each Java thread"/>
</section>
<!-- Digestion settings written to the mn_* keys of the metanovo_config configfile. -->
<section name="metanovo_parameters" title="MetaNovo Parameters" expanded="False">
    <param name="mn_specificity" argument="-mn_specificity" type="select" label="Enzyme Specificity">
        <option value="specific" selected="true">specific</option>
        <option value="semi-specific">semi-specific</option>
        <option value="unspecific">unspecific</option>
    </param>
    <param name="mn_enzymes" argument="-mn_enzymes" type="select" label="Enzyme Rule">
        <option value="Trypsin">Trypsin</option>
        <option value="Trypsin, no P rule" selected="true">Trypsin, no P rule</option>
        <option value="Whole protein">Whole protein</option>
    </param>
    <param name="mn_max_missed_cleavages" argument="-mn_max_missed_cleavages" type="integer" value="2" optional="true" label="Number of enzymatic missed cleavages"/>
</section>
<!-- Search settings: mass tolerances, digestion, modifications, charge range, ion types and isotopes. -->
<section name="spectrum_matching_parameters" title="Spectrum Matching Parameters" expanded="False">
    <!-- Precursor and fragment tolerances, each with its own unit selector. -->
    <param name="prec_tol" argument="-prec_tol" type="float" value="10.0" optional="true" label="Precursor ion mass tolerance"/>
    <param name="prec_ppm" argument="-prec_ppm" type="select" label="Precursor ion tolerance unit">
        <option value="0">Da</option>
        <option value="1" selected="true">ppm</option>
    </param>
    <param name="frag_tol" argument="-frag_tol" type="float" value="0.05" optional="true" label="Fragment ion mass tolerance"/>
    <param name="frag_ppm" argument="-frag_ppm" type="select" label="Fragment ion tolerance unit">
        <option value="0" selected="true">Da</option>
        <option value="1">ppm</option>
    </param>

    <!-- Digestion. -->
    <param name="digestion" argument="-digestion" type="select" label="Digestion">
        <option value="0" selected="true">Enzyme</option>
        <option value="1">Unspecific</option>
        <option value="2">Whole Protein</option>
    </param>
    <param name="enzyme" argument="-enzyme" type="select" multiple="true" label="Enzyme">
        <option value="Trypsin">Trypsin</option>
        <option value="Trypsin (no P rule)" selected="true">Trypsin (no P rule)</option>
        <option value="Arg-C">Arg-C</option>
        <option value="Arg-C (no P rule)">Arg-C (no P rule)</option>
        <option value="Arg-N">Arg-N</option>
        <option value="Glu-C">Glu-C</option>
        <option value="Lys-C">Lys-C</option>
        <option value="Lys-C (no P rule)">Lys-C (no P rule)</option>
        <option value="Lys-N">Lys-N</option>
        <option value="Asp-N">Asp-N</option>
        <option value="Asp-N (ambic)">Asp-N (ambic)</option>
        <option value="Chymotrypsin">Chymotrypsin</option>
        <option value="Chymotrypsin (no P rule)">Chymotrypsin (no P rule)</option>
        <option value="Pepsin A">Pepsin A</option>
        <option value="CNBr">CNBr</option>
        <option value="Thermolysin">Thermolysin</option>
        <option value="LysargiNase">LysargiNase</option>
    </param>
    <param name="specificity" argument="-specificity" type="select" label="Specificity">
        <option value="0" selected="true">Specific</option>
        <option value="1">Semi-Specific</option>
        <option value="2">N-term Specific</option>
        <option value="3">C-term Specific</option>
    </param>
    <param name="mc" argument="-mc" type="text" value="2" optional="true" label="Number of allowed missed cleavages" help="If more than one enzyme was used, please provide the missed cleavages for every enzyme in the same order, with a comma separated list, e.g. &quot;2, 1&quot;."/>

    <!-- Modification choices come from macros defined outside this file. -->
    <param name="fixed_mods" argument="-fixed_mods" type="select" multiple="true" label="Fixed modifications as comma separated list">
        <expand macro="fixed_modifications"/>
    </param>
    <param name="variable_mods" argument="-variable_mods" type="select" multiple="true" label="Variable modifications as comma separated list">
        <expand macro="variable_modifications"/>
    </param>

    <!-- Charge range, ion types and precursor isotopes. -->
    <param name="min_charge" argument="-min_charge" type="integer" value="2" optional="true" label="Minimal charge to search for"/>
    <param name="max_charge" argument="-max_charge" type="integer" value="4" optional="true" label="Maximal charge to search for"/>
    <param name="fi" argument="-fi" type="text" value="b" optional="true" label="Type of forward ion searched"/>
    <param name="ri" argument="-ri" type="text" value="y" optional="true" label="Type of rewind ion searched"/>
    <param name="min_isotope" argument="-min_isotope" type="integer" value="0" optional="true" label="Minimum precursor isotope"/>
    <param name="max_isotope" argument="-max_isotope" type="integer" value="1" optional="true" label="Maximum precursor isotope"/>
</section>
<!-- Peak annotation settings; the boolean is rendered as 1 or 0 in the config file. -->
<section name="spectrum_annotation" title="Spectrum Annotation" expanded="False">
    <param name="annotation_level" argument="-annotation_level" type="float" value="0.75" optional="true" label="The intensity threshold to consider for annotation" help="Using percentiles, 0.75 means that the 25% most intense peaks will be annotated."/>
    <param name="annotation_high_resolution" argument="-annotation_high_resolution" type="boolean" truevalue="1" falsevalue="0" checked="true" label="If true the most accurate peak will be selected within the m/z tolerance."/>
</section>
<!-- Peptide-to-protein sequence matching settings. -->
<section name="sequence_matching" title="Sequence Matching" expanded="False">
    <param name="sequence_index_type" argument="-sequence_index_type" type="integer" value="0" optional="true" label="sequence_index_type (deprecated)"/>
    <param name="sequence_matching_type" argument="-sequence_matching_type" type="select" label="The peptide to protein sequence matching type">
        <option value="0">Character Sequence</option>
        <option value="1">Amino Acids</option>
        <option value="2" selected="true">Indistinguishable Amino Acids</option>
    </param>
    <param name="sequence_matching_x" argument="-sequence_matching_x" type="float" value="0.25" optional="true" label="The maximal share of Xs in a sequence, 0.25 means 25% of X's"/>
</section>
<!-- Filters applied when identification files are imported. -->
<section name="import_filters" title="Import Filters" expanded="False">
    <param name="import_peptide_length_min" argument="-import_peptide_length_min" type="integer" value="8" optional="true" label="The minimal peptide length to consider when importing identification files"/>
    <param name="import_peptide_length_max" argument="-import_peptide_length_max" type="integer" value="30" optional="true" label="The maximal peptide length to consider when importing identification files"/>
    <!-- The argument spelling below is kept exactly as the wrapper author marked it ([sic]). -->
    <param name="import_precursor_mz_ppm" argument="-import_precurosor_mz_ppm [sic]" type="select" label="Maximal precursor ion deviation unit">
        <option value="0" selected="true">Da</option>
        <option value="1">ppm</option>
    </param>
    <param name="exclude_unknown_ptms" argument="-exclude_unknown_ptms" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Peptides presenting unrecognized PTMs will be excluded"/>
</section>
<!-- Modification (PTM) localization scoring settings. -->
<section name="ptm_localization" title="PTM Localization" expanded="False">
    <param name="ptm_score" argument="-ptm_score" type="select" label="The PTM probabilistic score to use for modification localization">
        <option value="0">A-score</option>
        <option value="1" selected="true">PhosphoRS</option>
        <option value="2">None</option>
    </param>
    <param name="score_neutral_losses" argument="-score_neutral_losses" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Include neutral losses in spectrum annotation of the PTM score"/>
    <param name="ptm_sequence_matching_type" argument="-ptm_sequence_matching_type" type="select" label="The modification to peptide sequence matching type">
        <option value="0">Character Sequence</option>
        <option value="1" selected="true">Amino Acids</option>
        <option value="2">Indistinguishable Amino Acids</option>
    </param>
    <param name="ptm_alignment" argument="-ptm_alignment" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Align peptide ambiguously localized PTMs on confident sites"/>
</section>
<!-- Gene mapping switches; both are rendered as 1 or 0 in the config file. -->
<section name="gene_annotation" title="Gene Annotation" expanded="False">
    <param name="useGeneMapping" argument="-useGeneMapping" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use and save gene mappings along with the project" help="UniProt databases only"/>
    <param name="updateGeneMapping" argument="-updateGeneMapping" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Update gene mappings automatically from Ensembl" help="UniProt databases only"/>
</section>
<!-- Protein group simplification switches; all are enabled by default. -->
<section name="protein_inference" title="Protein Inference" expanded="False">
    <param name="simplify_groups" argument="-simplify_groups" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Simplify protein groups"/>
    <param name="simplify_score" argument="-simplify_score" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Simplify protein groups based on the PeptideShaker target/decoy score"/>
    <param name="simplify_enzymaticity" argument="-simplify_enzymaticity" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Simplify protein groups based on the peptide enzymaticity"/>
    <param name="simplify_evidence" argument="-simplify_evidence" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Simplify protein groups based on the UniProt protein evidence"/>
    <param name="simplify_uncharacterized" argument="-simplify_uncharacterized" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Simplify protein groups based on the protein characterization"/>
</section>
<!-- FDR thresholds and grouping switches used for scoring and validation. -->
<section name="validation_levels" title="Validation Levels" expanded="False">
    <param name="psm_fdr" argument="-psm_fdr" type="integer" value="1" optional="true" label="FDR at the PSM level in percent"/>
    <param name="peptide_fdr" argument="-peptide_fdr" type="integer" value="1" optional="true" label="FDR at the peptide level in percent"/>
    <param name="protein_fdr" argument="-protein_fdr" type="integer" value="1" optional="true" label="FDR at the protein level in percent"/>
    <param name="group_psms" argument="-group_psms" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Group PSMs by charge for scoring and validation"/>
    <param name="group_peptides" argument="-group_peptides" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Group peptides by modification status for scoring and validation"/>
    <!-- The argument names this parameter's own option; it previously repeated -group_peptides. -->
    <param name="merge_subgroups" argument="-merge_subgroups" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Merge small PSM and peptide groups for scoring and validation"/>
</section>
<!-- Single threshold used for the fraction molecular-weight plot. -->
<section name="fraction_analysis" title="Fraction Analysis" expanded="False">
    <param name="protein_fraction_mw_confidence" argument="-protein_fraction_mw_confidence" type="float" value="95.0" optional="true" label="Minimum confidence required for a protein in the fraction MW plot"/>
</section>
<!-- DirecTag settings; DirecTag is the only de novo engine switched on in the config file. -->
<section name="directag" title="DirecTag" expanded="False">
    <!-- Peak selection. -->
    <param name="directag_tic_cutoff" argument="-directag_tic_cutoff" type="integer" value="85" optional="true" label="TIC cutoff in percent"/>
    <param name="directag_max_peak_count" argument="-directag_max_peak_count" type="integer" value="400" optional="true" label="Max peak count"/>
    <param name="directag_intensity_classes" argument="-directag_intensity_classes" type="integer" value="3" optional="true" label="Number of intensity classes"/>

    <!-- Precursor adjustment. -->
    <param name="directag_adjust_precursor" argument="-directag_adjust_precursor" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Adjust precursor"/>
    <param name="directag_min_adjustment" argument="-directag_min_adjustment" type="float" value="-2.5" optional="true" label="Minimum precursor adjustment"/>
    <param name="directag_max_adjustment" argument="-directag_max_adjustment" type="float" value="2.5" optional="true" label="Maximum precursor adjustment"/>
    <param name="directag_adjustment_step" argument="-directag_adjustment_step" type="float" value="0.1" optional="true" label="Precursor adjustment step"/>

    <!-- Charge states and deisotoping. -->
    <param name="directag_charge_states" argument="-directag_charge_states" type="integer" value="3" optional="true" label="Number of charge states considered"/>
    <param name="directag_ms_charge_state" argument="-directag_ms_charge_state" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use charge state from M spectrum"/>
    <param name="directag_duplicate_spectra" argument="-directag_duplicate_spectra" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Duplicate spectra per charge"/>
    <param name="directag_deisotoping" argument="-directag_deisotoping" type="select" label="Deisotoping mode">
        <option value="0" selected="true">No deisotoping</option>
        <option value="1">Precursor only</option>
        <option value="2">Precursor and candidate</option>
    </param>
    <param name="directag_isotope_tolerance" argument="-directag_isotope_tolerance" type="float" value="0.25" optional="true" label="Isotope mz tolerance"/>
    <param name="directag_complement_tolerance" argument="-directag_complement_tolerance" type="float" value="0.5" optional="true" label="Complement mz tolerance"/>

    <!-- Tag generation and score weights. -->
    <param name="directag_tag_length" argument="-directag_tag_length" type="integer" value="4" optional="true" label="Tag length"/>
    <param name="directag_max_var_mods" argument="-directag_max_var_mods" type="integer" value="2" optional="true" label="Maximum variable modifications per sequence"/>
    <param name="directag_max_tag_count" argument="-directag_max_tag_count" type="integer" value="5" optional="true" label="Maximum tag count"/>
    <param name="directag_intensity_weight" argument="-directag_intensity_weight" type="float" value="1.0" optional="true" label="Intensity score weight"/>
    <param name="directag_fidelity_weight" argument="-directag_fidelity_weight" type="float" value="1.0" optional="true" label="Fidelity score weight"/>
    <param name="directag_complement_weight" argument="-directag_complement_weight" type="float" value="1.0" optional="true" label="Complement score weight"/>
</section>
302 </inputs>
<!-- Both datasets are picked up from the metanovo/ sub-directory of the job working directory. -->
<outputs>
    <data name="output_fasta" format="fasta" label="MetaNovo Output FASTA" from_work_dir="metanovo/metanovo.fasta"/>
    <data name="output_csv" format="csv" label="MetaNovo Output CSV" from_work_dir="metanovo/metanovo.csv"/>
</outputs>
<!--
    Help text is reStructuredText and is kept at column 0 inside the CDATA section.
    In the simple table every cell must start under its column border; only the
    right-most column may run past its border.
-->
<help><![CDATA[
**MetaNovo**

MetaNovo searches MS/MS data against a FASTA database of known proteins.

Two outputs are produced:

- MetaNovo Output FASTA: the matching proteins produced by the search.
- MetaNovo Output CSV: information about the job and other useful metadata.

Two inputs are required: an MGF file or files and a FASTA database file.

Two different input types are available for the MGF input. The correct input configuration depends on the desired use case, as outlined below:

======================================================= =============
Use case                                                Configuration
======================================================= =============
Single input MGF file, single output FASTA file         **Single file** input with **Single dataset** selected
Multiple input MGF files, multiple output FASTA files\* **Single file** input with **Multiple datasets** OR **Dataset collection** selected
Multiple input MGF files, single output FASTA file      **Collection** input
======================================================= =============

**\*** One for each MGF file.

In the second use case, a separate MetaNovo job is spawned for each input MGF. In the third use case, a single MetaNovo job runs with all MGF files in the collection as input.

If the third use case fails due to memory limitations, users are recommended to use the second option. The multiple output FASTA databases may be merged to generate a reduced, compact database.
]]>
</help>
<!-- Publication to cite when this tool is used, referenced by DOI. -->
<citations>
    <citation type="doi">10.1101/605550</citation>
</citations>
339 </tool>