diff metanovo.xml @ 0:9025f297a511 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/metanovo commit 97229d4157cf21c8a55433cafdc477d76e0f1c89"
author galaxyp
date Tue, 29 Mar 2022 16:54:19 +0000
parents
children 6066b729f9aa
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metanovo.xml	Tue Mar 29 16:54:19 2022 +0000
@@ -0,0 +1,339 @@
+<tool id="metanovo" name="MetaNovo" version="@TOOL_VERSION@+galaxy0" profile="20.09">
+  <description>
+    Produce targeted databases for mass spectrometry analysis.
+  </description>
+  <requirements>
+    <requirement type="package" version="@TOOL_VERSION@">metanovo</requirement>
+  </requirements>
+  <macros>
+    <token name="@TOOL_VERSION@">1.9.4</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@SUBSTITUTION_RX@">[^\w\-\.]</token>
+    <import>macros_modifications.xml</import>
+  </macros>
+  <command>
+    <![CDATA[
+      #set $mgf_dir = 'mgf_files'
+      #set $fasta_dir = 'fasta_file'
+      #set fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))
+      mkdir $mgf_dir &&
+      mkdir $fasta_dir &&
+      ln -s '$input_fasta' '$fasta_dir/$fasta_name' &&
+
+      #if $input_type.type == "collection"
+      #set mgf_names = [re.sub('@SUBSTITUTION_RX@', '_', str($n.element_identifier)) for $n in $input_type.input_mgf_collection]
+      #for $mgf_name in $mgf_names:
+      ln -s '$input' '$mgf_dir/$mgf_name' &&
+      #end for
+      #else
+      #set mgf_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_type.input_mgf.element_identifier))
+      ln -s '$input_mgf' '$mgf_dir/$mgf_name' &&
+      #end if
+
+      cat $metanovo_config > config.sh &&
+      metanovo.sh config.sh
+    ]]>
+  </command>
+
+  <configfiles>
+    <configfile name="metanovo_config"><![CDATA[#slurp
+#import re
+MGF_FOLDER=mgf_files
+#set fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))
+FASTA_FILE=fasta_file/'$fasta_name'
+OUTPUT_FOLDER=.
+CHUNKSIZE=$processing_control.CHUNKSIZE
+THREAD_LIMIT=$processing_control.THREAD_LIMIT
+JVM_Xmx='$processing_control.JVM_Xmx'
+JVM_Xms='$processing_control.JVM_Xms'
+mn_specificity='$metanovo_parameters.mn_specificity'
+mn_enzymes='$metanovo_parameters.mn_enzymes'
+mn_max_missed_cleavages=$metanovo_parameters.mn_max_missed_cleavages
+dg_pepnovo=0
+dg_pnovo=0
+dg_novor=0
+dg_directag=1
+prec_tol=$spectrum_matching_parameters.prec_tol
+prec_ppm=$spectrum_matching_parameters.prec_ppm
+frag_tol=$spectrum_matching_parameters.frag_tol
+frag_ppm=$spectrum_matching_parameters.frag_ppm
+digestion=$spectrum_matching_parameters.digestion
+enzyme='$spectrum_matching_parameters.enzyme'
+specificity=$spectrum_matching_parameters.specificity
+mc='$spectrum_matching_parameters.mc'
+fixed_mods="$spectrum_matching_parameters.fixed_mods"
+variable_mods="$spectrum_matching_parameters.variable_mods"
+min_charge=$spectrum_matching_parameters.min_charge
+max_charge=$spectrum_matching_parameters.max_charge
+fi='$spectrum_matching_parameters.fi'
+ri='$spectrum_matching_parameters.ri'
+min_isotope='$spectrum_matching_parameters.min_isotope'
+max_isotope='$spectrum_matching_parameters.max_isotope'
+annotation_level=$spectrum_annotation.annotation_level
+annotation_high_resolution=$spectrum_annotation.annotation_high_resolution
+sequence_index_type=$sequence_matching.sequence_index_type
+sequence_matching_type=$sequence_matching.sequence_matching_type
+sequence_matching_x=$sequence_matching.sequence_matching_x
+import_peptide_length_min=$import_filters.import_peptide_length_min
+import_peptide_length_max=$import_filters.import_peptide_length_max
+import_precursor_mz_ppm=$import_filters.import_precursor_mz_ppm
+exclude_unknown_ptms=$import_filters.exclude_unknown_ptms
+ptm_score=$ptm_localization.ptm_score
+score_neutral_losses=$ptm_localization.score_neutral_losses
+ptm_sequence_matching_type=$ptm_localization.ptm_sequence_matching_type
+ptm_alignment=$ptm_localization.ptm_alignment
+useGeneMapping=$gene_annotation.useGeneMapping
+updateGeneMapping=$gene_annotation.updateGeneMapping
+simplify_groups=$protein_inference.simplify_groups
+simplify_score=$protein_inference.simplify_score
+simplify_enzymaticity=$protein_inference.simplify_enzymaticity
+simplify_evidence=$protein_inference.simplify_evidence
+simplify_uncharacterized=$protein_inference.simplify_uncharacterized
+psm_fdr=$validation_levels.psm_fdr
+peptide_fdr=$validation_levels.peptide_fdr
+protein_fdr=$validation_levels.protein_fdr
+group_psms=$validation_levels.group_psms
+group_peptides=$validation_levels.group_peptides
+merge_subgroups=$validation_levels.merge_subgroups
+protein_fraction_mw_confidence='$fraction_analysis.protein_fraction_mw_confidence'
+pepnovo_hitlist_length=1
+pepnovo_estimate_charge=1
+pepnovo_correct_prec_mass=1
+pepnovo_discard_spectra=1
+pepnovo_fragmentation_model='CID_IT_TRYP'
+pepnovo_generate_blast=0
+directag_tic_cutoff=$directag.directag_tic_cutoff
+directag_max_peak_count=$directag.directag_max_peak_count
+directag_intensity_classes=$directag.directag_intensity_classes
+directag_adjust_precursor=$directag.directag_adjust_precursor
+directag_min_adjustment='$directag.directag_min_adjustment'
+directag_max_adjustment='$directag.directag_max_adjustment'
+directag_adjustment_step='$directag.directag_adjustment_step'
+directag_charge_states='$directag.directag_charge_states'
+directag_ms_charge_state='$directag.directag_ms_charge_state'
+directag_duplicate_spectra='$directag.directag_duplicate_spectra'
+directag_deisotoping='$directag.directag_deisotoping'
+directag_isotope_tolerance='$directag.directag_isotope_tolerance'
+directag_complement_tolerance='$directag.directag_complement_tolerance'
+directag_tag_length='$directag.directag_tag_length'
+directag_max_var_mods='$directag.directag_max_var_mods'
+directag_max_tag_count='$directag.directag_max_tag_count'
+directag_intensity_weight='$directag.directag_intensity_weight'
+directag_fidelity_weight='$directag.directag_fidelity_weight'
+directag_complement_weight='$directag.directag_complement_weight'
+novor_fragmentation=HCD
+novor_mass_analyzer=Trap
+ ]]></configfile>
+  </configfiles>
+
+  <inputs>
+    <conditional name="input_type">
+      <param name="type" type="select" label="MGF Input Type" help="Submit either a single file, or a collection of files.">
+        <option selected="true" value="single">Single file</option>
+        <option value="collection">Collection</option>
+      </param>
+      <when value="single">
+        <param name="input_mgf" type="data" format="mgf" optional="true" label="MGF File" />
+      </when>
+      <when value="collection">
+        <param name="input_mgf_collection" type="data_collection" optional="true" label="MGF Collection" />
+      </when>
+    </conditional>
+
+    <param name="input_fasta" type="data" format="fasta" label="FASTA File" />
+
+    <section name="processing_control" expanded="False" title="Processing Control">
+      <param name="CHUNKSIZE" label="Size to split fasta for parallel processing" value="100000" type="integer" optional="true"/>
+      <param name="THREAD_LIMIT" label="How many threads to use per node" value="2" type="integer" optional="true"/>
+      <param name="JVM_Xmx" label="Maximum memory allocated to each Java thread" value="10000M" type="text" optional="true"/>
+      <param name="JVM_Xms" label="Minimum memory allocated to each Java thread" value="1024M" type="text" optional="true"/>
+    </section>
+    <section name="metanovo_parameters" expanded="False" title="MetaNovo Parameters">
+      <param name="mn_specificity" argument="-mn_specificity" label="Enzyme Specificity" type="select">
+        <option selected="true" value="specific">specific</option>
+        <option value="semi-specific">semi-specific</option>
+        <option value="unspecific">unspecific</option>
+      </param>
+      <param name="mn_enzymes" argument="-mn_enzymes" label="Enzyme Rule" type="select">
+        <option value="Trypsin">Trypsin</option>
+        <option selected="true" value="Trypsin, no P rule">Trypsin, no P rule</option>
+        <option value="Whole protein">Whole protein</option>
+      </param>
+      <param name="mn_max_missed_cleavages" argument="-mn_max_missed_cleavages" label="Number of enzymatic missed cleavages" value="2" type="integer" optional="true"/>
+    </section>
+    <section name="spectrum_matching_parameters" expanded="False" title="Spectrum Matching Parameters">
+      <param name="prec_tol" argument="-prec_tol" label="Precursor ion mass tolerance" value="10.0" type="float" optional="true"/>
+      <param name="prec_ppm" argument="-prec_ppm" label="Precursor ion tolerance unit" type="select">
+        <option value="0">Da</option>
+        <option selected="true" value="1">ppm</option>
+      </param>
+      <param name="frag_tol" argument="-frag_tol" label="Fragment ion mass tolerance" value="0.05" type="float" optional="true"/>
+      <param name="frag_ppm" argument="-frag_ppm" label="Fragment ion tolerance unit" type="select">
+        <option selected="true" value="0">Da</option>
+        <option value="1">ppm</option>
+      </param>
+      <param name="digestion" argument="-digestion" label="Digestion" type="select">
+        <option selected="true" value="0">Enzyme</option>
+        <option value="1">Unspecific</option>
+        <option value="2">Whole Protein</option>
+      </param>
+      <param name="enzyme" argument="-enzyme" label="Enzyme" type="select" multiple="true">
+        <option value="Trypsin">Trypsin</option>
+        <option selected="true" value="Trypsin (no P rule)">Trypsin (no P rule)</option>
+        <option value="Arg-C">Arg-C</option>
+        <option value="Arg-C (no P rule)">Arg-C (no P rule)</option>
+        <option value="Arg-N">Arg-N</option>
+        <option value="Glu-C">Glu-C</option>
+        <option value="Lys-C">Lys-C</option>
+        <option value="Lys-C (no P rule)">Lys-C (no P rule)</option>
+        <option value="Lys-N">Lys-N</option>
+        <option value="Asp-N">Asp-N</option>
+        <option value="Asp-N (ambic)">Asp-N (ambic)</option>
+        <option value="Chymotrypsin">Chymotrypsin</option>
+        <option value="Chymotrypsin (no P rule)">Chymotrypsin (no P rule)</option>
+        <option value="Pepsin A">Pepsin A</option>
+        <option value="CNBr">CNBr</option>
+        <option value="Thermolysin">Thermolysin</option>
+        <option value="LysargiNase">LysargiNase</option>
+      </param>
+      <param name="specificity" argument="-specificity" label="Specificity" type="select">
+          <option selected="true" value="0">Specific</option>
+          <option value="1">Semi-Specific</option>
+          <option value="2">N-term Specific</option>
+          <option value="3">C-term Specific</option>
+      </param>
+      <param name="mc" argument="-mc" label="Number of allowed missed cleavages" value="2" type="text" optional="true" help="If more than one enzyme was used, please provide the missed cleavages for every enzyme in the same order, with a comma separated list, e.g. &quot;2, 1&quot;."/>
+      <param name="fixed_mods" argument="-fixed_mods" label="Fixed modifications as comma separated list" type="select" multiple="true">
+        <expand macro="fixed_modifications"/>
+      </param>
+      <param name="variable_mods" argument="-variable_mods" label="Variable modifications as comma separated list" type="select" multiple="true">
+        <expand macro="variable_modifications"/>
+      </param>
+      <param name="min_charge" argument="-min_charge" label="Minimal charge to search for" value="2" type="integer" optional="true"/>
+      <param name="max_charge" argument="-max_charge" label="Maximal charge to search for" value="4" type="integer" optional="true"/>
+      <param name="fi" argument="-fi" label="Type of forward ion searched" value="b" type="text" optional="true"/>
+      <param name="ri" argument="-ri" label="Type of rewind ion searched" value="y" type="text" optional="true"/>
+      <param name="min_isotope" argument="-min_isotope" label="Minimum precursor isotope" value="0" type="integer" optional="true"/>
+      <param name="max_isotope" argument="-max_isotope" label="Maximum precursor isotope" value="1" type="integer" optional="true"/>
+    </section>
+    <section name="spectrum_annotation" expanded="False" title="Spectrum Annotation">
+      <param name="annotation_level" argument="-annotation_level" label="The intensity threshold to consider for annotation" value="0.75" type="float" optional="true" help="Using percentiles, 0.75 means that the 25% most intense peaks will be annotated."/>
+      <param name="annotation_high_resolution" argument="-annotation_high_resolution" label="If true the most accurate peak will be selected within the m/z tolerance." truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+    </section>
+    <section name="sequence_matching" expanded="False" title="Sequence Matching">
+      <param name="sequence_index_type" argument="-sequence_index_type" label="sequence_index_type (deprecated)" value="0" type="integer" optional="true"/>
+      <param name="sequence_matching_type" argument="-sequence_matching_type" label="The peptide to protein sequence matching type" type="select">
+          <option value="0">Character Sequence</option>
+          <option value="1">Amino Acids</option>
+          <option selected="true" value="2">Indistinguishable Amino Acids</option>
+      </param>
+      <param name="sequence_matching_x" argument="-sequence_matching_x" label="The maximal share of Xs in a sequence, 0.25 means 25% of X's" value="0.25" type="float" optional="true"/>
+    </section>
+    <section name="import_filters" expanded="False" title="Import Filters">
+      <param name="import_peptide_length_min" argument="-import_peptide_length_min" label="The minimal peptide length to consider when importing identification files" value="8" type="integer" optional="true"/>
+      <param name="import_peptide_length_max" argument="-import_peptide_length_max" label="The maximal peptide length to consider when importing identification files" value="30" type="integer" optional="true"/>
+      <param name="import_precursor_mz_ppm" argument="-import_precurosor_mz_ppm [sic]" label="Maximal precursor ion deviation unit" type="select">
+        <option selected="true" value="0">Da</option>
+        <option value="1">ppm</option>
+      </param>
+      <param name="exclude_unknown_ptms" argument="-exclude_unknown_ptms" label="Peptides presenting unrecognized PTMs will be excluded" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+    </section>
+    <section name="ptm_localization" expanded="False" title="PTM Localization">
+      <param name="ptm_score" argument="-ptm_score" label="The PTM probabilistic score to use for modification localization" type="select">
+        <option value="0">A-score</option>
+        <option selected="true" value="1">PhosphoRS</option>
+        <option value="2">None</option>
+      </param>
+      <param name="score_neutral_losses" argument="-score_neutral_losses" label="Include neutral losses in spectrum annotation of the PTM score" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
+      <param name="ptm_sequence_matching_type" argument="-ptm_sequence_matching_type" label="The modification to peptide sequence matching type" type="select">
+          <option value="0">Character Sequence</option>
+          <option selected="true" value="1">Amino Acids</option>
+          <option value="2">Indistinguishable Amino Acids</option>
+      </param>
+      <param name="ptm_alignment" argument="-ptm_alignment" label="Align peptide ambiguously localized PTMs on confident sites" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+    </section>
+    <section name="gene_annotation" expanded="False" title="Gene Annotation">
+      <param name="useGeneMapping" argument="-useGeneMapping" label="Use and save gene mappings along with the project" truevalue="1" falsevalue="0" type="boolean" checked="true"  help="UniProt databases only"/>
+      <param name="updateGeneMapping" argument="-updateGeneMapping" label="Update gene mappings automatically from Ensembl" truevalue="1" falsevalue="0" type="boolean" checked="true" help="UniProt databases only"/>
+    </section>
+    <section name="protein_inference" expanded="False" title="Protein Inference">
+      <param name="simplify_groups" argument="-simplify_groups" label="Simplify protein groups" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+      <param name="simplify_score" argument="-simplify_score" label="Simplify protein groups based on the PeptideShaker target/decoy score" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+      <param name="simplify_enzymaticity" argument="-simplify_enzymaticity" label="Simplify protein groups based on the peptide enzymaticity" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+      <param name="simplify_evidence" argument="-simplify_evidence" label="Simplify protein groups based on the UniProt protein evidence" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+      <param name="simplify_uncharacterized" argument="-simplify_uncharacterized" label="Simplify protein groups based on the protein characterization" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+    </section>
+    <section name="validation_levels" expanded="False" title="Validation Levels">
+      <param name="psm_fdr" argument="-psm_fdr" label="FDR at the PSM level in percent" value="1" type="integer" optional="true"/>
+      <param name="peptide_fdr" argument="-peptide_fdr" label="FDR at the peptide level in percent" value="1" type="integer" optional="true"/>
+      <param name="protein_fdr" argument="-protein_fdr" label="FDR at the protein level in percent" value="1" type="integer" optional="true"/>
+      <param name="group_psms" argument="-group_psms" label="Group PSMs by charge for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+      <param name="group_peptides" argument="-group_peptides" label="Group peptides by modification status for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+      <param name="merge_subgroups" argument="-group_peptides" label="Merge small PSM and peptide groups for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+    </section>
+    <section name="fraction_analysis" expanded="False" title="Fraction Analysis">
+      <param name="protein_fraction_mw_confidence" argument="-protein_fraction_mw_confidence" label="Minimum confidence required for a protein in the fraction MW plot" value="95.0" type="float" optional="true"/>
+    </section>
+    <section name="directag" expanded="False" title="DirecTag">
+      <param name="directag_tic_cutoff" argument="-directag_tic_cutoff" label="TIC cutoff in percent" value="85" type="integer" optional="true"/>
+      <param name="directag_max_peak_count" argument="-directag_max_peak_count" label="Max peak count" value="400" type="integer" optional="true"/>
+      <param name="directag_intensity_classes" argument="-directag_intensity_classes" label="Number of intensity classes" value="3" type="integer" optional="true"/>
+      <param name="directag_adjust_precursor" argument="-directag_adjust_precursor" label="Adjust precursor" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
+      <param name="directag_min_adjustment" argument="-directag_min_adjustment" label="Minimum precursor adjustment" value="-2.5" type="float" optional="true"/>
+      <param name="directag_max_adjustment" argument="-directag_max_adjustment" label="Maximum precursor adjustment" value="2.5" type="float" optional="true"/>
+      <param name="directag_adjustment_step" argument="-directag_adjustment_step" label="Precursor adjustment step" value="0.1" type="float" optional="true"/>
+      <param name="directag_charge_states" argument="-directag_charge_states" label="Number of charge states considered" value="3" type="integer" optional="true"/>
+      <param name="directag_ms_charge_state" argument="-directag_ms_charge_state" label="Use charge state from M spectrum" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
+      <param name="directag_duplicate_spectra" argument="-directag_duplicate_spectra" label="Duplicate spectra per charge" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
+      <param name="directag_deisotoping" argument="-directag_deisotoping" label="Deisotoping mode" type="select">
+        <option selected="true" value="0">No deisotoping</option>
+        <option value="1">Precursor only</option>
+        <option value="2">Precursor and candidate</option>
+      </param>
+      <param name="directag_isotope_tolerance" argument="-directag_isotope_tolerance" label="Isotope mz tolerance" value="0.25" type="float" optional="true"/>
+      <param name="directag_complement_tolerance" argument="-directag_complement_tolerance" label="Complement mz tolerance" value="0.5" type="float" optional="true"/>
+      <param name="directag_tag_length" argument="-directag_tag_length" label="Tag length" value="4" type="integer" optional="true"/>
+      <param name="directag_max_var_mods" argument="-directag_max_var_mods" label="Maximum variable modifications per sequence" value="2" type="integer" optional="true"/>
+      <param name="directag_max_tag_count" argument="-directag_max_tag_count" label="Maximum tag count" value="5" type="integer" optional="true"/>
+      <param name="directag_intensity_weight" argument="-directag_intensity_weight" label="Intensity score weight" value="1.0" type="float" optional="true"/>
+      <param name="directag_fidelity_weight" argument="-directag_fidelity_weight" label="Fidelity score weight" value="1.0" type="float" optional="true"/>
+      <param name="directag_complement_weight" argument="-directag_complement_weight" label="Complement score weight" value="1.0" type="float" optional="true"/>
+    </section>
+  </inputs>
+  <outputs>
+    <data name="output_fasta" format="fasta" from_work_dir="metanovo/metanovo.fasta" label="MetaNovo Output FASTA"/>
+    <data name="output_csv" format="csv" from_work_dir="metanovo/metanovo.csv" label="MetaNovo Output CSV"/>
+  </outputs>
+  <help><![CDATA[
+**MetaNovo**
+
+MetaNovo searches MS/MS data against a FASTA database of known proteins.
+
+Two outputs are produced:
+
+- MetaNovo Output FASTA: the matching proteins produced by the search.
+- MetaNovo Output CSV: information about the job and other useful metadata.
+
+Two inputs are required: an MGF file or files and a FASTA database file.
+
+Two different input types are available for the MGF input. The correct input configuration depends on the desired use case, as outlined below:
+
+=======================================================  =============
+Use case                                                 Configuration
+=======================================================  =============
+Single input MGF file, single output FASTA file          **Single file** input with **Single dataset** selected
+Multiple input MGF files, multiple output FASTA files\*  **Single file** input with **Multiple datasets** OR **Dataset collection** selected
+Multiple input MGF files, single output FASTA file       **Collection** input
+=======================================================  =============
+
+**\*** One for each MGF file.
+
+In the second use case, a separate MetaNovo job is spawned for each input MGF. In the third use case, a single MetaNovo job runs with all MGF files in the collection as input.
+
+If the third use case fails due to memory limitations, users are recommended to use the second option. The multiple output FASTA databases may be merged to generate a reduced, compact database.
+    ]]>
+  </help>
+  <citations>
+      <citation type="doi">10.1101/605550</citation>
+  </citations>
+</tool>