Mercurial > repos > galaxyp > metanovo
diff metanovo.xml @ 0:9025f297a511 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/metanovo commit 97229d4157cf21c8a55433cafdc477d76e0f1c89"
author | galaxyp |
---|---|
date | Tue, 29 Mar 2022 16:54:19 +0000 |
parents | |
children | 6066b729f9aa |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metanovo.xml Tue Mar 29 16:54:19 2022 +0000 @@ -0,0 +1,339 @@
<tool id="metanovo" name="MetaNovo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
    <description>Produce targeted databases for mass spectrometry analysis.</description>
    <!--
        Galaxy wrapper for metanovo.sh.
        The command section stages the inputs under sanitized file names, and the
        configfile section renders every tool parameter into a KEY=VALUE file that
        is copied to config.sh and passed to metanovo.sh as its only argument.
    -->
    <macros>
        <token name="@TOOL_VERSION@">1.9.4</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <!-- Characters matching this class are replaced by '_' when building staged file names. -->
        <token name="@SUBSTITUTION_RX@">[^\w\-\.]</token>
        <import>macros_modifications.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">metanovo</requirement>
    </requirements>
    <command>
        <![CDATA[
            ## 're' is needed for the re.sub calls below; the configfile template imports it separately.
            #import re
            #set $mgf_dir = 'mgf_files'
            #set $fasta_dir = 'fasta_file'
            #set $fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))
            mkdir $mgf_dir &&
            mkdir $fasta_dir &&
            ln -s '$input_fasta' '$fasta_dir/$fasta_name' &&

            #if $input_type.type == "collection"
                ## Link every element of the collection under its own sanitized name.
                #for $mgf in $input_type.input_mgf_collection
                    #set $mgf_name = re.sub('@SUBSTITUTION_RX@', '_', str($mgf.element_identifier))
                    ln -s '$mgf' '$mgf_dir/$mgf_name' &&
                #end for
            #else
                ## input_mgf is declared inside the input_type conditional, so it must be addressed through it.
                #set $mgf_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_type.input_mgf.element_identifier))
                ln -s '$input_type.input_mgf' '$mgf_dir/$mgf_name' &&
            #end if

            cat '$metanovo_config' > config.sh &&
            metanovo.sh config.sh
        ]]>
    </command>

    <!--
        KEY=VALUE settings consumed by metanovo.sh.
        MGF_FOLDER and FASTA_FILE must stay in sync with the directories and the
        sanitized FASTA name created in the command section above.
        The dg_*, pepnovo_* and novor_* entries are fixed values and are not exposed as parameters.
    -->
    <configfiles>
        <configfile name="metanovo_config"><![CDATA[#slurp
#import re
MGF_FOLDER=mgf_files
#set fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))
FASTA_FILE=fasta_file/'$fasta_name'
OUTPUT_FOLDER=.
CHUNKSIZE=$processing_control.CHUNKSIZE
THREAD_LIMIT=$processing_control.THREAD_LIMIT
JVM_Xmx='$processing_control.JVM_Xmx'
JVM_Xms='$processing_control.JVM_Xms'
mn_specificity='$metanovo_parameters.mn_specificity'
mn_enzymes='$metanovo_parameters.mn_enzymes'
mn_max_missed_cleavages=$metanovo_parameters.mn_max_missed_cleavages
dg_pepnovo=0
dg_pnovo=0
dg_novor=0
dg_directag=1
prec_tol=$spectrum_matching_parameters.prec_tol
prec_ppm=$spectrum_matching_parameters.prec_ppm
frag_tol=$spectrum_matching_parameters.frag_tol
frag_ppm=$spectrum_matching_parameters.frag_ppm
digestion=$spectrum_matching_parameters.digestion
enzyme='$spectrum_matching_parameters.enzyme'
specificity=$spectrum_matching_parameters.specificity
mc='$spectrum_matching_parameters.mc'
fixed_mods="$spectrum_matching_parameters.fixed_mods"
variable_mods="$spectrum_matching_parameters.variable_mods"
min_charge=$spectrum_matching_parameters.min_charge
max_charge=$spectrum_matching_parameters.max_charge
fi='$spectrum_matching_parameters.fi'
ri='$spectrum_matching_parameters.ri'
min_isotope='$spectrum_matching_parameters.min_isotope'
max_isotope='$spectrum_matching_parameters.max_isotope'
annotation_level=$spectrum_annotation.annotation_level
annotation_high_resolution=$spectrum_annotation.annotation_high_resolution
sequence_index_type=$sequence_matching.sequence_index_type
sequence_matching_type=$sequence_matching.sequence_matching_type
sequence_matching_x=$sequence_matching.sequence_matching_x
import_peptide_length_min=$import_filters.import_peptide_length_min
import_peptide_length_max=$import_filters.import_peptide_length_max
import_precursor_mz_ppm=$import_filters.import_precursor_mz_ppm
exclude_unknown_ptms=$import_filters.exclude_unknown_ptms
ptm_score=$ptm_localization.ptm_score
score_neutral_losses=$ptm_localization.score_neutral_losses
ptm_sequence_matching_type=$ptm_localization.ptm_sequence_matching_type
ptm_alignment=$ptm_localization.ptm_alignment
useGeneMapping=$gene_annotation.useGeneMapping
updateGeneMapping=$gene_annotation.updateGeneMapping
simplify_groups=$protein_inference.simplify_groups
simplify_score=$protein_inference.simplify_score
simplify_enzymaticity=$protein_inference.simplify_enzymaticity
simplify_evidence=$protein_inference.simplify_evidence
simplify_uncharacterized=$protein_inference.simplify_uncharacterized
psm_fdr=$validation_levels.psm_fdr
peptide_fdr=$validation_levels.peptide_fdr
protein_fdr=$validation_levels.protein_fdr
group_psms=$validation_levels.group_psms
group_peptides=$validation_levels.group_peptides
merge_subgroups=$validation_levels.merge_subgroups
protein_fraction_mw_confidence='$fraction_analysis.protein_fraction_mw_confidence'
pepnovo_hitlist_length=1
pepnovo_estimate_charge=1
pepnovo_correct_prec_mass=1
pepnovo_discard_spectra=1
pepnovo_fragmentation_model='CID_IT_TRYP'
pepnovo_generate_blast=0
directag_tic_cutoff=$directag.directag_tic_cutoff
directag_max_peak_count=$directag.directag_max_peak_count
directag_intensity_classes=$directag.directag_intensity_classes
directag_adjust_precursor=$directag.directag_adjust_precursor
directag_min_adjustment='$directag.directag_min_adjustment'
directag_max_adjustment='$directag.directag_max_adjustment'
directag_adjustment_step='$directag.directag_adjustment_step'
directag_charge_states='$directag.directag_charge_states'
directag_ms_charge_state='$directag.directag_ms_charge_state'
directag_duplicate_spectra='$directag.directag_duplicate_spectra'
directag_deisotoping='$directag.directag_deisotoping'
directag_isotope_tolerance='$directag.directag_isotope_tolerance'
directag_complement_tolerance='$directag.directag_complement_tolerance'
directag_tag_length='$directag.directag_tag_length'
directag_max_var_mods='$directag.directag_max_var_mods'
directag_max_tag_count='$directag.directag_max_tag_count'
directag_intensity_weight='$directag.directag_intensity_weight'
directag_fidelity_weight='$directag.directag_fidelity_weight'
directag_complement_weight='$directag.directag_complement_weight'
novor_fragmentation=HCD
novor_mass_analyzer=Trap
        ]]></configfile>
    </configfiles>

    <inputs>
        <!-- The command section dereferences the selected MGF input unconditionally, so it is required. -->
        <conditional name="input_type">
            <param name="type" type="select" label="MGF Input Type" help="Submit either a single file, or a collection of files.">
                <option selected="true" value="single">Single file</option>
                <option value="collection">Collection</option>
            </param>
            <when value="single">
                <param name="input_mgf" type="data" format="mgf" label="MGF File" />
            </when>
            <when value="collection">
                <param name="input_mgf_collection" type="data_collection" format="mgf" label="MGF Collection" />
            </when>
        </conditional>

        <param name="input_fasta" type="data" format="fasta" label="FASTA File" />

        <section name="processing_control" expanded="False" title="Processing Control">
            <param name="CHUNKSIZE" label="Size to split fasta for parallel processing" value="100000" type="integer" optional="true"/>
            <param name="THREAD_LIMIT" label="How many threads to use per node" value="2" type="integer" optional="true"/>
            <param name="JVM_Xmx" label="Maximum memory allocated to each Java thread" value="10000M" type="text" optional="true"/>
            <param name="JVM_Xms" label="Minimum memory allocated to each Java thread" value="1024M" type="text" optional="true"/>
        </section>
        <section name="metanovo_parameters" expanded="False" title="MetaNovo Parameters">
            <param name="mn_specificity" argument="-mn_specificity" label="Enzyme Specificity" type="select">
                <option selected="true" value="specific">specific</option>
                <option value="semi-specific">semi-specific</option>
                <option value="unspecific">unspecific</option>
            </param>
            <param name="mn_enzymes" argument="-mn_enzymes" label="Enzyme Rule" type="select">
                <option value="Trypsin">Trypsin</option>
                <option selected="true" value="Trypsin, no P rule">Trypsin, no P rule</option>
                <option value="Whole protein">Whole protein</option>
            </param>
            <param name="mn_max_missed_cleavages" argument="-mn_max_missed_cleavages" label="Number of enzymatic missed cleavages" value="2" type="integer" optional="true"/>
        </section>
        <section name="spectrum_matching_parameters" expanded="False" title="Spectrum Matching Parameters">
            <param name="prec_tol" argument="-prec_tol" label="Precursor ion mass tolerance" value="10.0" type="float" optional="true"/>
            <param name="prec_ppm" argument="-prec_ppm" label="Precursor ion tolerance unit" type="select">
                <option value="0">Da</option>
                <option selected="true" value="1">ppm</option>
            </param>
            <param name="frag_tol" argument="-frag_tol" label="Fragment ion mass tolerance" value="0.05" type="float" optional="true"/>
            <param name="frag_ppm" argument="-frag_ppm" label="Fragment ion tolerance unit" type="select">
                <option selected="true" value="0">Da</option>
                <option value="1">ppm</option>
            </param>
            <param name="digestion" argument="-digestion" label="Digestion" type="select">
                <option selected="true" value="0">Enzyme</option>
                <option value="1">Unspecific</option>
                <option value="2">Whole Protein</option>
            </param>
            <param name="enzyme" argument="-enzyme" label="Enzyme" type="select" multiple="true">
                <option value="Trypsin">Trypsin</option>
                <option selected="true" value="Trypsin (no P rule)">Trypsin (no P rule)</option>
                <option value="Arg-C">Arg-C</option>
                <option value="Arg-C (no P rule)">Arg-C (no P rule)</option>
                <option value="Arg-N">Arg-N</option>
                <option value="Glu-C">Glu-C</option>
                <option value="Lys-C">Lys-C</option>
                <option value="Lys-C (no P rule)">Lys-C (no P rule)</option>
                <option value="Lys-N">Lys-N</option>
                <option value="Asp-N">Asp-N</option>
                <option value="Asp-N (ambic)">Asp-N (ambic)</option>
                <option value="Chymotrypsin">Chymotrypsin</option>
                <option value="Chymotrypsin (no P rule)">Chymotrypsin (no P rule)</option>
                <option value="Pepsin A">Pepsin A</option>
                <option value="CNBr">CNBr</option>
                <option value="Thermolysin">Thermolysin</option>
                <option value="LysargiNase">LysargiNase</option>
            </param>
            <param name="specificity" argument="-specificity" label="Specificity" type="select">
                <option selected="true" value="0">Specific</option>
                <option value="1">Semi-Specific</option>
                <option value="2">N-term Specific</option>
                <option value="3">C-term Specific</option>
            </param>
            <!-- The quotes inside the help attribute must be written as &quot; to keep the attribute well-formed. -->
            <param name="mc" argument="-mc" label="Number of allowed missed cleavages" value="2" type="text" optional="true" help="If more than one enzyme was used, please provide the missed cleavages for every enzyme in the same order, with a comma separated list, e.g. &quot;2, 1&quot;."/>
            <param name="fixed_mods" argument="-fixed_mods" label="Fixed modifications as comma separated list" type="select" multiple="true">
                <expand macro="fixed_modifications"/>
            </param>
            <param name="variable_mods" argument="-variable_mods" label="Variable modifications as comma separated list" type="select" multiple="true">
                <expand macro="variable_modifications"/>
            </param>
            <param name="min_charge" argument="-min_charge" label="Minimal charge to search for" value="2" type="integer" optional="true"/>
            <param name="max_charge" argument="-max_charge" label="Maximal charge to search for" value="4" type="integer" optional="true"/>
            <param name="fi" argument="-fi" label="Type of forward ion searched" value="b" type="text" optional="true"/>
            <param name="ri" argument="-ri" label="Type of rewind ion searched" value="y" type="text" optional="true"/>
            <param name="min_isotope" argument="-min_isotope" label="Minimum precursor isotope" value="0" type="integer" optional="true"/>
            <param name="max_isotope" argument="-max_isotope" label="Maximum precursor isotope" value="1" type="integer" optional="true"/>
        </section>
        <section name="spectrum_annotation" expanded="False" title="Spectrum Annotation">
            <param name="annotation_level" argument="-annotation_level" label="The intensity threshold to consider for annotation" value="0.75" type="float" optional="true" help="Using percentiles, 0.75 means that the 25% most intense peaks will be annotated."/>
            <param name="annotation_high_resolution" argument="-annotation_high_resolution" label="If true the most accurate peak will be selected within the m/z tolerance." truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="sequence_matching" expanded="False" title="Sequence Matching">
            <param name="sequence_index_type" argument="-sequence_index_type" label="sequence_index_type (deprecated)" value="0" type="integer" optional="true"/>
            <param name="sequence_matching_type" argument="-sequence_matching_type" label="The peptide to protein sequence matching type" type="select">
                <option value="0">Character Sequence</option>
                <option value="1">Amino Acids</option>
                <option selected="true" value="2">Indistinguishable Amino Acids</option>
            </param>
            <param name="sequence_matching_x" argument="-sequence_matching_x" label="The maximal share of Xs in a sequence, 0.25 means 25% of X's" value="0.25" type="float" optional="true"/>
        </section>
        <section name="import_filters" expanded="False" title="Import Filters">
            <param name="import_peptide_length_min" argument="-import_peptide_length_min" label="The minimal peptide length to consider when importing identification files" value="8" type="integer" optional="true"/>
            <param name="import_peptide_length_max" argument="-import_peptide_length_max" label="The maximal peptide length to consider when importing identification files" value="30" type="integer" optional="true"/>
            <param name="import_precursor_mz_ppm" argument="-import_precurosor_mz_ppm [sic]" label="Maximal precursor ion deviation unit" type="select">
                <option selected="true" value="0">Da</option>
                <option value="1">ppm</option>
            </param>
            <param name="exclude_unknown_ptms" argument="-exclude_unknown_ptms" label="Peptides presenting unrecognized PTMs will be excluded" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="ptm_localization" expanded="False" title="PTM Localization">
            <param name="ptm_score" argument="-ptm_score" label="The PTM probabilistic score to use for modification localization" type="select">
                <option value="0">A-score</option>
                <option selected="true" value="1">PhosphoRS</option>
                <option value="2">None</option>
            </param>
            <param name="score_neutral_losses" argument="-score_neutral_losses" label="Include neutral losses in spectrum annotation of the PTM score" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
            <param name="ptm_sequence_matching_type" argument="-ptm_sequence_matching_type" label="The modification to peptide sequence matching type" type="select">
                <option value="0">Character Sequence</option>
                <option selected="true" value="1">Amino Acids</option>
                <option value="2">Indistinguishable Amino Acids</option>
            </param>
            <param name="ptm_alignment" argument="-ptm_alignment" label="Align peptide ambiguously localized PTMs on confident sites" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="gene_annotation" expanded="False" title="Gene Annotation">
            <param name="useGeneMapping" argument="-useGeneMapping" label="Use and save gene mappings along with the project" truevalue="1" falsevalue="0" type="boolean" checked="true" help="UniProt databases only"/>
            <param name="updateGeneMapping" argument="-updateGeneMapping" label="Update gene mappings automatically from Ensembl" truevalue="1" falsevalue="0" type="boolean" checked="true" help="UniProt databases only"/>
        </section>
        <section name="protein_inference" expanded="False" title="Protein Inference">
            <param name="simplify_groups" argument="-simplify_groups" label="Simplify protein groups" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="simplify_score" argument="-simplify_score" label="Simplify protein groups based on the PeptideShaker target/decoy score" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="simplify_enzymaticity" argument="-simplify_enzymaticity" label="Simplify protein groups based on the peptide enzymaticity" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="simplify_evidence" argument="-simplify_evidence" label="Simplify protein groups based on the UniProt protein evidence" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="simplify_uncharacterized" argument="-simplify_uncharacterized" label="Simplify protein groups based on the protein characterization" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="validation_levels" expanded="False" title="Validation Levels">
            <param name="psm_fdr" argument="-psm_fdr" label="FDR at the PSM level in percent" value="1" type="integer" optional="true"/>
            <param name="peptide_fdr" argument="-peptide_fdr" label="FDR at the peptide level in percent" value="1" type="integer" optional="true"/>
            <param name="protein_fdr" argument="-protein_fdr" label="FDR at the protein level in percent" value="1" type="integer" optional="true"/>
            <param name="group_psms" argument="-group_psms" label="Group PSMs by charge for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="group_peptides" argument="-group_peptides" label="Group peptides by modification status for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="merge_subgroups" argument="-merge_subgroups" label="Merge small PSM and peptide groups for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="fraction_analysis" expanded="False" title="Fraction Analysis">
            <param name="protein_fraction_mw_confidence" argument="-protein_fraction_mw_confidence" label="Minimum confidence required for a protein in the fraction MW plot" value="95.0" type="float" optional="true"/>
        </section>
        <section name="directag" expanded="False" title="DirecTag">
            <param name="directag_tic_cutoff" argument="-directag_tic_cutoff" label="TIC cutoff in percent" value="85" type="integer" optional="true"/>
            <param name="directag_max_peak_count" argument="-directag_max_peak_count" label="Max peak count" value="400" type="integer" optional="true"/>
            <param name="directag_intensity_classes" argument="-directag_intensity_classes" label="Number of intensity classes" value="3" type="integer" optional="true"/>
            <param name="directag_adjust_precursor" argument="-directag_adjust_precursor" label="Adjust precursor" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
            <param name="directag_min_adjustment" argument="-directag_min_adjustment" label="Minimum precursor adjustment" value="-2.5" type="float" optional="true"/>
            <param name="directag_max_adjustment" argument="-directag_max_adjustment" label="Maximum precursor adjustment" value="2.5" type="float" optional="true"/>
            <param name="directag_adjustment_step" argument="-directag_adjustment_step" label="Precursor adjustment step" value="0.1" type="float" optional="true"/>
            <param name="directag_charge_states" argument="-directag_charge_states" label="Number of charge states considered" value="3" type="integer" optional="true"/>
            <param name="directag_ms_charge_state" argument="-directag_ms_charge_state" label="Use charge state from M spectrum" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
            <param name="directag_duplicate_spectra" argument="-directag_duplicate_spectra" label="Duplicate spectra per charge" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="directag_deisotoping" argument="-directag_deisotoping" label="Deisotoping mode" type="select">
                <option selected="true" value="0">No deisotoping</option>
                <option value="1">Precursor only</option>
                <option value="2">Precursor and candidate</option>
            </param>
            <param name="directag_isotope_tolerance" argument="-directag_isotope_tolerance" label="Isotope mz tolerance" value="0.25" type="float" optional="true"/>
            <param name="directag_complement_tolerance" argument="-directag_complement_tolerance" label="Complement mz tolerance" value="0.5" type="float" optional="true"/>
            <param name="directag_tag_length" argument="-directag_tag_length" label="Tag length" value="4" type="integer" optional="true"/>
            <param name="directag_max_var_mods" argument="-directag_max_var_mods" label="Maximum variable modifications per sequence" value="2" type="integer" optional="true"/>
            <param name="directag_max_tag_count" argument="-directag_max_tag_count" label="Maximum tag count" value="5" type="integer" optional="true"/>
            <param name="directag_intensity_weight" argument="-directag_intensity_weight" label="Intensity score weight" value="1.0" type="float" optional="true"/>
            <param name="directag_fidelity_weight" argument="-directag_fidelity_weight" label="Fidelity score weight" value="1.0" type="float" optional="true"/>
            <param name="directag_complement_weight" argument="-directag_complement_weight" label="Complement score weight" value="1.0" type="float" optional="true"/>
        </section>
    </inputs>
    <!-- Both outputs are collected from the metanovo/ sub-directory of the job working directory. -->
    <outputs>
        <data name="output_fasta" format="fasta" from_work_dir="metanovo/metanovo.fasta" label="MetaNovo Output FASTA"/>
        <data name="output_csv" format="csv" from_work_dir="metanovo/metanovo.csv" label="MetaNovo Output CSV"/>
    </outputs>
    <help><![CDATA[
**MetaNovo**

MetaNovo searches MS/MS data against a FASTA database of known proteins.

Two outputs are produced:

- MetaNovo Output FASTA: the matching proteins produced by the search.
- MetaNovo Output CSV: information about the job and other useful metadata.

Two inputs are required: an MGF file or files and a FASTA database file.

Two different input types are available for the MGF input. The correct input configuration depends on the desired use case, as outlined below:

======================================================= =============
Use case                                                Configuration
======================================================= =============
Single input MGF file, single output FASTA file         **Single file** input with **Single dataset** selected
Multiple input MGF files, multiple output FASTA files\* **Single file** input with **Multiple datasets** OR **Dataset collection** selected
Multiple input MGF files, single output FASTA file      **Collection** input
======================================================= =============

**\*** One for each MGF file.

In the second use case, a separate MetaNovo job is spawned for each input MGF. In the third use case, a single MetaNovo job runs with all MGF files in the collection as input.

If the third use case fails due to memory limitations, users are recommended to use the second option. The multiple output FASTA databases may be merged to generate a reduced, compact database.
    ]]>
    </help>
    <citations>
        <citation type="doi">10.1101/605550</citation>
    </citations>
</tool>