Mercurial > repos > galaxyp > metanovo
view metanovo.xml @ 4:7a5ff5359b13 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/metanovo commit c220dc85d59698a73b0f173d46e269e27264d6d8"
author | galaxyp |
---|---|
date | Fri, 22 Apr 2022 13:31:08 +0000 |
parents | 4a851c02f558 |
children | d6dcd3173bdf |
line wrap: on
line source
<tool id="metanovo" name="MetaNovo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
    <!--
        Galaxy wrapper for MetaNovo.
        The command block links the FASTA and MGF inputs into two working
        sub-directories under sanitised file names, appends a thread limit to
        the generated config.sh and then calls "metanovo.sh config.sh".
    -->
    <description>Produce targeted databases for mass spectrometry analysis.</description>
    <macros>
        <token name="@TOOL_VERSION@">1.9.4</token>
        <token name="@VERSION_SUFFIX@">4</token>
        <!-- Matches every character that is not a word character, a hyphen or a dot;
             each match is replaced by "_" when input file names are built. -->
        <token name="@SUBSTITUTION_RX@">[^\w\-\.]</token>
        <!-- Supplies the "fixed_modifications" and "variable_modifications" macros expanded below. -->
        <import>macros_modifications.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">metanovo</requirement>
    </requirements>
    <command><![CDATA[
        #import re

        ## Working directories and the sanitised FASTA name (must match config.sh below).
        #set $mgf_dir = 'mgf_files'
        #set $fasta_dir = 'fasta_file'
        #set fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))

        mkdir $mgf_dir &&
        mkdir $fasta_dir &&
        ln -s '$input_fasta' '$fasta_dir/$fasta_name' &&

        ## Link every MGF input into the MGF folder under a sanitised name.
        #if $input_type.type == "collection"
            #for $input_mgf_item in $input_type.input_mgf_collection:
                #set mgf_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_mgf_item.element_identifier))
                ln -s '$input_mgf_item' '$mgf_dir/$mgf_name' &&
            #end for
        #else
            #set mgf_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_type.input_mgf.element_identifier))
            ln -s '$input_type.input_mgf' '$mgf_dir/$mgf_name' &&
        #end if

        ## the number of threads should be number of available threads-1 according to the docs
        threads=\${GALAXY_SLOTS:-3} &&
        if [ \$threads -gt 1 ]; then (( threads-- )); fi &&
        echo "THREAD_LIMIT=\$threads" >> config.sh &&

        metanovo.sh config.sh
    ]]></command>
    <configfiles>
        <!--
            Shell-style KEY=VALUE settings handed to metanovo.sh.
            Keys without a matching input below (JVM heap sizes, the dg_* engine
            switches, the pepnovo_* and novor_* settings) are fixed by this wrapper.
            THREAD_LIMIT is appended by the command block at run time.
        -->
        <configfile filename="config.sh"><![CDATA[#slurp
#import re
MGF_FOLDER=mgf_files
#set fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))
FASTA_FILE=fasta_file/'$fasta_name'
OUTPUT_FOLDER=.
CHUNKSIZE=$processing_control.CHUNKSIZE
JVM_Xmx='10000M'
JVM_Xms='1024M'
mn_specificity='$metanovo_parameters.mn_specificity'
mn_enzymes='$metanovo_parameters.mn_enzymes'
mn_max_missed_cleavages=$metanovo_parameters.mn_max_missed_cleavages
dg_pepnovo=0
dg_pnovo=0
dg_novor=0
dg_directag=1
prec_tol=$spectrum_matching_parameters.prec_tol
prec_ppm=$spectrum_matching_parameters.prec_ppm
frag_tol=$spectrum_matching_parameters.frag_tol
frag_ppm=$spectrum_matching_parameters.frag_ppm
digestion=$spectrum_matching_parameters.digestion
enzyme='$spectrum_matching_parameters.enzyme'
specificity=$spectrum_matching_parameters.specificity
mc='$spectrum_matching_parameters.mc'
fixed_mods="$spectrum_matching_parameters.fixed_mods"
variable_mods="$spectrum_matching_parameters.variable_mods"
min_charge=$spectrum_matching_parameters.min_charge
max_charge=$spectrum_matching_parameters.max_charge
fi='$spectrum_matching_parameters.fi'
ri='$spectrum_matching_parameters.ri'
min_isotope='$spectrum_matching_parameters.min_isotope'
max_isotope='$spectrum_matching_parameters.max_isotope'
annotation_level=$spectrum_annotation.annotation_level
annotation_high_resolution=$spectrum_annotation.annotation_high_resolution
sequence_index_type=$sequence_matching.sequence_index_type
sequence_matching_type=$sequence_matching.sequence_matching_type
sequence_matching_x=$sequence_matching.sequence_matching_x
import_peptide_length_min=$import_filters.import_peptide_length_min
import_peptide_length_max=$import_filters.import_peptide_length_max
import_precursor_mz_ppm=$import_filters.import_precursor_mz_ppm
exclude_unknown_ptms=$import_filters.exclude_unknown_ptms
ptm_score=$ptm_localization.ptm_score
score_neutral_losses=$ptm_localization.score_neutral_losses
ptm_sequence_matching_type=$ptm_localization.ptm_sequence_matching_type
ptm_alignment=$ptm_localization.ptm_alignment
useGeneMapping=$gene_annotation.useGeneMapping
updateGeneMapping=$gene_annotation.updateGeneMapping
simplify_groups=$protein_inference.simplify_groups
simplify_score=$protein_inference.simplify_score
simplify_enzymaticity=$protein_inference.simplify_enzymaticity
simplify_evidence=$protein_inference.simplify_evidence
simplify_uncharacterized=$protein_inference.simplify_uncharacterized
psm_fdr=$validation_levels.psm_fdr
peptide_fdr=$validation_levels.peptide_fdr
protein_fdr=$validation_levels.protein_fdr
group_psms=$validation_levels.group_psms
group_peptides=$validation_levels.group_peptides
merge_subgroups=$validation_levels.merge_subgroups
protein_fraction_mw_confidence='$fraction_analysis.protein_fraction_mw_confidence'
pepnovo_hitlist_length=1
pepnovo_estimate_charge=1
pepnovo_correct_prec_mass=1
pepnovo_discard_spectra=1
pepnovo_fragmentation_model='CID_IT_TRYP'
pepnovo_generate_blast=0
directag_tic_cutoff=$directag.directag_tic_cutoff
directag_max_peak_count=$directag.directag_max_peak_count
directag_intensity_classes=$directag.directag_intensity_classes
directag_adjust_precursor=$directag.directag_adjust_precursor
directag_min_adjustment='$directag.directag_min_adjustment'
directag_max_adjustment='$directag.directag_max_adjustment'
directag_adjustment_step='$directag.directag_adjustment_step'
directag_charge_states='$directag.directag_charge_states'
directag_ms_charge_state='$directag.directag_ms_charge_state'
directag_duplicate_spectra='$directag.directag_duplicate_spectra'
directag_deisotoping='$directag.directag_deisotoping'
directag_isotope_tolerance='$directag.directag_isotope_tolerance'
directag_complement_tolerance='$directag.directag_complement_tolerance'
directag_tag_length='$directag.directag_tag_length'
directag_max_var_mods='$directag.directag_max_var_mods'
directag_max_tag_count='$directag.directag_max_tag_count'
directag_intensity_weight='$directag.directag_intensity_weight'
directag_fidelity_weight='$directag.directag_fidelity_weight'
directag_complement_weight='$directag.directag_complement_weight'
novor_fragmentation=HCD
novor_mass_analyzer=Trap
]]></configfile>
    </configfiles>
    <inputs>
        <!-- MGF spectra: one dataset, or a flat list collection processed in a single job. -->
        <conditional name="input_type">
            <param name="type" type="select" label="MGF Input Type" help="Submit either a single file, or a collection of files.">
                <option selected="true" value="single">Single file</option>
                <option value="collection">Collection</option>
            </param>
            <when value="single">
                <param name="input_mgf" type="data" format="mgf" optional="true" label="MGF File" />
            </when>
            <when value="collection">
                <!-- Restricted to flat lists of MGF datasets: the command block iterates the
                     collection's direct elements and links each one as a file. -->
                <param name="input_mgf_collection" type="data_collection" collection_type="list" format="mgf" optional="true" label="MGF Collection" />
            </when>
        </conditional>
        <param name="input_fasta" type="data" format="fasta" label="FASTA File" />
        <section name="processing_control" expanded="False" title="Processing Control">
            <param name="CHUNKSIZE" label="Size to split fasta for parallel processing" value="100000" type="integer" optional="true"/>
        </section>
        <section name="metanovo_parameters" expanded="False" title="MetaNovo Parameters">
            <param name="mn_specificity" argument="-mn_specificity" label="Enzyme Specificity" type="select">
                <option selected="true" value="specific">specific</option>
                <option value="semi-specific">semi-specific</option>
                <option value="unspecific">unspecific</option>
            </param>
            <param name="mn_enzymes" argument="-mn_enzymes" label="Enzyme Rule" type="select">
                <option value="Trypsin">Trypsin</option>
                <option selected="true" value="Trypsin, no P rule">Trypsin, no P rule</option>
                <option value="Whole protein">Whole protein</option>
            </param>
            <param name="mn_max_missed_cleavages" argument="-mn_max_missed_cleavages" label="Number of enzymatic missed cleavages" value="2" type="integer" optional="true"/>
        </section>
        <section name="spectrum_matching_parameters" expanded="False" title="Spectrum Matching Parameters">
            <param name="prec_tol" argument="-prec_tol" label="Precursor ion mass tolerance" value="10.0" type="float" optional="true"/>
            <param name="prec_ppm" argument="-prec_ppm" label="Precursor ion tolerance unit" type="select">
                <option value="0">Da</option>
                <option selected="true" value="1">ppm</option>
            </param>
            <param name="frag_tol" argument="-frag_tol" label="Fragment ion mass tolerance" value="0.05" type="float" optional="true"/>
            <param name="frag_ppm" argument="-frag_ppm" label="Fragment ion tolerance unit" type="select">
                <option selected="true" value="0">Da</option>
                <option value="1">ppm</option>
            </param>
            <param name="digestion" argument="-digestion" label="Digestion" type="select">
                <option selected="true" value="0">Enzyme</option>
                <option value="1">Unspecific</option>
                <option value="2">Whole Protein</option>
            </param>
            <param name="enzyme" argument="-enzyme" label="Enzyme" type="select" multiple="true">
                <option value="Trypsin">Trypsin</option>
                <option selected="true" value="Trypsin (no P rule)">Trypsin (no P rule)</option>
                <option value="Arg-C">Arg-C</option>
                <option value="Arg-C (no P rule)">Arg-C (no P rule)</option>
                <option value="Arg-N">Arg-N</option>
                <option value="Glu-C">Glu-C</option>
                <option value="Lys-C">Lys-C</option>
                <option value="Lys-C (no P rule)">Lys-C (no P rule)</option>
                <option value="Lys-N">Lys-N</option>
                <option value="Asp-N">Asp-N</option>
                <option value="Asp-N (ambic)">Asp-N (ambic)</option>
                <option value="Chymotrypsin">Chymotrypsin</option>
                <option value="Chymotrypsin (no P rule)">Chymotrypsin (no P rule)</option>
                <option value="Pepsin A">Pepsin A</option>
                <option value="CNBr">CNBr</option>
                <option value="Thermolysin">Thermolysin</option>
                <option value="LysargiNase">LysargiNase</option>
            </param>
            <param name="specificity" argument="-specificity" label="Specificity" type="select">
                <option selected="true" value="0">Specific</option>
                <option value="1">Semi-Specific</option>
                <option value="2">N-term Specific</option>
                <option value="3">C-term Specific</option>
            </param>
            <!-- The quotation marks inside the help text are escaped so the attribute stays well-formed. -->
            <param name="mc" argument="-mc" label="Number of allowed missed cleavages" value="2" type="text" optional="true" help="If more than one enzyme was used, please provide the missed cleavages for every enzyme in the same order, with a comma separated list, e.g. &quot;2, 1&quot;."/>
            <param name="fixed_mods" argument="-fixed_mods" label="Fixed modifications as comma separated list" type="select" multiple="true">
                <expand macro="fixed_modifications"/>
            </param>
            <param name="variable_mods" argument="-variable_mods" label="Variable modifications as comma separated list" type="select" multiple="true">
                <expand macro="variable_modifications"/>
            </param>
            <param name="min_charge" argument="-min_charge" label="Minimal charge to search for" value="2" type="integer" optional="true"/>
            <param name="max_charge" argument="-max_charge" label="Maximal charge to search for" value="4" type="integer" optional="true"/>
            <param name="fi" argument="-fi" label="Type of forward ion searched" value="b" type="text" optional="true"/>
            <param name="ri" argument="-ri" label="Type of rewind ion searched" value="y" type="text" optional="true"/>
            <param name="min_isotope" argument="-min_isotope" label="Minimum precursor isotope" value="0" type="integer" optional="true"/>
            <param name="max_isotope" argument="-max_isotope" label="Maximum precursor isotope" value="1" type="integer" optional="true"/>
        </section>
        <section name="spectrum_annotation" expanded="False" title="Spectrum Annotation">
            <param name="annotation_level" argument="-annotation_level" label="The intensity threshold to consider for annotation" value="0.75" type="float" optional="true" help="Using percentiles, 0.75 means that the 25% most intense peaks will be annotated."/>
            <param name="annotation_high_resolution" argument="-annotation_high_resolution" label="If true the most accurate peak will be selected within the m/z tolerance." truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="sequence_matching" expanded="False" title="Sequence Matching">
            <param name="sequence_index_type" argument="-sequence_index_type" label="sequence_index_type (deprecated)" value="0" type="integer" optional="true"/>
            <param name="sequence_matching_type" argument="-sequence_matching_type" label="The peptide to protein sequence matching type" type="select">
                <option value="0">Character Sequence</option>
                <option value="1">Amino Acids</option>
                <option selected="true" value="2">Indistinguishable Amino Acids</option>
            </param>
            <param name="sequence_matching_x" argument="-sequence_matching_x" label="The maximal share of Xs in a sequence, 0.25 means 25% of X's" value="0.25" type="float" optional="true"/>
        </section>
        <section name="import_filters" expanded="False" title="Import Filters">
            <param name="import_peptide_length_min" argument="-import_peptide_length_min" label="The minimal peptide length to consider when importing identification files" value="8" type="integer" optional="true"/>
            <param name="import_peptide_length_max" argument="-import_peptide_length_max" label="The maximal peptide length to consider when importing identification files" value="30" type="integer" optional="true"/>
            <!-- The misspelt argument is deliberate: it is marked [sic] in the original wrapper. -->
            <param name="import_precursor_mz_ppm" argument="-import_precurosor_mz_ppm [sic]" label="Maximal precursor ion deviation unit" type="select">
                <option selected="true" value="0">Da</option>
                <option value="1">ppm</option>
            </param>
            <param name="exclude_unknown_ptms" argument="-exclude_unknown_ptms" label="Peptides presenting unrecognized PTMs will be excluded" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="ptm_localization" expanded="False" title="PTM Localization">
            <param name="ptm_score" argument="-ptm_score" label="The PTM probabilistic score to use for modification localization" type="select">
                <option value="0">A-score</option>
                <option selected="true" value="1">PhosphoRS</option>
                <option value="2">None</option>
            </param>
            <param name="score_neutral_losses" argument="-score_neutral_losses" label="Include neutral losses in spectrum annotation of the PTM score" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
            <param name="ptm_sequence_matching_type" argument="-ptm_sequence_matching_type" label="The modification to peptide sequence matching type" type="select">
                <option value="0">Character Sequence</option>
                <option selected="true" value="1">Amino Acids</option>
                <option value="2">Indistinguishable Amino Acids</option>
            </param>
            <param name="ptm_alignment" argument="-ptm_alignment" label="Align peptide ambiguously localized PTMs on confident sites" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="gene_annotation" expanded="False" title="Gene Annotation">
            <param name="useGeneMapping" argument="-useGeneMapping" label="Use and save gene mappings along with the project" truevalue="1" falsevalue="0" type="boolean" checked="true" help="UniProt databases only"/>
            <param name="updateGeneMapping" argument="-updateGeneMapping" label="Update gene mappings automatically from Ensembl" truevalue="1" falsevalue="0" type="boolean" checked="true" help="UniProt databases only"/>
        </section>
        <section name="protein_inference" expanded="False" title="Protein Inference">
            <param name="simplify_groups" argument="-simplify_groups" label="Simplify protein groups" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="simplify_score" argument="-simplify_score" label="Simplify protein groups based on the PeptideShaker target/decoy score" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="simplify_enzymaticity" argument="-simplify_enzymaticity" label="Simplify protein groups based on the peptide enzymaticity" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="simplify_evidence" argument="-simplify_evidence" label="Simplify protein groups based on the UniProt protein evidence" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="simplify_uncharacterized" argument="-simplify_uncharacterized" label="Simplify protein groups based on the protein characterization" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="validation_levels" expanded="False" title="Validation Levels">
            <param name="psm_fdr" argument="-psm_fdr" label="FDR at the PSM level in percent" value="1" type="integer" optional="true"/>
            <param name="peptide_fdr" argument="-peptide_fdr" label="FDR at the peptide level in percent" value="1" type="integer" optional="true"/>
            <param name="protein_fdr" argument="-protein_fdr" label="FDR at the protein level in percent" value="1" type="integer" optional="true"/>
            <param name="group_psms" argument="-group_psms" label="Group PSMs by charge for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="group_peptides" argument="-group_peptides" label="Group peptides by modification status for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <!-- The argument names this parameter's own option; it previously repeated "-group_peptides". -->
            <param name="merge_subgroups" argument="-merge_subgroups" label="Merge small PSM and peptide groups for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="fraction_analysis" expanded="False" title="Fraction Analysis">
            <param name="protein_fraction_mw_confidence" argument="-protein_fraction_mw_confidence" label="Minimum confidence required for a protein in the fraction MW plot" value="95.0" type="float" optional="true"/>
        </section>
        <section name="directag" expanded="False" title="DirecTag">
            <param name="directag_tic_cutoff" argument="-directag_tic_cutoff" label="TIC cutoff in percent" value="85" type="integer" optional="true"/>
            <param name="directag_max_peak_count" argument="-directag_max_peak_count" label="Max peak count" value="400" type="integer" optional="true"/>
            <param name="directag_intensity_classes" argument="-directag_intensity_classes" label="Number of intensity classes" value="3" type="integer" optional="true"/>
            <param name="directag_adjust_precursor" argument="-directag_adjust_precursor" label="Adjust precursor" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
            <param name="directag_min_adjustment" argument="-directag_min_adjustment" label="Minimum precursor adjustment" value="-2.5" type="float" optional="true"/>
            <param name="directag_max_adjustment" argument="-directag_max_adjustment" label="Maximum precursor adjustment" value="2.5" type="float" optional="true"/>
            <param name="directag_adjustment_step" argument="-directag_adjustment_step" label="Precursor adjustment step" value="0.1" type="float" optional="true"/>
            <param name="directag_charge_states" argument="-directag_charge_states" label="Number of charge states considered" value="3" type="integer" optional="true"/>
            <param name="directag_ms_charge_state" argument="-directag_ms_charge_state" label="Use charge state from MS spectrum" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
            <param name="directag_duplicate_spectra" argument="-directag_duplicate_spectra" label="Duplicate spectra per charge" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param name="directag_deisotoping" argument="-directag_deisotoping" label="Deisotoping mode" type="select">
                <option selected="true" value="0">No deisotoping</option>
                <option value="1">Precursor only</option>
                <option value="2">Precursor and candidate</option>
            </param>
            <param name="directag_isotope_tolerance" argument="-directag_isotope_tolerance" label="Isotope mz tolerance" value="0.25" type="float" optional="true"/>
            <param name="directag_complement_tolerance" argument="-directag_complement_tolerance" label="Complement mz tolerance" value="0.5" type="float" optional="true"/>
            <param name="directag_tag_length" argument="-directag_tag_length" label="Tag length" value="4" type="integer" optional="true"/>
            <param name="directag_max_var_mods" argument="-directag_max_var_mods" label="Maximum variable modifications per sequence" value="2" type="integer" optional="true"/>
            <param name="directag_max_tag_count" argument="-directag_max_tag_count" label="Maximum tag count" value="5" type="integer" optional="true"/>
            <param name="directag_intensity_weight" argument="-directag_intensity_weight" label="Intensity score weight" value="1.0" type="float" optional="true"/>
            <param name="directag_fidelity_weight" argument="-directag_fidelity_weight" label="Fidelity score weight" value="1.0" type="float" optional="true"/>
            <param name="directag_complement_weight" argument="-directag_complement_weight" label="Complement score weight" value="1.0" type="float" optional="true"/>
        </section>
    </inputs>
    <outputs>
        <!-- Both results are collected from the "metanovo" sub-directory of the job working directory. -->
        <data name="output_fasta" format="fasta" from_work_dir="metanovo/metanovo.fasta" label="${tool.name} on ${on_string}: FASTA"/>
        <data name="output_csv" format="csv" from_work_dir="metanovo/metanovo.csv" label="${tool.name} on ${on_string}: CSV"/>
    </outputs>
    <tests>
        <!-- Single MGF dataset. -->
        <test expect_num_outputs="2">
            <param name="input_mgf" value="sample_data_1.mgf" ftype="mgf"/>
            <param name="input_fasta" value="sample_fasta_single.fasta" ftype="fasta"/>
            <output name="output_csv" ftype="csv">
                <assert_contents>
                    <!-- Check header. -->
                    <has_text text=",index,Accession,Record,ID,PeptideCount,Peptides,ScanCount,Scans,Organism,Length,File,Sample sample_data_1 (msms),SAF sample_data_1,NSAF sample_data_1,Summed_NSAF,Protein_Prob,Organism_Prob,MSMS_Percent,Combined_Prob"/>
                </assert_contents>
            </output>
            <output name="output_fasta" ftype="fasta" file="sample_output_single.fasta"/>
        </test>
        <!-- List collection of two MGF datasets handled by one job. -->
        <test expect_num_outputs="2">
            <param name="type" value="collection"/>
            <param name="input_mgf_collection">
                <collection type="list">
                    <element name="sample_data_1.mgf" value="sample_data_1.mgf" ftype="mgf"/>
                    <element name="sample_data_2.mgf" value="sample_data_2.mgf" ftype="mgf"/>
                </collection>
            </param>
            <param name="input_fasta" value="sample_fasta_collection.fasta" ftype="fasta"/>
            <output name="output_csv" ftype="csv">
                <assert_contents>
                    <!-- Check header. -->
                    <has_text text=",index,Accession,File,ID,Length,Organism,PeptideCount,Peptides,Record,SAF sample_data_1,SAF sample_data_2,Sample sample_data_1 (msms),Sample sample_data_2 (msms),ScanCount,Scans,NSAF sample_data_1,NSAF sample_data_2,Summed_NSAF,Protein_Prob,Organism_Prob,MSMS_Percent,Combined_Prob"/>
                </assert_contents>
            </output>
            <output name="output_fasta" ftype="fasta" file="sample_output_collection.fasta"/>
        </test>
    </tests>
    <help><![CDATA[
**MetaNovo**

MetaNovo searches MS/MS data against a FASTA database of known proteins. Two outputs are produced:

- MetaNovo Output FASTA: the matching proteins produced by the search.
- MetaNovo Output CSV: information about the job and other useful metadata.

Two inputs are required: an MGF file or files and a FASTA database file.

Two different input types are available for the MGF input. The correct input configuration depends on the desired use case, as outlined below:

=======================================================  =============
Use case                                                 Configuration
=======================================================  =============
Single input MGF file, single output FASTA file          **Single file** input with **Single dataset** selected
Multiple input MGF files, multiple output FASTA files\*  **Single file** input with **Multiple datasets** OR **Dataset collection** selected
Multiple input MGF files, single output FASTA file       **Collection** input
=======================================================  =============

**\*** One for each MGF file.

In the second use case, a separate MetaNovo job is spawned for each input MGF. In the third use case, a single MetaNovo job runs with all MGF files in the collection as input.

If the third use case fails due to memory limitations, users are recommended to use the second option. The multiple output FASTA databases may be merged to generate a reduced, compact database.
    ]]></help>
    <citations>
        <citation type="doi">10.1101/605550</citation>
    </citations>
</tool>