Mercurial > repos > galaxyp > openms_idposteriorerrorprobability
view IDPosteriorErrorProbability.xml @ 15:93e6ec445d5d draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 5c080b1e2b99f1c88f4557e9fec8c45c9d23b906
author | galaxyp |
---|---|
date | Fri, 14 Jun 2024 21:40:19 +0000 |
parents | 986e03d3201e |
children |
line wrap: on
line source
<!--This is a configuration file for the integration of a tool into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
<!--Proposed Tool Section: [Identification Processing]-->
<tool id="IDPosteriorErrorProbability" name="IDPosteriorErrorProbability" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
  <description>Estimates probabilities for incorrectly assigned peptide sequences and a set of search engine scores using a mixture model</description>
  <!-- Tokens and macros: the executable name is defined here; every other token and
       macro used below comes from macros.xml, which is not part of this file. -->
  <macros>
    <token name="@EXECUTABLE@">IDPosteriorErrorProbability</token>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <!-- Command template (Cheetah + shell). Steps:
       1. Copy the input dataset into ./in under a name built from its element identifier,
          with every character other than word characters, '-' and '_' replaced by '_'.
       2. Let the executable write its default CTD, then call fill_ctd.py with that CTD and
          the two JSON config files declared in the configfiles section.
       3. Run the executable with the resulting CTD as its ini file.
       4. Move the results to the Galaxy output paths.
       gxy2omsext is not defined in this file; presumably it is supplied through the tokens
       expanded on the first two template lines (TODO confirm against macros.xml).
       Optional outputs are tested with str($OPTIONAL_OUTPUTS).split(','), so a flag only
       matches as a whole list entry. -->
  <command detect_errors="exit_code"><![CDATA[@QUOTE_FOO@
@EXT_FOO@
#import re

## Preprocessing
mkdir in &&
cp '$in' 'in/${re.sub("[^\w\-_]", "_", $in.element_identifier)}.$gxy2omsext($in.ext)' &&
mkdir out &&
#if "out_plot_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  mkdir out_plot &&
#end if

## Main program call

set -o pipefail &&
@EXECUTABLE@ -write_ctd ./ &&
python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' &&
@EXECUTABLE@ -ini @EXECUTABLE@.ctd
-in
'in/${re.sub("[^\w\-_]", "_", $in.element_identifier)}.$gxy2omsext($in.ext)'
-out
'out/output.${gxy2omsext("idxml")}'
#if "out_plot_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  -out_plot
  'out_plot/output.${gxy2omsext("txt")}'
#end if

## Postprocessing
&& mv 'out/output.${gxy2omsext("idxml")}' '$out'
#if "out_plot_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  && mv 'out_plot/output.${gxy2omsext("txt")}' '$out_plot'
#end if
#if "ctd_out_FLAG" in str($OPTIONAL_OUTPUTS).split(',')
  && mv '@EXECUTABLE@.ctd' '$ctd_out'
#end if]]></command>
  <!-- args_json: the tool inputs, with datasets given as paths.
       hardcoded_json: fixed settings (log file name, thread count taken from GALAXY_SLOTS
       with a fallback of 1, progress output disabled). -->
  <configfiles>
    <inputs name="args_json" data_style="paths"/>
    <configfile name="hardcoded_json"><![CDATA[{"log": "log.txt", "threads": "\${GALAXY_SLOTS:-1}", "no_progress": true}]]></configfile>
  </configfiles>
  <inputs>
    <param argument="-in" type="data" format="idxml" label="input file" help="select idxml data set(s)"/>
    <param argument="-split_charge" type="boolean" truevalue="true" falsevalue="false" checked="false" label="The search engine scores are split by charge if this flag is set" help="Thus, for each charge state a new model will be computed"/>
    <param argument="-top_hits_only" type="boolean" truevalue="true" falsevalue="false" checked="false" label="If set only the top hits of every PeptideIdentification will be used" help=""/>
    <param argument="-ignore_bad_data" type="boolean" truevalue="true" falsevalue="false" checked="false" label="If set errors will be written but ignored" help="Useful for pipelines with many datasets where only a few are bad, but the pipeline should run through"/>
    <param argument="-prob_correct" type="boolean" truevalue="true" falsevalue="false" checked="false" label="If set scores will be calculated as '1 - ErrorProbabilities' and can be interpreted as probabilities for correct identifications" help=""/>
    <section name="fit_algorithm" title="Algorithm parameter subsection" help="" expanded="false">
      <param name="number_of_bins" argument="-fit_algorithm:number_of_bins" type="integer" value="100" label="Number of bins used for visualization" help="Only needed if each iteration step of the EM-Algorithm will be visualized"/>
      <param name="incorrectly_assigned" argument="-fit_algorithm:incorrectly_assigned" type="select" label="for 'Gumbel', the Gumbel distribution is used to plot incorrectly assigned sequences" help="For 'Gauss', the Gauss distribution is used">
        <option value="Gumbel" selected="true">Gumbel</option>
        <option value="Gauss">Gauss</option>
        <expand macro="list_string_san" name="incorrectly_assigned"/>
      </param>
      <param name="max_nr_iterations" argument="-fit_algorithm:max_nr_iterations" type="integer" value="1000" label="Bounds the number of iterations for the EM algorithm when convergence is slow" help=""/>
      <param name="neg_log_delta" argument="-fit_algorithm:neg_log_delta" type="integer" value="6" label="The negative logarithm of the convergence threshold for the likelihood increase" help=""/>
      <param name="outlier_handling" argument="-fit_algorithm:outlier_handling" type="select" label="What to do with outliers:" help="- ignore_iqr_outliers: ignore outliers outside of 3*IQR from Q1/Q3 for fitting. - set_iqr_to_closest_valid: set IQR-based outliers to the last valid value for fitting. - ignore_extreme_percentiles: ignore everything outside 99th and 1st percentile (also removes equal values like potential censored max values in XTandem). - none: do nothing">
        <option value="ignore_iqr_outliers" selected="true">ignore_iqr_outliers</option>
        <option value="set_iqr_to_closest_valid">set_iqr_to_closest_valid</option>
        <option value="ignore_extreme_percentiles">ignore_extreme_percentiles</option>
        <option value="none">none</option>
        <expand macro="list_string_san" name="outlier_handling"/>
      </param>
    </section>
    <expand macro="adv_opts_macro">
      <param argument="-fdr_for_targets_smaller" type="float" value="0.05" label="Only used, when top_hits_only set" help="Additionally, target/decoy information should be available. The score_type must be q-value from a previous False Discovery Rate run"/>
      <param argument="-force" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Overrides tool-specific checks" help=""/>
      <param argument="-test" type="hidden" value="False" label="Enables the test mode (needed for internal use only)" help="" optional="true">
        <expand macro="list_string_san" name="test"/>
      </param>
    </expand>
    <!-- Multi-select that switches the optional datasets on; the values are matched by the
         output filters and by the command template. -->
    <param name="OPTIONAL_OUTPUTS" type="select" optional="true" multiple="true" label="Optional outputs">
      <option value="out_plot_FLAG">out_plot (txt file (if gnuplot is available, a corresponding PDF will be created as well.))</option>
      <option value="ctd_out_FLAG">Output used ctd (ini) configuration file</option>
    </param>
  </inputs>
  <!-- "out" is always produced; "out_plot" and "ctd_out" only when their flag is selected. -->
  <outputs>
    <data name="out" label="${tool.name} on ${on_string}: out" format="idxml"/>
    <data name="out_plot" label="${tool.name} on ${on_string}: out_plot" format="txt">
      <filter>OPTIONAL_OUTPUTS is not None and "out_plot_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
    <data name="ctd_out" format="xml" label="${tool.name} on ${on_string}: ctd">
      <filter>OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS</filter>
    </data>
  </outputs>
  <!-- Every test requests the ctd output as well, hence two expected outputs per test.
       The main output is compared by size only (sim_size, delta_frac 0.7). -->
  <tests>
    <!-- TOPP_IDPosteriorErrorProbability_1 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="fdr_for_targets_smaller" value="0.05"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <param name="in" value="IDPosteriorErrorProbability_Mascot_input.idXML"/>
      <output name="out" value="IDPosteriorErrorProbability_Mascot_output.idXML" compare="sim_size" delta_frac="0.7" ftype="idxml"/>
      <param name="split_charge" value="false"/>
      <param name="top_hits_only" value="false"/>
      <param name="ignore_bad_data" value="false"/>
      <param name="prob_correct" value="false"/>
      <section name="fit_algorithm">
        <param name="number_of_bins" value="100"/>
        <param name="incorrectly_assigned" value="Gumbel"/>
        <param name="max_nr_iterations" value="1000"/>
        <param name="neg_log_delta" value="6"/>
        <param name="outlier_handling" value="ignore_iqr_outliers"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_IDPosteriorErrorProbability_2 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="fdr_for_targets_smaller" value="0.05"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <param name="in" value="IDPosteriorErrorProbability_XTandem_input.idXML"/>
      <output name="out" value="IDPosteriorErrorProbability_XTandem_output.idXML" compare="sim_size" delta_frac="0.7" ftype="idxml"/>
      <param name="split_charge" value="false"/>
      <param name="top_hits_only" value="false"/>
      <param name="ignore_bad_data" value="false"/>
      <param name="prob_correct" value="false"/>
      <section name="fit_algorithm">
        <param name="number_of_bins" value="100"/>
        <param name="incorrectly_assigned" value="Gumbel"/>
        <param name="max_nr_iterations" value="1000"/>
        <param name="neg_log_delta" value="6"/>
        <param name="outlier_handling" value="ignore_iqr_outliers"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_IDPosteriorErrorProbability_3 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="fdr_for_targets_smaller" value="0.05"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <param name="in" value="IDPosteriorErrorProbability_OMSSA_input.idXML"/>
      <output name="out" value="IDPosteriorErrorProbability_OMSSA_output.idXML" compare="sim_size" delta_frac="0.7" ftype="idxml"/>
      <param name="split_charge" value="false"/>
      <param name="top_hits_only" value="false"/>
      <param name="ignore_bad_data" value="false"/>
      <param name="prob_correct" value="false"/>
      <section name="fit_algorithm">
        <param name="number_of_bins" value="100"/>
        <param name="incorrectly_assigned" value="Gumbel"/>
        <param name="max_nr_iterations" value="1000"/>
        <param name="neg_log_delta" value="6"/>
        <param name="outlier_handling" value="ignore_iqr_outliers"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_IDPosteriorErrorProbability_4 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="fdr_for_targets_smaller" value="0.05"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <param name="in" value="IDPosteriorErrorProbability_OMSSA_input2.idXML"/>
      <output name="out" value="IDPosteriorErrorProbability_OMSSA_output2.idXML" compare="sim_size" delta_frac="0.7" ftype="idxml"/>
      <param name="split_charge" value="true"/>
      <param name="top_hits_only" value="false"/>
      <param name="ignore_bad_data" value="false"/>
      <param name="prob_correct" value="false"/>
      <section name="fit_algorithm">
        <param name="number_of_bins" value="100"/>
        <param name="incorrectly_assigned" value="Gumbel"/>
        <param name="max_nr_iterations" value="1000"/>
        <param name="neg_log_delta" value="6"/>
        <param name="outlier_handling" value="ignore_iqr_outliers"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_IDPosteriorErrorProbability_5 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="fdr_for_targets_smaller" value="0.05"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <param name="in" value="IDPosteriorErrorProbability_XTandem_input2.idXML"/>
      <output name="out" value="IDPosteriorErrorProbability_XTandem_output2.idXML" compare="sim_size" delta_frac="0.7" ftype="idxml"/>
      <param name="split_charge" value="true"/>
      <param name="top_hits_only" value="false"/>
      <param name="ignore_bad_data" value="false"/>
      <param name="prob_correct" value="false"/>
      <section name="fit_algorithm">
        <param name="number_of_bins" value="100"/>
        <param name="incorrectly_assigned" value="Gumbel"/>
        <param name="max_nr_iterations" value="1000"/>
        <param name="neg_log_delta" value="6"/>
        <param name="outlier_handling" value="ignore_iqr_outliers"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_IDPosteriorErrorProbability_6 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="fdr_for_targets_smaller" value="0.05"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <param name="in" value="IDPosteriorErrorProbability_Mascot_input2.idXML"/>
      <output name="out" value="IDPosteriorErrorProbability_Mascot_output2.idXML" compare="sim_size" delta_frac="0.7" ftype="idxml"/>
      <param name="split_charge" value="true"/>
      <param name="top_hits_only" value="false"/>
      <param name="ignore_bad_data" value="false"/>
      <param name="prob_correct" value="false"/>
      <section name="fit_algorithm">
        <param name="number_of_bins" value="100"/>
        <param name="incorrectly_assigned" value="Gumbel"/>
        <param name="max_nr_iterations" value="1000"/>
        <param name="neg_log_delta" value="6"/>
        <param name="outlier_handling" value="ignore_iqr_outliers"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_IDPosteriorErrorProbability_7 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="fdr_for_targets_smaller" value="0.05"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <param name="in" value="IDPosteriorErrorProbability_bad_data.idXML"/>
      <output name="out" value="IDPosteriorErrorProbability_bad_data_output.tmp" compare="sim_size" delta_frac="0.7" ftype="idxml"/>
      <param name="split_charge" value="false"/>
      <param name="top_hits_only" value="false"/>
      <param name="ignore_bad_data" value="true"/>
      <param name="prob_correct" value="false"/>
      <section name="fit_algorithm">
        <param name="number_of_bins" value="100"/>
        <param name="incorrectly_assigned" value="Gumbel"/>
        <param name="max_nr_iterations" value="1000"/>
        <param name="neg_log_delta" value="6"/>
        <param name="outlier_handling" value="ignore_iqr_outliers"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
    <!-- TOPP_IDPosteriorErrorProbability_8 -->
    <test expect_num_outputs="2">
      <section name="adv_opts">
        <param name="fdr_for_targets_smaller" value="0.05"/>
        <param name="force" value="false"/>
        <param name="test" value="true"/>
      </section>
      <param name="in" value="IDPosteriorErrorProbability_OMSSA_input.idXML"/>
      <output name="out" value="IDPosteriorErrorProbability_prob_correct_output.idXML" compare="sim_size" delta_frac="0.7" ftype="idxml"/>
      <param name="split_charge" value="false"/>
      <param name="top_hits_only" value="false"/>
      <param name="ignore_bad_data" value="false"/>
      <param name="prob_correct" value="true"/>
      <section name="fit_algorithm">
        <param name="number_of_bins" value="100"/>
        <param name="incorrectly_assigned" value="Gumbel"/>
        <param name="max_nr_iterations" value="1000"/>
        <param name="neg_log_delta" value="6"/>
        <param name="outlier_handling" value="ignore_iqr_outliers"/>
      </section>
      <param name="OPTIONAL_OUTPUTS" value="ctd_out_FLAG"/>
      <output name="ctd_out" ftype="xml">
        <assert_contents>
          <is_valid_xml/>
        </assert_contents>
      </output>
      <assert_stdout>
        <has_text_matching expression="@EXECUTABLE@ took .* \(wall\), .* \(CPU\), .* \(system\), .* \(user\)(; Peak Memory Usage: 32 MB)?."/>
      </assert_stdout>
    </test>
  </tests>
  <help><![CDATA[Estimates probabilities for incorrectly assigned peptide sequences and a set of search engine scores using a mixture model.


For more information, visit https://openms.de/doxygen/release/3.1.0/html/TOPP_IDPosteriorErrorProbability.html]]></help>
  <expand macro="references"/>
</tool>