Mercurial > repos > recetox > recetox_aplcms_recover_weaker_signals
diff recetox_aplcms_recover_weaker_signals.xml @ 2:472dc85ce7c5 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 506df2aef355b3791567283e1a175914f06b405a
author | recetox |
---|---|
date | Mon, 13 Feb 2023 10:28:35 +0000 |
parents | f9fb9d8fb710 |
children | c69a12bfc2fb |
line wrap: on
line diff
--- a/recetox_aplcms_recover_weaker_signals.xml Thu Jun 16 10:27:28 2022 +0000 +++ b/recetox_aplcms_recover_weaker_signals.xml Mon Feb 13 10:28:35 2023 +0000 @@ -1,143 +1,116 @@ -<tool id="recetox_aplcms_recover_weaker_signals" name="RECETOX apLCMS - recover weaker signals" version="@TOOL_VERSION@+galaxy1"> - <description>recover weaker signals from LC/MS spectra</description> +<tool id="recetox_aplcms_recover_weaker_signals" name="recetox-aplcms - recover weaker signals" version="@TOOL_VERSION@+galaxy0"> + <description>recover weaker signals from raw data using an aligned feature table</description> <macros> <import>macros.xml</import> - <import>macros_split.xml</import> + <import>help.xml</import> </macros> <expand macro="creator"/> + <expand macro="requirements"/> - <expand macro="requirements"/> <command detect_errors="aggressive"><![CDATA[ - sh ${symlink_inputs} && - Rscript -e 'source("${__tool_directory__}/utils.R")' -e 'source("${__tool_directory__}/main.R")' -e 'source("${run_script}")' + python '${__tool_directory__}/mzml_id_getter.py' '$input_file'; + Rscript -e 'source("${__tool_directory__}/utils.R")' -e 'source("${run_script}")' ]]></command> <configfiles> - <configfile name="symlink_inputs"> - #for $infile in $ms_files - ln -s '${infile}' '${infile.element_identifier}' - #end for - #for $infile in $extracted_files - ln -s '${infile}' 'extracted_${infile.element_identifier}' - #end for - #for $infile in $corrected_files - ln -s '${infile}' '${infile.element_identifier}' - #end for - </configfile> <configfile name="run_script"><![CDATA[ - #set filenames = str("', '").join([str($f.element_identifier) for $f in $ms_files]) - filenames <- sort_samples_by_acquisition_number(c('$filenames')) + sample_name <- readChar('sample_name.txt', file.info('sample_name.txt')\$size) + + if(is.na(sample_name)) { + stop("The mzML file does not contain run ID.") + } + + #set extracted_filenames = str("', '").join([str($f) for $f in $extracted_files]) + extracted_feature_tables <- load_parquet_collection(c('$extracted_filenames')) + sample_names <- unlist(lapply(extracted_feature_tables, load_sample_name)) + validate_sample_names(sample_names) + extracted_features <- select_table_with_sample_name(extracted_feature_tables, sample_name) - #set extracted_files = str("', '").join(["extracted_" + str($f.element_identifier) for $f in $extracted_files]) - extracted <- load_features(c('$extracted_files')) + #set corrected_filenames = str("', '").join([str($f) for $f in $corrected_files]) + corrected_feature_tables <- load_parquet_collection(c('$corrected_filenames')) + sample_names <- unlist(lapply(corrected_feature_tables, load_sample_name)) + validate_sample_names(sample_names) + corrected_features <- select_table_with_sample_name(corrected_feature_tables, sample_name) + + metadata <- load_data_from_parquet_file('$metadata_file') + rt_table <- load_data_from_parquet_file('$rt_file') + intensity_table <- load_data_from_parquet_file('$intensity_file') + + tolerances <- load_data_from_parquet_file('$tolerances') - #set corrected_files = str("', '").join([str($f.element_identifier) for $f in $corrected_files]) - corrected <- load_features(c('$corrected_files')) - - aligned <- load_aligned_features('$rt_cross_table_file', '$int_cross_table_file', '$tolerances_file') - - cluster <- as.integer(Sys.getenv('GALAXY_SLOTS', unset = 1)) - - recovered <- recover_signals(cluster, - filenames, - extracted, - corrected, - aligned, - $mz_tol, - $weak_signal_recovery.recover_mz_range, - $weak_signal_recovery.recover_chr_range, - $weak_signal_recovery.use_observed_range, - $min_bandwidth, - $max_bandwidth, - $weak_signal_recovery.recover_min_count + recovered <- recover.weaker( + filename = '$input_file', + sample_name = sample_name, + extracted_features = extracted_features, + adjusted_features = corrected_features, + metadata_table = metadata, + rt_table = rt_table, + intensity_table = intensity_table, + mz_tol = $mz_tol, + mz_tol_relative = get_mz_tol(tolerances), + rt_tol_relative = get_rt_tol(tolerances), + #if $recover_mz_range: + recover_mz_range = $recover_mz_range, + #else: + recover_mz_range = NA, + #end if + #if $recover_rt_range: + recover_rt_range = $recover_rt_range, + #else: + recover_rt_range = NA, + #end if + use_observed_range = $use_observed_range, + bandwidth = $bandwidth, + #if $min_bandwidth: + min_bandwidth = $min_bandwidth, + #else: + min_bandwidth = NA, + #end if + #if $max_bandwidth: + max_bandwidth = $max_bandwidth, + #else: + max_bandwidth = NA, + #end if + recover_min_count = $recover_min_count, + intensity_weighted = $intensity_weighted ) - aligned_feature_sample_table <- create_feature_sample_table(aligned) - recovered_feature_sample_table <- create_feature_sample_table(recovered) + recovered <- select_adjusted(recovered) - save_all_features(recovered, filenames) - save_all_feature_tables(aligned_feature_sample_table, recovered_feature_sample_table, - '${aligned_feature_sample_table}', '${recovered_feature_sample_table}', - '$output_format.out_format') + recovered <- save_sample_name(recovered, sample_name) + save_data_as_parquet_file(recovered, '$output_file') ]]></configfile> </configfiles> <inputs> - <param name="ms_files" type="data_collection" collection_type="list" format="mzdata,mzml,mzxml,netcdf" - label="Input data collection" help="Mass spectrometry file for peak extraction." /> + <param name="input_file" type="data" format="mzml" label="Input spectra data" + help="Mass spectrometry sample-wise features table." /> <param name="extracted_files" type="data_collection" collection_type="list" format="parquet" label="Input extracted feature samples collection" help="Mass spectrometry files containing feature samples." /> <param name="corrected_files" type="data_collection" collection_type="list" format="parquet" label="Input corrected feature samples collection" help="Mass spectrometry file containing corrected feature samples." /> - <param name="tolerances_file" type="data" format="parquet" label="Input tolerances" help="TBD"/> - <param name="rt_cross_table_file" type="data" format="parquet" label="Input rt cross table" help="TBD"/> - <param name="int_cross_table_file" type="data" format="parquet" label="Input int cross table" help="TBD"/> - <expand macro="mz_tol_macro"/> - <param name="min_bandwidth" type="float" optional="true" label="min_bandwidth (optional)" - help="The minimum bandwidth to use in the kernel smoother." /> - <param name="max_bandwidth" type="float" optional="true" label="max_bandwidth (optional)" - help="The maximum bandwidth to use in the kernel smoother." /> - <expand macro="weak_signal_recovery"/> - <expand macro="output_format"/> + <param name="metadata_file" type="data" format="parquet" label="Metadata table" + help="Peak metadata table from the align features step." /> + <param name="rt_file" type="data" format="parquet" label="RT table" + help="Table with retention times for features (rows) across samples (columns)." /> + <param name="intensity_file" type="data" format="parquet" label="Intensity table" + help="Table with intensities for features (rows) across samples (columns)." /> + <param label="Input tolerances values" name="tolerances" type="data" format="parquet" + help="Table containing tolerance values." /> + <expand macro="recover_weaker_params"/> + <expand macro="bandwidth_params"/> </inputs> <outputs> - <expand macro="unsupervised_outputs"> - <collection name="extracted_features" type="list" label="${tool.name} extracted_features on ${on_string}"> - <discover_datasets pattern="__designation__" directory="extracted" format="parquet" /> - </collection > - <collection name="corrected_features" type="list" label="${tool.name} corrected_features on ${on_string}"> - <discover_datasets pattern="__designation__" directory="corrected" format="parquet" /> - </collection > - </expand> + <data label="${tool.name} on ${on_string}" name="output_file" format="parquet" /> </outputs> <tests> - <test> - <param name="ms_files"> - <collection type="list"> - <element name="mbr_test0.mzml" value="mbr_test0.mzml"/> - <element name="mbr_test1.mzml" value="mbr_test1.mzml"/> - <element name="mbr_test2.mzml" value="mbr_test2.mzml"/> - </collection> - </param> - <param name="extracted_files"> - <collection type="list"> - <element name="extracted_features_0.parquet" value="extracted_expected/extracted_0.parquet"/> - <element name="extracted_features_1.parquet" value="extracted_expected/extracted_1.parquet"/> - <element name="extracted_features_2.parquet" value="extracted_expected/extracted_2.parquet"/> - </collection> - </param> - <param name="corrected_files"> - <collection type="list"> - <element name="corrected_features_0.parquet" value="corrected_expected/corrected_0.parquet"/> - <element name="corrected_features_1.parquet" value="corrected_expected/corrected_1.parquet"/> - <element name="corrected_features_2.parquet" value="corrected_expected/corrected_2.parquet"/> - </collection> - </param> - <param name="tolerances_file" value="tolerances.parquet" ftype="parquet"/> - <param name="rt_cross_table_file" value="rt_cross_table.parquet" ftype="parquet"/> - <param name="int_cross_table_file" value="int_cross_table.parquet" ftype="parquet"/> - <output name="recovered_feature_sample_table" ftype="parquet" - file="unsupervised_output/unsupervised_recovered_feature_sample_table.parquet"/> - <output name="aligned_feature_sample_table" ftype="parquet" - file="unsupervised_output/unsupervised_aligned_feature_sample_table.parquet"/> - <output_collection name="corrected_features" type="list"> - <element name="mbr_test0.parquet" file="unsupervised_output/corrected_features_0.parquet" ftype="parquet"/> - <element name="mbr_test1.parquet" file="unsupervised_output/corrected_features_1.parquet" ftype="parquet"/> - <element name="mbr_test2.parquet" file="unsupervised_output/corrected_features_2.parquet" ftype="parquet"/> - </output_collection> - <output_collection name="extracted_features" type="list"> - <element name="mbr_test0.parquet" file="unsupervised_output/extracted_features_0.parquet" ftype="parquet"/> - <element name="mbr_test1.parquet" file="unsupervised_output/extracted_features_1.parquet" ftype="parquet"/> - <element name="mbr_test2.parquet" file="unsupervised_output/extracted_features_2.parquet" ftype="parquet"/> - </output_collection> - </test> </tests> <help> <![CDATA[ - This is a tool which runs apLCMS recovery of weaker signals. + @RECOVER_WEAKER_SIGNALS_HELP@ @GENERAL_HELP@ ]]>