comparison recetox_aplcms_recover_weaker_signals.xml @ 2:472dc85ce7c5 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 506df2aef355b3791567283e1a175914f06b405a
author recetox
date Mon, 13 Feb 2023 10:28:35 +0000
parents f9fb9d8fb710
children c69a12bfc2fb
comparison
equal deleted inserted replaced
1:f9fb9d8fb710 2:472dc85ce7c5
1 <tool id="recetox_aplcms_recover_weaker_signals" name="RECETOX apLCMS - recover weaker signals" version="@TOOL_VERSION@+galaxy1"> 1 <tool id="recetox_aplcms_recover_weaker_signals" name="recetox-aplcms - recover weaker signals" version="@TOOL_VERSION@+galaxy0">
2 <description>recover weaker signals from LC/MS spectra</description> 2 <description>recover weaker signals from raw data using an aligned feature table</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <import>macros_split.xml</import> 5 <import>help.xml</import>
6 </macros> 6 </macros>
7 <expand macro="creator"/> 7 <expand macro="creator"/>
8 <expand macro="requirements"/>
8 9
9 <expand macro="requirements"/>
10 <command detect_errors="aggressive"><![CDATA[ 10 <command detect_errors="aggressive"><![CDATA[
11 sh ${symlink_inputs} && 11 python '${__tool_directory__}/mzml_id_getter.py' '$input_file';
12 Rscript -e 'source("${__tool_directory__}/utils.R")' -e 'source("${__tool_directory__}/main.R")' -e 'source("${run_script}")' 12 Rscript -e 'source("${__tool_directory__}/utils.R")' -e 'source("${run_script}")'
13 ]]></command> 13 ]]></command>
14 <configfiles> 14 <configfiles>
15 <configfile name="symlink_inputs">
16 #for $infile in $ms_files
17 ln -s '${infile}' '${infile.element_identifier}'
18 #end for
19 #for $infile in $extracted_files
20 ln -s '${infile}' 'extracted_${infile.element_identifier}'
21 #end for
22 #for $infile in $corrected_files
23 ln -s '${infile}' '${infile.element_identifier}'
24 #end for
25 </configfile>
26 <configfile name="run_script"><![CDATA[ 15 <configfile name="run_script"><![CDATA[
27 #set filenames = str("', '").join([str($f.element_identifier) for $f in $ms_files]) 16 sample_name <- readChar('sample_name.txt', file.info('sample_name.txt')\$size)
28 filenames <- sort_samples_by_acquisition_number(c('$filenames'))
29 17
30 #set extracted_files = str("', '").join(["extracted_" + str($f.element_identifier) for $f in $extracted_files]) 18 if(is.na(sample_name)) {
31 extracted <- load_features(c('$extracted_files')) 19 stop("The mzML file does not contain run ID.")
20 }
32 21
33 #set corrected_files = str("', '").join([str($f.element_identifier) for $f in $corrected_files]) 22 #set extracted_filenames = str("', '").join([str($f) for $f in $extracted_files])
34 corrected <- load_features(c('$corrected_files')) 23 extracted_feature_tables <- load_parquet_collection(c('$extracted_filenames'))
24 sample_names <- unlist(lapply(extracted_feature_tables, load_sample_name))
25 validate_sample_names(sample_names)
26 extracted_features <- select_table_with_sample_name(extracted_feature_tables, sample_name)
35 27
36 aligned <- load_aligned_features('$rt_cross_table_file', '$int_cross_table_file', '$tolerances_file') 28 #set corrected_filenames = str("', '").join([str($f) for $f in $corrected_files])
29 corrected_feature_tables <- load_parquet_collection(c('$corrected_filenames'))
30 sample_names <- unlist(lapply(corrected_feature_tables, load_sample_name))
31 validate_sample_names(sample_names)
32 corrected_features <- select_table_with_sample_name(corrected_feature_tables, sample_name)
37 33
38 cluster <- as.integer(Sys.getenv('GALAXY_SLOTS', unset = 1)) 34 metadata <- load_data_from_parquet_file('$metadata_file')
35 rt_table <- load_data_from_parquet_file('$rt_file')
36 intensity_table <- load_data_from_parquet_file('$intensity_file')
39 37
40 recovered <- recover_signals(cluster, 38 tolerances <- load_data_from_parquet_file('$tolerances')
41 filenames, 39
42 extracted, 40 recovered <- recover.weaker(
43 corrected, 41 filename = '$input_file',
44 aligned, 42 sample_name = sample_name,
45 $mz_tol, 43 extracted_features = extracted_features,
46 $weak_signal_recovery.recover_mz_range, 44 adjusted_features = corrected_features,
47 $weak_signal_recovery.recover_chr_range, 45 metadata_table = metadata,
48 $weak_signal_recovery.use_observed_range, 46 rt_table = rt_table,
49 $min_bandwidth, 47 intensity_table = intensity_table,
50 $max_bandwidth, 48 mz_tol = $mz_tol,
51 $weak_signal_recovery.recover_min_count 49 mz_tol_relative = get_mz_tol(tolerances),
50 rt_tol_relative = get_rt_tol(tolerances),
51 #if $recover_mz_range:
52 recover_mz_range = $recover_mz_range,
53 #else:
54 recover_mz_range = NA,
55 #end if
56 #if $recover_rt_range:
57 recover_rt_range = $recover_rt_range,
58 #else:
59 recover_rt_range = NA,
60 #end if
61 use_observed_range = $use_observed_range,
62 bandwidth = $bandwidth,
63 #if $min_bandwidth:
64 min_bandwidth = $min_bandwidth,
65 #else:
66 min_bandwidth = NA,
67 #end if
68 #if $max_bandwidth:
69 max_bandwidth = $max_bandwidth,
70 #else:
71 max_bandwidth = NA,
72 #end if
73 recover_min_count = $recover_min_count,
74 intensity_weighted = $intensity_weighted
52 ) 75 )
53 76
54 aligned_feature_sample_table <- create_feature_sample_table(aligned) 77 recovered <- select_adjusted(recovered)
55 recovered_feature_sample_table <- create_feature_sample_table(recovered)
56 78
57 save_all_features(recovered, filenames) 79 recovered <- save_sample_name(recovered, sample_name)
58 save_all_feature_tables(aligned_feature_sample_table, recovered_feature_sample_table, 80 save_data_as_parquet_file(recovered, '$output_file')
59 '${aligned_feature_sample_table}', '${recovered_feature_sample_table}',
60 '$output_format.out_format')
61 ]]></configfile> 81 ]]></configfile>
62 </configfiles> 82 </configfiles>
63 83
64 <inputs> 84 <inputs>
65 <param name="ms_files" type="data_collection" collection_type="list" format="mzdata,mzml,mzxml,netcdf" 85 <param name="input_file" type="data" format="mzml" label="Input spectra data"
66 label="Input data collection" help="Mass spectrometry file for peak extraction." /> 86 help="Mass spectrometry sample-wise features table." />
67 <param name="extracted_files" type="data_collection" collection_type="list" format="parquet" 87 <param name="extracted_files" type="data_collection" collection_type="list" format="parquet"
68 label="Input extracted feature samples collection" help="Mass spectrometry files containing feature samples." /> 88 label="Input extracted feature samples collection" help="Mass spectrometry files containing feature samples." />
69 <param name="corrected_files" type="data_collection" collection_type="list" format="parquet" 89 <param name="corrected_files" type="data_collection" collection_type="list" format="parquet"
70 label="Input corrected feature samples collection" help="Mass spectrometry file containing corrected feature samples." /> 90 label="Input corrected feature samples collection" help="Mass spectrometry file containing corrected feature samples." />
71 <param name="tolerances_file" type="data" format="parquet" label="Input tolerances" help="TBD"/> 91 <param name="metadata_file" type="data" format="parquet" label="Metadata table"
72 <param name="rt_cross_table_file" type="data" format="parquet" label="Input rt cross table" help="TBD"/> 92 help="Peak metadata table from the align features step." />
73 <param name="int_cross_table_file" type="data" format="parquet" label="Input int cross table" help="TBD"/> 93 <param name="rt_file" type="data" format="parquet" label="RT table"
74 <expand macro="mz_tol_macro"/> 94 help="Table with retention times for features (rows) across samples (columns)." />
75 <param name="min_bandwidth" type="float" optional="true" label="min_bandwidth (optional)" 95 <param name="intensity_file" type="data" format="parquet" label="Intensity table"
76 help="The minimum bandwidth to use in the kernel smoother." /> 96 help="Table with intensities for features (rows) across samples (columns)." />
77 <param name="max_bandwidth" type="float" optional="true" label="max_bandwidth (optional)" 97 <param label="Input tolerances values" name="tolerances" type="data" format="parquet"
78 help="The maximum bandwidth to use in the kernel smoother." /> 98 help="Table containing tolerance values." />
79 <expand macro="weak_signal_recovery"/> 99 <expand macro="recover_weaker_params"/>
80 <expand macro="output_format"/> 100 <expand macro="bandwidth_params"/>
81 </inputs> 101 </inputs>
82 102
83 <outputs> 103 <outputs>
84 <expand macro="unsupervised_outputs"> 104 <data label="${tool.name} on ${on_string}" name="output_file" format="parquet" />
85 <collection name="extracted_features" type="list" label="${tool.name} extracted_features on ${on_string}">
86 <discover_datasets pattern="__designation__" directory="extracted" format="parquet" />
87 </collection >
88 <collection name="corrected_features" type="list" label="${tool.name} corrected_features on ${on_string}">
89 <discover_datasets pattern="__designation__" directory="corrected" format="parquet" />
90 </collection >
91 </expand>
92 </outputs> 105 </outputs>
93 106
94 <tests> 107 <tests>
95 <test>
96 <param name="ms_files">
97 <collection type="list">
98 <element name="mbr_test0.mzml" value="mbr_test0.mzml"/>
99 <element name="mbr_test1.mzml" value="mbr_test1.mzml"/>
100 <element name="mbr_test2.mzml" value="mbr_test2.mzml"/>
101 </collection>
102 </param>
103 <param name="extracted_files">
104 <collection type="list">
105 <element name="extracted_features_0.parquet" value="extracted_expected/extracted_0.parquet"/>
106 <element name="extracted_features_1.parquet" value="extracted_expected/extracted_1.parquet"/>
107 <element name="extracted_features_2.parquet" value="extracted_expected/extracted_2.parquet"/>
108 </collection>
109 </param>
110 <param name="corrected_files">
111 <collection type="list">
112 <element name="corrected_features_0.parquet" value="corrected_expected/corrected_0.parquet"/>
113 <element name="corrected_features_1.parquet" value="corrected_expected/corrected_1.parquet"/>
114 <element name="corrected_features_2.parquet" value="corrected_expected/corrected_2.parquet"/>
115 </collection>
116 </param>
117 <param name="tolerances_file" value="tolerances.parquet" ftype="parquet"/>
118 <param name="rt_cross_table_file" value="rt_cross_table.parquet" ftype="parquet"/>
119 <param name="int_cross_table_file" value="int_cross_table.parquet" ftype="parquet"/>
120 108
121 <output name="recovered_feature_sample_table" ftype="parquet"
122 file="unsupervised_output/unsupervised_recovered_feature_sample_table.parquet"/>
123 <output name="aligned_feature_sample_table" ftype="parquet"
124 file="unsupervised_output/unsupervised_aligned_feature_sample_table.parquet"/>
125 <output_collection name="corrected_features" type="list">
126 <element name="mbr_test0.parquet" file="unsupervised_output/corrected_features_0.parquet" ftype="parquet"/>
127 <element name="mbr_test1.parquet" file="unsupervised_output/corrected_features_1.parquet" ftype="parquet"/>
128 <element name="mbr_test2.parquet" file="unsupervised_output/corrected_features_2.parquet" ftype="parquet"/>
129 </output_collection>
130 <output_collection name="extracted_features" type="list">
131 <element name="mbr_test0.parquet" file="unsupervised_output/extracted_features_0.parquet" ftype="parquet"/>
132 <element name="mbr_test1.parquet" file="unsupervised_output/extracted_features_1.parquet" ftype="parquet"/>
133 <element name="mbr_test2.parquet" file="unsupervised_output/extracted_features_2.parquet" ftype="parquet"/>
134 </output_collection>
135 </test>
136 </tests> 109 </tests>
137 110
138 <help> 111 <help>
139 <![CDATA[ 112 <![CDATA[
140 This is a tool which runs apLCMS recovery of weaker signals. 113 @RECOVER_WEAKER_SIGNALS_HELP@
141 114
142 @GENERAL_HELP@ 115 @GENERAL_HELP@
143 ]]> 116 ]]>
144 </help> 117 </help>
145 118