Mercurial > repos > galaxyp > cardinal_preprocessing
comparison preprocessing.xml @ 13:6b36be80febb draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 39bd480e8813fa7a96b640150365577a69885d17-dirty"
author | galaxyp |
---|---|
date | Sun, 29 Nov 2020 23:45:08 +0000 |
parents | e0669b1854b1 |
children | accf9fb6ea01 |
comparison
equal
deleted
inserted
replaced
12:e0669b1854b1 | 13:6b36be80febb |
---|---|
1 <tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1"> | 1 <tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.2"> |
2 <description> | 2 <description> |
3 mass spectrometry imaging preprocessing | 3 mass spectrometry imaging preprocessing |
4 </description> | 4 </description> |
5 <macros> | 5 <macros> |
6 <import>macros.xml</import> | 6 <import>macros.xml</import> |
45 | 45 |
46 @READING_MSIDATA_FULLY_COMPATIBLE@ | 46 @READING_MSIDATA_FULLY_COMPATIBLE@ |
47 | 47 |
48 | 48 |
49 ## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail | 49 ## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail |
50 msidata <- msidata[,!duplicated(coord(msidata)[,1:2])] | |
51 | 50 |
52 ## set variable to False | 51 ## set variable to False |
53 #set $used_peak_picking = False | 52 #set $used_peak_picking = False |
54 #set $used_peak_alignment = False | 53 #set $used_peak_alignment = False |
55 #set $continuous_format = False | 54 #set $continuous_format = False |
133 | 132 |
134 #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': | 133 #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': |
135 print('gaussian smoothing') | 134 print('gaussian smoothing') |
136 | 135 |
137 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) | 136 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) |
137 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) | |
138 | 138 |
139 #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': | 139 #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': |
140 print('sgolay smoothing') | 140 print('sgolay smoothing') |
141 | 141 |
142 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) | 142 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) |
143 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) | |
143 | 144 |
144 ## if selected replace negative intensities with zero | 145 ## if selected replace negative intensities with zero |
145 #if $method.methods_conditional.methods_for_smoothing.replace_negatives: | 146 #if $method.methods_conditional.methods_for_smoothing.replace_negatives: |
146 spectra(msidata)[spectra(msidata)<0] = 0 | 147 ## convert spectra into an in-memory R matrix |
148 spectra_df = as.matrix(spectra(msidata)) | |
149 spectra_df[spectra_df<0] = 0 | |
150 spectra(msidata) = spectra_df | |
147 #end if | 151 #end if |
148 | 152 |
149 #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': | 153 #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': |
150 print('moving average smoothing') | 154 print('moving average smoothing') |
151 | 155 |
152 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) | 156 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) |
157 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) | |
153 | 158 |
154 #end if | 159 #end if |
155 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) | |
156 | 160 |
157 ############################### QC ########################### | 161 ############################### QC ########################### |
158 | 162 |
159 maxfeatures =nrow(msidata) | 163 maxfeatures =nrow(msidata) |
160 pixelcount = ncol(msidata) | 164 pixelcount = ncol(msidata) |
168 | 172 |
169 | 173 |
170 ############################### Mz alignment ########################### | 174 ############################### Mz alignment ########################### |
171 | 175 |
172 #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_alignment': | 176 #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_alignment': |
173 print('M/z alignment') | 177 print('m/z alignment') |
174 ## M/z alignment | 178 ## M/z alignment |
175 | 179 |
176 #if str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_table': | 180 #if str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_table': |
177 | 181 |
178 reference_mz = read.delim("$method.methods_conditional.mzalign_ref_type.mz_tabular", header = $method.methods_conditional.mzalign_ref_type.feature_header, stringsAsFactors = FALSE) | 182 reference_mz = read.delim("$method.methods_conditional.mzalign_ref_type.mz_tabular", header = $method.methods_conditional.mzalign_ref_type.feature_header, stringsAsFactors = FALSE) |
181 msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) | 185 msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) |
182 | 186 |
183 | 187 |
184 #elif str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_noref': | 188 #elif str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_noref': |
185 | 189 |
186 msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", , quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) | 190 msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) |
187 | 191 |
188 #end if | 192 #end if |
189 | 193 |
190 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) | 194 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) |
191 | 195 |
330 ############################### Mass binning ########################### | 334 ############################### Mass binning ########################### |
331 | 335 |
332 #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning': | 336 #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning': |
333 print('mass binning') | 337 print('mass binning') |
334 | 338 |
335 #if str( $method.methods_conditional.mz_range.features_filtering) == 'change_mz_range': | 339 #if str($method.methods_conditional.mz_range.features_filtering) == 'change_mz_range': |
336 | |
337 #if str($processed_cond.processed_file) == "processed": | |
338 | 340 |
339 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun") | 341 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun") |
340 | |
341 #else | |
342 ## continuous file cannot be binned from m/z to m/z, therefore first cut m/z range and then do mzbin: | |
343 msidata = msidata[mz(msidata) >= $method.methods_conditional.mz_range.min_mz & mz(msidata) <= $method.methods_conditional.mz_range.max_mz,] | |
344 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun") | |
345 #end if | |
346 | 342 |
347 | 343 |
348 #elif str( $method.methods_conditional.mz_range.features_filtering) == 'none': | 344 #elif str($method.methods_conditional.mz_range.features_filtering) == 'none': |
349 | 345 |
350 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun) | 346 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun) |
347 | |
348 #elif str($method.methods_conditional.mz_range.features_filtering) == 'bin_to_reference': | |
349 | |
350 bin_reference_mz = read.delim("$method.methods_conditional.mz_range.mz_tabular", header = $method.methods_conditional.mz_range.feature_header, stringsAsFactors = FALSE) | |
351 bin_reference_mz = bin_reference_mz[,$method.methods_conditional.mz_range.feature_column] | |
352 | |
353 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun, | |
354 ref=bin_reference_mz) | |
351 | 355 |
352 #end if | 356 #end if |
353 | 357 |
354 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) | 358 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) |
355 | 359 |
383 | 387 |
384 #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': | 388 #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': |
385 print('log2 transformation') | 389 print('log2 transformation') |
386 | 390 |
387 ## replace 0 with NA to prevent Inf | 391 ## replace 0 with NA to prevent Inf |
388 spectra_df = spectra(msidata) ## convert into R matrix | 392 spectra_df = as.matrix(spectra(msidata)) ## convert into R matrix |
389 spectra_df[spectra_df ==0] = NA | 393 spectra_df[spectra_df ==0] = NA |
390 print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra_df)))) | 394 print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra_df)))) |
391 spectra(msidata) = spectra_df | 395 spectra(msidata) = spectra_df |
392 ## log transformation | 396 ## log transformation |
393 spectra(msidata) = log2(spectra(msidata)) | 397 spectra(msidata) = log2(spectra(msidata)) |
420 | 424 |
421 ############# Outputs: RData, imzml and QC report ############# | 425 ############# Outputs: RData, imzml and QC report ############# |
422 ################################################################################ | 426 ################################################################################ |
423 | 427 |
424 ## save msidata as imzML file, will only work if there is at least 1 m/z left | 428 ## save msidata as imzML file, will only work if there is at least 1 m/z left |
425 | |
426 #if str($imzml_output) == "cont_format": | |
427 #set $continuous_format = True | |
428 #end if | |
429 | 429 |
430 if (nrow(msidata) > 0){ | 430 if (nrow(msidata) > 0){ |
431 ## make sure that coordinates are integers | 431 ## make sure that coordinates are integers |
432 coord(msidata)\$y = as.integer(coord(msidata)\$y) | 432 coord(msidata)\$y = as.integer(coord(msidata)\$y) |
433 coord(msidata)\$x = as.integer(coord(msidata)\$x) | 433 coord(msidata)\$x = as.integer(coord(msidata)\$x) |
434 #if $used_peak_picking: | 434 ## only continuous files can currently be exported |
435 #if $continuous_format: | 435 msidata = as(msidata, "MSContinuousImagingExperiment") |
436 msidata = as(msidata, "MSContinuousImagingExperiment") | 436 writeImzML(msidata, "out") |
437 #end if | |
438 #elif $used_peak_alignment | |
439 #if $continuous_format: | |
440 msidata = as(msidata, "MSContinuousImagingExperiment") | |
441 #end if | |
442 #end if | |
443 writeImzML(msidata, "out") | |
444 } | 437 } |
445 | 438 |
446 plot(0,type='n',axes=FALSE,ann=FALSE) | 439 plot(0,type='n',axes=FALSE,ann=FALSE) |
447 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "# spectra") | 440 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "# spectra") |
448 grid.table(t(QC_numbers)) | 441 grid.table(t(QC_numbers)) |
618 <option value="mean" selected="True">mean</option> | 611 <option value="mean" selected="True">mean</option> |
619 <option value="sum">sum</option> | 612 <option value="sum">sum</option> |
620 </param> | 613 </param> |
621 <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/> | 614 <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/> |
622 <conditional name="mz_range"> | 615 <conditional name="mz_range"> |
623 <param name="features_filtering" type="select" label="Select m/z feature filtering option"> | 616 <param name="features_filtering" type="select" label="Select m/z options"> |
624 <option value="none" selected="True">none</option> | 617 <option value="none" selected="True">none</option> |
625 <option value="change_mz_range">change m/z range</option> | 618 <option value="change_mz_range">change m/z range</option> |
619 <option value="bin_to_reference">bin m/z to reference</option> | |
626 </param> | 620 </param> |
627 <when value="none"/> | 621 <when value="none"/> |
628 <when value="change_mz_range"> | 622 <when value="change_mz_range"> |
629 <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/> | 623 <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/> |
630 <param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/> | 624 <param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/> |
631 </when> | 625 </when> |
626 <when value="bin_to_reference"> | |
627 <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features as reference for binning. Only the m/z values from the tabular file will be kept."/> | |
628 </when> | |
632 </conditional> | 629 </conditional> |
633 </when> | 630 </when> |
634 <when value="Transformation"> | 631 <when value="Transformation"> |
635 <conditional name="transf_conditional"> | 632 <conditional name="transf_conditional"> |
636 <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or squareroot (sqrt)"> | 633 <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or squareroot (sqrt)"> |
643 <when value="sqrt"/> | 640 <when value="sqrt"/> |
644 </conditional> | 641 </conditional> |
645 </when> | 642 </when> |
646 </conditional> | 643 </conditional> |
647 </repeat> | 644 </repeat> |
648 <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format"/> | |
649 </inputs> | 645 </inputs> |
650 <outputs> | 646 <outputs> |
651 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> | 647 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> |
652 <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/> | 648 <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/> |
653 </outputs> | 649 </outputs> |
664 </repeat> | 660 </repeat> |
665 <repeat name="methods"> | 661 <repeat name="methods"> |
666 <conditional name="methods_conditional"> | 662 <conditional name="methods_conditional"> |
667 <param name="preprocessing_method" value="Smoothing"/> | 663 <param name="preprocessing_method" value="Smoothing"/> |
668 <conditional name="methods_for_smoothing"> | 664 <conditional name="methods_for_smoothing"> |
669 <param name="smoothing_method" value="gaussian"/> | 665 <param name="smoothing_method" value="sgolay"/> |
670 <param name="sd_gaussian" value="4"/> | 666 </conditional> |
671 </conditional> | |
672 <param name="window_smoothing" value="9"/> | |
673 </conditional> | 667 </conditional> |
674 </repeat> | 668 </repeat> |
675 <repeat name="methods"> | 669 <repeat name="methods"> |
676 <conditional name="methods_conditional"> | 670 <conditional name="methods_conditional"> |
677 <param name="preprocessing_method" value="Peak_picking"/> | 671 <param name="preprocessing_method" value="Peak_picking"/> |
700 <conditional name="transf_conditional"> | 694 <conditional name="transf_conditional"> |
701 <param name="trans_type" value="sqrt"/> | 695 <param name="trans_type" value="sqrt"/> |
702 </conditional> | 696 </conditional> |
703 </conditional> | 697 </conditional> |
704 </repeat> | 698 </repeat> |
705 <param name="imzml_output" value="cont_format"/> | |
706 <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> | 699 <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> |
707 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size"> | 700 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size"> |
708 <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/> | 701 <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/> |
709 <extra_files type="file" file="preprocessing_results1.ibd" name="ibd" compare="sim_size"/> | 702 <extra_files type="file" file="preprocessing_results1.ibd" name="ibd" compare="sim_size"/> |
710 </output> | 703 </output> |
725 <repeat name="methods"> | 718 <repeat name="methods"> |
726 <conditional name="methods_conditional"> | 719 <conditional name="methods_conditional"> |
727 <param name="preprocessing_method" value="Peak_alignment"/> | 720 <param name="preprocessing_method" value="Peak_alignment"/> |
728 </conditional> | 721 </conditional> |
729 </repeat> | 722 </repeat> |
730 <param name="imzml_output" value="cont_format"/> | |
731 <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> | 723 <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> |
732 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size"> | 724 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size"> |
733 <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/> | 725 <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/> |
734 <extra_files type="file" file="preprocessing_results2.ibd" name="ibd" compare="sim_size"/> | 726 <extra_files type="file" file="preprocessing_results2.ibd" name="ibd" compare="sim_size"/> |
735 </output> | 727 </output> |
751 <param name="window_picking" value="5"/> | 743 <param name="window_picking" value="5"/> |
752 <param name="SNR_picking_method" value="2"/> | 744 <param name="SNR_picking_method" value="2"/> |
753 <conditional name="methods_for_picking"> | 745 <conditional name="methods_for_picking"> |
754 <param name="picking_method" value="mad"/> | 746 <param name="picking_method" value="mad"/> |
755 </conditional> | 747 </conditional> |
756 <param name="imzml_output" value="proc_format"/> | |
757 </conditional> | 748 </conditional> |
758 </repeat> | 749 </repeat> |
759 <repeat name="methods"> | 750 <repeat name="methods"> |
760 <conditional name="methods_conditional"> | 751 <conditional name="methods_conditional"> |
761 <param name="preprocessing_method" value="Peak_alignment"/> | 752 <param name="preprocessing_method" value="Peak_alignment"/> |
762 <param name="imzml_output" value="proc_format"/> | |
763 </conditional> | 753 </conditional> |
764 </repeat> | 754 </repeat> |
765 <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> | 755 <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> |
766 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results3.imzml.txt" compare="sim_size"> | 756 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results3.imzml.txt" compare="sim_size"> |
767 <extra_files type="file" file="preprocessing_results3.imzml" name="imzml" lines_diff="6"/> | 757 <extra_files type="file" file="preprocessing_results3.imzml" name="imzml" lines_diff="6"/> |
848 - Transformation: log2 or square root transformation of all intensities; with log2 transformation, zero intensities become NA, which can lead to compatibility problems. | 838 - Transformation: log2 or square root transformation of all intensities; with log2 transformation, zero intensities become NA, which can lead to compatibility problems. |
849 | 839 |
850 | 840 |
851 **Output** | 841 **Output** |
852 | 842 |
853 - MSI data as continuous or processed imzML file | 843 - MSI data as continuous imzML file |
854 - pdf with key values and four random mass spectra after each processing step | 844 - pdf with key values and four random mass spectra after each processing step |
855 | 845 |
856 ]]> | 846 ]]> |
857 </help> | 847 </help> |
858 <expand macro="citations"/> | 848 <expand macro="citations"/> |