# HG changeset patch # User galaxyp # Date 1614242497 0 # Node ID 593839e1f2c399ce4295497e2f8632f813e0e3e4 # Parent 8212e342e48269fcc3557238bfddf45d5c5703da "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msstats commit 4fe4a0b30469f52c937830d2f3c316f6b9667407" diff -r 8212e342e482 -r 593839e1f2c3 msstats.xml --- a/msstats.xml Thu Jan 28 20:48:40 2021 +0000 +++ b/msstats.xml Thu Feb 25 08:41:37 2021 +0000 @@ -1,4 +1,4 @@ - + statistical relative protein significance analysis in DDA, SRM and DIA Mass Spectrometry 3.22.0 @@ -174,66 +174,77 @@ censoredInt="$dp_options.censoredInt", #end if cutoffCensored="$dp_options.cutoffCensored", + #if $dp_options.maxQuantileforCensored == '' + maxQuantileforCensored = NULL) + #else maxQuantileforCensored = $dp_options.maxQuantileforCensored) - -#if 'processed_data' in $selected_outputs + #end if + +#if 'raw_data' in $dp_options.selected_outputs +write.table(raw, "raw.tsv", sep = "\t", quote = F, row.names = F, dec = ".") +#end if + +#if 'processed_data' in $dp_options.selected_outputs write.table(processed_data\$ProcessedData, "ProcessedData.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if -#if 'runlevel_data' in $selected_outputs + +#if 'runlevel_data' in $dp_options.selected_outputs write.table(processed_data\$RunlevelData, "RunlevelData.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if -#for $plot_type in $selected_outputs +#for $plot_type in $dp_options.out_plots_opt.selected_vis_outputs + + #if $plot_type[-4:] == "Plot" dataProcessPlots(data = processed_data, type = '$plot_type', - featureName = "$out_plots_opt.featureName", - #if $out_plots_opt.ylimUp: - ylimUp = $out_plots_opt.ylimUp, + featureName = "$dp_options.out_plots_opt.proc_plots_advanced.featureName", + #if $dp_options.out_plots_opt.proc_plots_advanced.ylimUp: + ylimUp = $dp_options.out_plots_opt.proc_plots_advanced.ylimUp, #end if - #if $out_plots_opt.ylimDown: - ylimDown = $out_plots_opt.ylimDown, + #if $dp_options.out_plots_opt.proc_plots_advanced.ylimDown: + ylimDown = $dp_options.out_plots_opt.proc_plots_advanced.ylimDown, #end if - scale = $out_plots_opt.scale, - interval = "$out_plots_opt.interval", - x.axis.size = $out_plots_opt.x_axis_size, - y.axis.size = $out_plots_opt.y_axis_size, - text.size = $out_plots_opt.text_size, - text.angle = $out_plots_opt.text_angle, - legend.size = $out_plots_opt.legend_size, - dot.size.profile = $out_plots_opt.dot_size_profile, - dot.size.condition = $out_plots_opt.dot_size_condition, - width = $out_plots_opt.width, - height = $out_plots_opt.height, - #if $out_plots_opt.which_Protein.select != 'list' - which.Protein = "$out_plots_opt.which_Protein.select", + scale = $dp_options.out_plots_opt.proc_plots_advanced.scale, + interval = "$dp_options.out_plots_opt.proc_plots_advanced.interval", + x.axis.size = $dp_options.out_plots_opt.proc_plots_advanced.x_axis_size, + y.axis.size = $dp_options.out_plots_opt.proc_plots_advanced.y_axis_size, + text.size = $dp_options.out_plots_opt.proc_plots_advanced.text_size, + text.angle = $dp_options.out_plots_opt.proc_plots_advanced.text_angle, + legend.size = $dp_options.out_plots_opt.proc_plots_advanced.legend_size, + dot.size.profile = $dp_options.out_plots_opt.proc_plots_advanced.dot_size_profile, + dot.size.condition = $dp_options.out_plots_opt.proc_plots_advanced.dot_size_condition, + width = $dp_options.out_plots_opt.width, + height = $dp_options.out_plots_opt.height, + #if $dp_options.out_plots_opt.which_Protein.select != 'list' + which.Protein = "$dp_options.out_plots_opt.which_Protein.select", #else - which.Protein = unlist(read.table("$out_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE), + which.Protein = unlist(read.table("$dp_options.out_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE), #end if - remove_uninformative_feature_outlier = $out_plots_opt.remove_uninformative_feature_outlier, + remove_uninformative_feature_outlier = $dp_options.out_plots_opt.proc_plots_advanced.remove_uninformative_feature_outlier, address="MSStats_only_") #end if #end for ## Quantifiaction -#if 'quant_sample_matrix' in $selected_outputs +#if 'quant_sample_matrix' in $dp_options.selected_outputs sampleQuantMatrix <- quantification(processed_data, type="Sample") write.table(sampleQuantMatrix, "SampleQuantificationMatrix.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if -#if 'quant_sample_long' in $selected_outputs +#if 'quant_sample_long' in $dp_options.selected_outputs sampleQuantLong <- quantification(processed_data, type="Sample", format="long") write.table(sampleQuantLong, "SampleQuantificationLong.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if -#if 'quant_group_matrix' in $selected_outputs +#if 'quant_group_matrix' in $dp_options.selected_outputs groupQuantMatrix <- quantification(processed_data, type="Group") write.table(groupQuantMatrix, "GroupQuantificationMatrix.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if -#if 'quant_group_long' in $selected_outputs +#if 'quant_group_long' in $dp_options.selected_outputs groupQuantLong <- quantification(processed_data, type="Group", format="long") write.table(groupQuantLong, "GroupQuantificationLong.tsv", sep = "\t", quote = F, row.names = F, dec = ".") #end if @@ -272,22 +283,23 @@ ## Visualizations: -#for $plot_type in $group.select_outputs +#for $plot_type in $group.comparison_plots_opt.select_comparison_plots + #if $plot_type == "QQPlots" or $plot_type == "ResidualPlots" modelBasedQCPlots(data = comparisons, type = "$plot_type", - axis.size = $comparison_plots_opt.axis_size, - dot.size = $comparison_plots_opt.dot_size, - text.size = $comparison_plots_opt.text_size, - legend.size = $comparison_plots_opt.legend_size, - width = $comparison_plots_opt.width, - height = $comparison_plots_opt.height, - #if $comparison_plots_opt.which_Protein.select != 'list' - which.Protein = "$comparison_plots_opt.which_Protein.select", + axis.size = $group.comparison_plots_opt.comparison_vis_options.axis_size, + dot.size = $group.comparison_plots_opt.comparison_vis_options.dot_size, + text.size = $group.comparison_plots_opt.comparison_vis_options.text_size, + legend.size = $group.comparison_plots_opt.comparison_vis_options.legend_size, + width = $group.comparison_plots_opt.width, + height = $group.comparison_plots_opt.height, + #if $group.comparison_plots_opt.which_Protein.select != 'list' + which.Protein = "$group.comparison_plots_opt.which_Protein.select", #else - which.Protein = unlist(read.table("$comparison_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE), + which.Protein = unlist(read.table("$group.comparison_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE), #end if address="MSStats_group_") @@ -296,38 +308,38 @@ groupComparisonPlots(data = comparisons\$ComparisonResult, type = "$plot_type", - sig = $comparison_plots_opt.sig, - #if $comparison_plots_opt.FCcutoff: - FCcutoff = $comparison_plots_opt.FCcutoff, + sig = $group.comparison_plots_opt.comparison_vis_options.sig, + #if $group.comparison_plots_opt.comparison_vis_options.FCcutoff: + FCcutoff = $group.comparison_plots_opt.comparison_vis_options.FCcutoff, #end if - logBase.pvalue = $comparison_plots_opt.logBase_pvalue, - #if $comparison_plots_opt.ylimUp: - ylimUp = $comparison_plots_opt.ylimUp, + logBase.pvalue = $group.comparison_plots_opt.comparison_vis_options.logBase_pvalue, + #if $group.comparison_plots_opt.comparison_vis_options.ylimUp: + ylimUp = $group.comparison_plots_opt.comparison_vis_options.ylimUp, #end if - #if $comparison_plots_opt.ylimDown: - ylimDown = $comparison_plots_opt.ylimDown, + #if $group.comparison_plots_opt.comparison_vis_options.ylimDown: + ylimDown = $group.comparison_plots_opt.comparison_vis_options.ylimDown, #end if - x.axis.size = $comparison_plots_opt.x_axis_size, - y.axis.size = $comparison_plots_opt.y_axis_size, - dot.size = $comparison_plots_opt.dot_size, - text.size = $comparison_plots_opt.text_size, - text.angle = $comparison_plots_opt.text_angle, - legend.size = $comparison_plots_opt.legend_size, - ProteinName = $comparison_plots_opt.ProteinName, - colorkey = $comparison_plots_opt.colorkey, - numProtein = $comparison_plots_opt.numProtein, - clustering = "$comparison_plots_opt.clustering", - width = $comparison_plots_opt.width, - height = $comparison_plots_opt.height, - #if $comparison_plots_opt.which_Protein.select != 'list' - which.Protein = "$comparison_plots_opt.which_Protein.select", + x.axis.size = $group.comparison_plots_opt.comparison_vis_options.x_axis_size, + y.axis.size = $group.comparison_plots_opt.comparison_vis_options.y_axis_size, + dot.size = $group.comparison_plots_opt.comparison_vis_options.dot_size, + text.size = $group.comparison_plots_opt.comparison_vis_options.text_size, + text.angle = $group.comparison_plots_opt.comparison_vis_options.text_angle, + legend.size = $group.comparison_plots_opt.comparison_vis_options.legend_size, + ProteinName = $group.comparison_plots_opt.comparison_vis_options.ProteinName, + colorkey = $group.comparison_plots_opt.comparison_vis_options.colorkey, + numProtein = $group.comparison_plots_opt.comparison_vis_options.numProtein, + clustering = "$group.comparison_plots_opt.comparison_vis_options.clustering", + width = $group.comparison_plots_opt.width, + height = $group.comparison_plots_opt.height, + #if $group.comparison_plots_opt.which_Protein.select != 'list' + which.Protein = "$group.comparison_plots_opt.which_Protein.select", #else - which.Protein = unlist(read.table("$comparison_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE), + which.Protein = unlist(read.table("$group.comparison_plots_opt.which_Protein.protein_list", sep = "\n", header = FALSE), use.names = FALSE), #end if - #if $comparison_plots_opt.which_Comparison.select != 'list' - which.Comparison = "$comparison_plots_opt.which_Comparison.select", + #if $group.comparison_plots_opt.comparison_vis_options.which_Comparison.select != 'list' + which.Comparison = "$group.comparison_plots_opt.comparison_vis_options.which_Comparison.select", #else - which.Comparison = unlist(read.table("$comparison_plots_opt.which_Comparison.comparison_list", sep = "\n", header = FALSE), use.names = FALSE), + which.Comparison = unlist(read.table("$group.comparison_plots_opt.comparison_vis_options.which_Comparison.comparison_list", sep = "\n", header = FALSE), use.names = FALSE), #end if address="MSStats_group_") @@ -353,7 +365,7 @@ - + @@ -407,7 +419,18 @@ -
+
+ + + + + + + + + + + @@ -456,11 +479,11 @@ - + - The processing tools report missing values differently. This option is for distinguishwhich value should be considered as missing, and further whether it is censored or at random. Skyline and OpenSWATH input should use '0'. MaxQuant input should use 'NA' + The processing tools report missing values differently. This option is for distinguish which value should be considered as missing, and further whether it is censored or at random. Skyline and OpenSWATH input should use '0'. MaxQuant input should use 'NA' @@ -470,45 +493,16 @@ - -
- - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - + + + +
+ + + + + + @@ -521,8 +515,36 @@ - -
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + +
+
+
+ @@ -531,147 +553,156 @@ - - Heatmap requires more than one comparison + + - + + +
+ + + - - - + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + ’protein’ means that protein dendrogram is computed and reordered based on protein means (the order of row is changed). ’comparison’ means comparison dendrogram is computed and reordered based on comparison means (the order of comparison is changed). ’both’ means to reorder both protein and comparison. + + + + + + + + + + + + + + +
+
-
- - - - - - - - - - - - - - - - - - - - - ’protein’ means that protein dendrogram is computed and reordered based on protein means (the order of row is changed). ’comparison’ means comparison dendrogram is computed and reordered based on comparison means (the order of comparison is changed). ’both’ means to reorder both protein and comparison. - - - - - - - - - - - - - - - - - - - - - - - - - - -
- 'log' in selected_outputs + 'log' in in dp_options['selected_outputs'] - 'r_script' in selected_outputs + 'r_script' in dp_options['selected_outputs'] + + + 'raw_data' in dp_options['selected_outputs'] - 'processed_data' in selected_outputs - + 'processed_data' in dp_options['selected_outputs'] + - 'runlevel_data' in selected_outputs - + 'runlevel_data' in dp_options['selected_outputs'] + - 'QCPlot' in selected_outputs + dp_options['out_plots_opt']['selected_vis_outputs'] and 'QCPlot' in dp_options['out_plots_opt']['selected_vis_outputs'] - 'ProfilePlot' in selected_outputs + dp_options['out_plots_opt']['selected_vis_outputs'] and 'ProfilePlot' in dp_options['out_plots_opt']['selected_vis_outputs'] - 'profile_wsum_plot' in selected_outputs + dp_options['out_plots_opt']['selected_vis_outputs'] and 'profile_wsum_plot' in dp_options['out_plots_opt']['selected_vis_outputs'] - 'ConditionPlot' in selected_outputs + dp_options['out_plots_opt']['selected_vis_outputs'] and 'ConditionPlot' in dp_options['out_plots_opt']['selected_vis_outputs'] - 'quant_sample_matrix' in selected_outputs + 'quant_sample_matrix' in dp_options['selected_outputs'] - 'quant_sample_long' in selected_outputs - + 'quant_sample_long' in dp_options['selected_outputs'] + - 'quant_group_matrix' in selected_outputs + 'quant_group_matrix' in dp_options['selected_outputs'] - 'quant_group_long' in selected_outputs - + 'quant_group_long' in dp_options['selected_outputs'] + group['group_comparison'] == 'yes' and 'comparison_result' in group['select_outputs'] - + group['group_comparison'] == 'yes' and 'fittedmodel' in group['select_outputs'] group['group_comparison'] == 'yes' and 'model_qc' in group['select_outputs'] - + - group['group_comparison'] == 'yes' and 'QQPlots' in group['select_outputs'] + group['group_comparison'] == 'yes' and group['comparison_plots_opt']['select_comparison_plots'] and 'QQPlots' in group['comparison_plots_opt']['select_comparison_plots'] - group['group_comparison'] == 'yes' and 'ResidualPlots' in group['select_outputs'] + group['group_comparison'] == 'yes' and group['comparison_plots_opt']['select_comparison_plots'] and 'ResidualPlots' in group['comparison_plots_opt']['select_comparison_plots'] - group['group_comparison'] == 'yes' and 'VolcanoPlot' in group['select_outputs'] + group['group_comparison'] == 'yes' and group['comparison_plots_opt']['select_comparison_plots'] and 'VolcanoPlot' in group['comparison_plots_opt']['select_comparison_plots'] - group['group_comparison'] == 'yes' and 'Heatmap' in group['select_outputs'] + group['group_comparison'] == 'yes' and group['comparison_plots_opt']['select_comparison_plots'] and 'Heatmap' in group['comparison_plots_opt']['select_comparison_plots'] - group['group_comparison'] == 'yes' and 'ComparisonPlot' in group['select_outputs'] + group['group_comparison'] == 'yes' and group['comparison_plots_opt']['select_comparison_plots'] and 'ComparisonPlot' in group['comparison_plots_opt']['select_comparison_plots'] @@ -680,7 +711,8 @@ - + + @@ -715,7 +747,8 @@ - + + @@ -740,12 +773,14 @@ - + + - + + @@ -776,12 +811,13 @@ - + + - + @@ -811,20 +847,26 @@ - + + - - - - - - +
+ + +
+ + + + + +
+
@@ -866,6 +908,7 @@ + @@ -887,7 +930,8 @@ - + + @@ -960,11 +1004,22 @@ - Annotations as tabular file are needed for all input options except MSstats format - 4 columns with exactly these headers: Raw.file, Condition, BioReplicate, Run; additional 5th column only for MaxQuant: IsotopeLabelType + - Example file header: - - Raw.file: + :: + + Raw.file Condition BioReplicate Run IsotopeLabelType + ** disease ReplA 1 L + ** disease ReplA 2 L + ** disease ReplB 3 L + ** disease ReplB 4 L + ... ... ... ... ... + + - Raw.file: + - OpenSWATH: File name needs to fit exactly how it is written in OpenSwatch output (e.g. "in/AA12_mzML.mzML") - - MaxQuant: File name needs to fit to how it is written in MaxQuant output, but the ".raw" has to be removed (e.g. "file1.raw.thermo.raw" --> "file1.raw.thermo") + - MaxQuant: File name needs to fit exactly how it is writtein in the evidence.txt "Raw file" column. (e.g. "file1.raw.thermo") - Condition: The name of the condition is not allowed to start with a number or contain any special characters - All other columns: see description above for MSstats format columns @@ -972,7 +1027,7 @@ - 1st column: name of comparison - Additionally one column for each condition that is present in the tabular file. Use 1 and -1 to indicate the conditions to compare and 0 for conditions that are not compared. Multiple groups can be combined by using 0.5. - - First row contains the names of the groups, they must exactly match the condition name used in the annotation file + - First row contains the names of the groups, they must exactly match the condition name used in the annotation file and every condition must be present, even though it will not be used for any comparison such as G4 in the example below. Order of the condition columns is irrelevant. - Each additional row represents one comparison - Example for a two group comparison @@ -988,7 +1043,7 @@ names G1 G2 G3 G4 G5 G2-G1 -1 1 0 0 0 - G4-G5 0 0 0 1 -1 + G3-G5 0 0 1 0 -1 G3-G5 0 0 -1 0 1 G1+G2-G5 0.5 0.5 0 0 -1 @@ -1018,14 +1073,15 @@ - Summarizing intensities per MS run - TMP: Tukey’s median polish. Robust parameter estimation method with median across rows and columns. Prerequisite for missing value imputation. - - linear: Linear model (lmfunction). Average-based summarization. + - linear: Linear model (lmfunction). Average-based summarization. + + - Account for heterogeneous variation among intensities from different features: Yes: assumes equal variance among intensities from features. No: means that we cannot assume equal variance among intensities from features, then we will account for heterogeneous variation from different features - Missing value imputation: - Impute Missing Values: Only possible for Summarization Method TMP. Censored missing values will be determined (by censored intensity; cutoff value for censoring and Maximum quantile for deciding censored missing values") and imputed by Accelerated Failure Time model. - Remove runs which have more than 50% missing values: Yes or no. - - Account for heterogeneous variation among intensities from different features: Yes: assumes equal variance among intensities from features. No: means that we cannot assume equal variance among intensities from features, then we will account for heterogeneous variation from different features - Censored Intensity: The processing tools report missing values differently. This option is for distinguishwhich value should be considered as missing, and further whether it is censored or at random - NA - It assumes that all NAs in Intensity column are censored. @@ -1043,6 +1099,7 @@ - Summarization method: TMP + censored intensity: 'NULL': It assumes that all intensities are missing at random, therefore no action with missing value imputation: No; or error with missing value imputation: Yes. - Missing value imputation: Yes + censored intensity:'NA' or '0': AFT model-based imputation using cutoff value for censoring in the AFT model - Missing value imputation: No + censored intensity:'NA' or '0': censored intensities will be replaced with the value specified in cutoff value for censoring + - Missing value imputation: No + censored intensity: NULL: no imputation - Group comparison: automatic detection of differentially abundant proteins between two conditions, conditions have to be specified with the 'comparison matrix' - Quantification per sample or group: choose the corresponding output option @@ -1057,6 +1114,7 @@ - MSstats log - check log file for warnings and information on the analysis steps (txt) - MSstats Rscript - can be used to re-run analysis outside Galaxy or to inspect the executed code (txt) + - MSstats RawData - raw files combined into MSstats format (tabular) - MSstats ProcessedData - transformed, normalized, imputed intensities (tabular) - Intensity column: includes original intensities values diff -r 8212e342e482 -r 593839e1f2c3 test-data/Comparison_plot_skyline.pdf Binary file test-data/Comparison_plot_skyline.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/Heatmap_openms.pdf Binary file test-data/Heatmap_openms.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/MSstats ProfilePlot.pdf Binary file test-data/MSstats ProfilePlot.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/Profile_plot_skyline.pdf Binary file test-data/Profile_plot_skyline.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/QC_plot.pdf Binary file test-data/QC_plot.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/Volcano_plot_skyline.pdf Binary file test-data/Volcano_plot_skyline.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/condition_plot.pdf Binary file test-data/condition_plot.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/condition_plot_openms.pdf Binary file test-data/condition_plot_openms.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/profile_wsum_plot.pdf Binary file test-data/profile_wsum_plot.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/qq_plot.pdf Binary file test-data/qq_plot.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/residual_plot.pdf Binary file test-data/residual_plot.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/residualplot.pdf Binary file test-data/residualplot.pdf has changed diff -r 8212e342e482 -r 593839e1f2c3 test-data/volcanoplot.pdf Binary file test-data/volcanoplot.pdf has changed