Mercurial > repos > galaxyp > mqppep_preproc
comparison mqppep_anova_script.Rmd @ 2:a5e7469dfdfa draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 423304b26e63d23cd8e5fb4c2fb729c5beea1254
author | galaxyp |
---|---|
date | Mon, 12 Dec 2022 22:01:21 +0000 |
parents | b76c75521d91 |
children | bae3a23461c9 |
comparison
equal
deleted
inserted
replaced
1:b76c75521d91 | 2:a5e7469dfdfa |
---|---|
4 - "Nick Graham^[ORCiD 0000-0002-6811-1941, University of Southern California: Los Angeles, CA, US]" | 4 - "Nick Graham^[ORCiD 0000-0002-6811-1941, University of Southern California: Los Angeles, CA, US]" |
5 - "Larry Cheng^[ORCiD 0000-0002-6922-6433, Rutgers School of Graduate Studies: New Brunswick, NJ, US]" | 5 - "Larry Cheng^[ORCiD 0000-0002-6922-6433, Rutgers School of Graduate Studies: New Brunswick, NJ, US]" |
6 - "Art Eschenlauer^[ORCiD 0000-0002-2882-0508, University of Minnesota: Minneapolis, Minnesota, US]" | 6 - "Art Eschenlauer^[ORCiD 0000-0002-2882-0508, University of Minnesota: Minneapolis, Minnesota, US]" |
7 date: | 7 date: |
8 - "May 28, 2018" | 8 - "May 28, 2018" |
9 - "; revised June 23, 2022" | 9 - "; revised December 7, 2022" |
10 lot: true | 10 lot: true |
11 output: | 11 output: |
12 pdf_document: | 12 pdf_document: |
13 toc: true | 13 toc: true |
14 toc_depth: 2 | 14 toc_depth: 2 |
95 # look-up tables for kinase descriptions | 95 # look-up tables for kinase descriptions |
96 kinaseNameUprtLutBz2: "./kinase_name_uniprot_lut.tabular.bz2" | 96 kinaseNameUprtLutBz2: "./kinase_name_uniprot_lut.tabular.bz2" |
97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" | 97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" |
98 # should debugging trace messages be printed? | 98 # should debugging trace messages be printed? |
99 showEnrichedSubstrates: FALSE | 99 showEnrichedSubstrates: FALSE |
100 | |
101 # should debugging nb/nbe messages be printed? | 100 # should debugging nb/nbe messages be printed? |
102 printNBMsgs: FALSE | 101 printNBMsgs: FALSE |
102 # showld row-scaling be applied to heatmaps: "none" or "row" | |
103 defaultHeatMapRowScaling: "none" | |
103 # should debugging trace messages be printed? | 104 # should debugging trace messages be printed? |
104 printTraceMsgs: FALSE | 105 printTraceMsgs: FALSE |
105 # when debugging files are needed, set debugFileBasePath to the path | 106 # when debugging files are needed, set debugFileBasePath to the path |
106 # to the directory where they should be writtn | 107 # to the directory where they should be written |
107 debugFileBasePath: !r if (TRUE) NULL else "test-data" | 108 debugFileBasePath: !r if (TRUE) NULL else "test-data" |
108 --- | 109 --- |
109 | 110 |
110 ```{r setup, include = FALSE, results = 'asis'} | 111 ```{r setup, include = FALSE, results = 'asis'} |
111 | 112 |
553 ) | 554 ) |
554 ) | 555 ) |
555 knitr::knit_exit() | 556 knitr::knit_exit() |
556 } | 557 } |
557 ) | 558 ) |
559 | |
560 g_default_heatmap_row_scaling <- | |
561 params$defaultHeatMapRowScaling | |
562 if ( | |
563 !is.character(g_default_heatmap_row_scaling) || | |
564 !(g_default_heatmap_row_scaling %in% c("row", "none")) | |
565 ) { | |
566 cat("invalid defaultHeatMapRowScaling (must be 'row' or 'none')") | |
567 knitr::knit_exit() | |
568 } | |
558 | 569 |
559 # intensityHeatmapRows: 50 | 570 # intensityHeatmapRows: 50 |
560 # TODO Validate >> 0 < 75 | 571 # TODO Validate >> 0 < 75 |
561 g_intensity_hm_rows <- params$intensityHeatmapRows | 572 g_intensity_hm_rows <- params$intensityHeatmapRows |
562 if (!is.integer(g_intensity_hm_rows) || g_intensity_hm_rows < 1) { | 573 if (!is.integer(g_intensity_hm_rows) || g_intensity_hm_rows < 1) { |
2285 max_nchar_rowname <- max(nchar(rownames(x))) | 2296 max_nchar_rowname <- max(nchar(rownames(x))) |
2286 max_nchar_colname <- max(nchar(colnames(x))) | 2297 max_nchar_colname <- max(nchar(colnames(x))) |
2287 my_limit <- g_intensity_hm_rows | 2298 my_limit <- g_intensity_hm_rows |
2288 | 2299 |
2289 my_row_cex_scale <- master_cex * 150 / nrow_x | 2300 my_row_cex_scale <- master_cex * 150 / nrow_x |
2301 #ACE row cex shrink hack begin | |
2302 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x) | |
2303 #ACE row cex shrink hack end | |
2304 | |
2290 my_col_cex_scale <- 3.0 | 2305 my_col_cex_scale <- 3.0 |
2306 #ACE col cex shrink hack begin | |
2307 if (ncol_x > 40) | |
2308 my_col_cex_scale <- 3.0 * 40 / ncol_x | |
2309 #ACE col cex shrink hack end | |
2310 | |
2291 my_asterisk_scale <- 0.4 * my_row_cex_scale | 2311 my_asterisk_scale <- 0.4 * my_row_cex_scale |
2292 my_row_warp <- 1 | 2312 my_row_warp <- 1 |
2293 my_note_warp <- 2 | 2313 my_note_warp <- 2 |
2294 my_row_warp <- 1 | 2314 my_row_warp <- 1 |
2295 my_row_cex_asterisk <- | 2315 my_row_cex_asterisk <- |
2604 g_intensity_hm_rows, # values of 50 and 75 worked well | 2624 g_intensity_hm_rows, # values of 50 and 75 worked well |
2605 master_cex = 1.0, # basis for text sizes | 2625 master_cex = 1.0, # basis for text sizes |
2606 margins = NULL, # optional margins (bottom, right) | 2626 margins = NULL, # optional margins (bottom, right) |
2607 cellnote = NULL, # optional matrix of character; dim = dim(m) | 2627 cellnote = NULL, # optional matrix of character; dim = dim(m) |
2608 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right | 2628 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right |
2629 row_scaling = # should row-scaling be applied if possible? | |
2630 g_default_heatmap_row_scaling, | |
2609 ... # passthru to hm2plus or heatmap.2 | 2631 ... # passthru to hm2plus or heatmap.2 |
2610 ) { | 2632 ) { |
2611 use_heatmap_1 <- FALSE | 2633 use_heatmap_1 <- FALSE |
2612 peptide_count <- 0 | 2634 peptide_count <- 0 |
2613 # emit the heading for the heatmap | 2635 # emit the heading for the heatmap |
2778 } | 2800 } |
2779 par(op) | 2801 par(op) |
2780 } | 2802 } |
2781 | 2803 |
2782 # invoke hm_call inner function | 2804 # invoke hm_call inner function |
2783 if (sum(rowSums(!is.na(m_hm)) < 2)) | 2805 if (row_scaling != "row" || sum(rowSums(!is.na(m_hm)) < 2)) |
2784 hm_call( | 2806 hm_call( |
2785 m_hm, | 2807 m_hm, |
2786 "none", | 2808 "none", |
2787 "log(intensities), unscaled, unimputed, and unnormalized" | 2809 "log(intensities), unimputed, and unnormalized" |
2788 ) | 2810 ) |
2789 else | 2811 else |
2790 hm_call( | 2812 hm_call( |
2791 m_hm, | 2813 m_hm, |
2792 "row", | 2814 "row", |
2800 { | 2822 { |
2801 if (nrow(m_hm) > 1) | 2823 if (nrow(m_hm) > 1) |
2802 hm_call( | 2824 hm_call( |
2803 m_hm, | 2825 m_hm, |
2804 "none", | 2826 "none", |
2805 paste( | 2827 "log(intensities), zero-imputed, unnormalized" |
2806 "log(intensities), unscaled,", | |
2807 "zero-imputed, unnormalized" | |
2808 ) | |
2809 ) | 2828 ) |
2810 else | 2829 else |
2811 cat("\nThere are too few peptides to produce a heatmap.\n") | 2830 cat("\nThere are too few peptides to produce a heatmap.\n") |
2812 }, | 2831 }, |
2813 error = function(r) { | 2832 error = function(r) { |
3713 | 3732 |
3714 imp_smry_pot_peptides_after <- sum(good_rows) | 3733 imp_smry_pot_peptides_after <- sum(good_rows) |
3715 imp_smry_rejected_after <- sum(!good_rows) | 3734 imp_smry_rejected_after <- sum(!good_rows) |
3716 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) | 3735 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) |
3717 | 3736 |
3718 # From ?`%in%`, %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0 | 3737 # From ?`%in%`: |
3719 | 3738 # %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0 |
3720 sink(stderr()) | |
3721 print("`%in%`:") | |
3722 print(`%in%`) | |
3723 sink() | |
3724 | 3739 |
3725 stock_in <- | 3740 stock_in <- |
3726 names(good_rows) %in% | 3741 names(good_rows) %in% |
3727 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) | 3742 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) |
3728 if (print_nb_messages) nbe(see_variable(stock_in), "\n") | 3743 if (print_nb_messages) nbe(see_variable(stock_in), "\n") |
5092 grouping_factor = | 5107 grouping_factor = |
5093 as.factor(grouping_factor$level), # anova_func arg2 | 5108 as.factor(grouping_factor$level), # anova_func arg2 |
5094 one_way_f = one_way_two_categories, # anova_func arg3 | 5109 one_way_f = one_way_two_categories, # anova_func arg3 |
5095 simplify = TRUE # TRUE is the default for simplify | 5110 simplify = TRUE # TRUE is the default for simplify |
5096 ) | 5111 ) |
5097 contrast_data_adj_p_values <- p.adjust( | 5112 |
5098 p = p_value_data_contrast_ps, | 5113 if (!is.null(p_value_data_contrast_ps)) { |
5099 method = "fdr", | 5114 contrast_data_adj_p_values <- |
5100 n = length(p_value_data_contrast_ps) # this is the default, length(p) | 5115 p.adjust( |
5101 ) | 5116 p = p_value_data_contrast_ps, |
5102 # - compute the fold-change | 5117 method = "fdr", |
5103 contrast_p_df <- | 5118 n = length(p_value_data_contrast_ps) # this is the default, length(p) |
5104 data.frame( | 5119 ) |
5105 contrast = contrast, | 5120 |
5106 phosphopep = contrast_cast$phosphopep, | 5121 # - compute the fold-change |
5107 p_value_raw = p_value_data_contrast_ps, | 5122 contrast_p_df <- |
5108 p_value_adj = contrast_data_adj_p_values | 5123 data.frame( |
5109 ) | 5124 contrast = contrast, |
5110 db_write_table_overwrite <- (contrast < 2) | 5125 phosphopep = contrast_cast$phosphopep, |
5111 db_write_table_append <- !db_write_table_overwrite | 5126 p_value_raw = p_value_data_contrast_ps, |
5112 RSQLite::dbWriteTable( | 5127 p_value_adj = contrast_data_adj_p_values |
5113 conn = db, | 5128 ) |
5114 name = "contrast_ppep_p_val", | 5129 db_write_table_overwrite <- (contrast < 2) |
5115 value = contrast_p_df, | 5130 db_write_table_append <- !db_write_table_overwrite |
5116 append = db_write_table_append | 5131 RSQLite::dbWriteTable( |
5117 ) | 5132 conn = db, |
5118 # Create UK for insert | 5133 name = "contrast_ppep_p_val", |
5119 ddl_exec(db, " | 5134 value = contrast_p_df, |
5120 CREATE UNIQUE INDEX IF NOT EXISTS contrast_ppep_p_val__uk__idx | 5135 append = db_write_table_append |
5121 ON contrast_ppep_p_val(phosphopep, contrast); | 5136 ) |
5122 " | 5137 # Create UK for insert |
5123 ) | 5138 ddl_exec(db, " |
5139 CREATE UNIQUE INDEX IF NOT EXISTS contrast_ppep_p_val__uk__idx | |
5140 ON contrast_ppep_p_val(phosphopep, contrast); | |
5141 " | |
5142 ) | |
5143 } | |
5124 } | 5144 } |
5125 # Perhaps this could be done more elegantly using unique keys | 5145 # Perhaps this could be done more elegantly using unique keys |
5126 # or creating the tables before saving data to them, but this | 5146 # or creating the tables before saving data to them, but this |
5127 # is fast and, if the database exists on disk rather than in | 5147 # is fast and, if the database exists on disk rather than in |
5128 # memory, it doesn't stress memory. | 5148 # memory, it doesn't stress memory. |
6093 hm_main_title | 6113 hm_main_title |
6094 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", | 6114 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", |
6095 suppress_row_dendrogram = FALSE, | 6115 suppress_row_dendrogram = FALSE, |
6096 master_cex = 0.35, | 6116 master_cex = 0.35, |
6097 sepcolor = "black", | 6117 sepcolor = "black", |
6098 colsep = sample_colsep | 6118 colsep = sample_colsep, |
6119 row_scaling = "none" | |
6099 ) | 6120 ) |
6100 if (number_of_peptides_found > 1) { | 6121 if (number_of_peptides_found > 1) { |
6101 | 6122 |
6102 tryCatch( | 6123 tryCatch( |
6103 { | 6124 { |