comparison mqppep_anova_script.Rmd @ 2:a5e7469dfdfa draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 423304b26e63d23cd8e5fb4c2fb729c5beea1254
author galaxyp
date Mon, 12 Dec 2022 22:01:21 +0000
parents b76c75521d91
children bae3a23461c9
comparison
equal deleted inserted replaced
1:b76c75521d91 2:a5e7469dfdfa
4 - "Nick Graham^[ORCiD 0000-0002-6811-1941, University of Southern California: Los Angeles, CA, US]" 4 - "Nick Graham^[ORCiD 0000-0002-6811-1941, University of Southern California: Los Angeles, CA, US]"
5 - "Larry Cheng^[ORCiD 0000-0002-6922-6433, Rutgers School of Graduate Studies: New Brunswick, NJ, US]" 5 - "Larry Cheng^[ORCiD 0000-0002-6922-6433, Rutgers School of Graduate Studies: New Brunswick, NJ, US]"
6 - "Art Eschenlauer^[ORCiD 0000-0002-2882-0508, University of Minnesota: Minneapolis, Minnesota, US]" 6 - "Art Eschenlauer^[ORCiD 0000-0002-2882-0508, University of Minnesota: Minneapolis, Minnesota, US]"
7 date: 7 date:
8 - "May 28, 2018" 8 - "May 28, 2018"
9 - "; revised June 23, 2022" 9 - "; revised December 7, 2022"
10 lot: true 10 lot: true
11 output: 11 output:
12 pdf_document: 12 pdf_document:
13 toc: true 13 toc: true
14 toc_depth: 2 14 toc_depth: 2
95 # look-up tables for kinase descriptions 95 # look-up tables for kinase descriptions
96 kinaseNameUprtLutBz2: "./kinase_name_uniprot_lut.tabular.bz2" 96 kinaseNameUprtLutBz2: "./kinase_name_uniprot_lut.tabular.bz2"
97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" 97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2"
98 # should debugging trace messages be printed? 98 # should debugging trace messages be printed?
99 showEnrichedSubstrates: FALSE 99 showEnrichedSubstrates: FALSE
100
101 # should debugging nb/nbe messages be printed? 100 # should debugging nb/nbe messages be printed?
102 printNBMsgs: FALSE 101 printNBMsgs: FALSE
102 # showld row-scaling be applied to heatmaps: "none" or "row"
103 defaultHeatMapRowScaling: "none"
103 # should debugging trace messages be printed? 104 # should debugging trace messages be printed?
104 printTraceMsgs: FALSE 105 printTraceMsgs: FALSE
105 # when debugging files are needed, set debugFileBasePath to the path 106 # when debugging files are needed, set debugFileBasePath to the path
106 # to the directory where they should be writtn 107 # to the directory where they should be written
107 debugFileBasePath: !r if (TRUE) NULL else "test-data" 108 debugFileBasePath: !r if (TRUE) NULL else "test-data"
108 --- 109 ---
109 110
110 ```{r setup, include = FALSE, results = 'asis'} 111 ```{r setup, include = FALSE, results = 'asis'}
111 112
553 ) 554 )
554 ) 555 )
555 knitr::knit_exit() 556 knitr::knit_exit()
556 } 557 }
557 ) 558 )
559
560 g_default_heatmap_row_scaling <-
561 params$defaultHeatMapRowScaling
562 if (
563 !is.character(g_default_heatmap_row_scaling) ||
564 !(g_default_heatmap_row_scaling %in% c("row", "none"))
565 ) {
566 cat("invalid defaultHeatMapRowScaling (must be 'row' or 'none')")
567 knitr::knit_exit()
568 }
558 569
559 # intensityHeatmapRows: 50 570 # intensityHeatmapRows: 50
560 # TODO Validate >> 0 < 75 571 # TODO Validate >> 0 < 75
561 g_intensity_hm_rows <- params$intensityHeatmapRows 572 g_intensity_hm_rows <- params$intensityHeatmapRows
562 if (!is.integer(g_intensity_hm_rows) || g_intensity_hm_rows < 1) { 573 if (!is.integer(g_intensity_hm_rows) || g_intensity_hm_rows < 1) {
2285 max_nchar_rowname <- max(nchar(rownames(x))) 2296 max_nchar_rowname <- max(nchar(rownames(x)))
2286 max_nchar_colname <- max(nchar(colnames(x))) 2297 max_nchar_colname <- max(nchar(colnames(x)))
2287 my_limit <- g_intensity_hm_rows 2298 my_limit <- g_intensity_hm_rows
2288 2299
2289 my_row_cex_scale <- master_cex * 150 / nrow_x 2300 my_row_cex_scale <- master_cex * 150 / nrow_x
2301 #ACE row cex shrink hack begin
2302 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x)
2303 #ACE row cex shrink hack end
2304
2290 my_col_cex_scale <- 3.0 2305 my_col_cex_scale <- 3.0
2306 #ACE col cex shrink hack begin
2307 if (ncol_x > 40)
2308 my_col_cex_scale <- 3.0 * 40 / ncol_x
2309 #ACE col cex shrink hack end
2310
2291 my_asterisk_scale <- 0.4 * my_row_cex_scale 2311 my_asterisk_scale <- 0.4 * my_row_cex_scale
2292 my_row_warp <- 1 2312 my_row_warp <- 1
2293 my_note_warp <- 2 2313 my_note_warp <- 2
2294 my_row_warp <- 1 2314 my_row_warp <- 1
2295 my_row_cex_asterisk <- 2315 my_row_cex_asterisk <-
2604 g_intensity_hm_rows, # values of 50 and 75 worked well 2624 g_intensity_hm_rows, # values of 50 and 75 worked well
2605 master_cex = 1.0, # basis for text sizes 2625 master_cex = 1.0, # basis for text sizes
2606 margins = NULL, # optional margins (bottom, right) 2626 margins = NULL, # optional margins (bottom, right)
2607 cellnote = NULL, # optional matrix of character; dim = dim(m) 2627 cellnote = NULL, # optional matrix of character; dim = dim(m)
2608 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right 2628 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right
2629 row_scaling = # should row-scaling be applied if possible?
2630 g_default_heatmap_row_scaling,
2609 ... # passthru to hm2plus or heatmap.2 2631 ... # passthru to hm2plus or heatmap.2
2610 ) { 2632 ) {
2611 use_heatmap_1 <- FALSE 2633 use_heatmap_1 <- FALSE
2612 peptide_count <- 0 2634 peptide_count <- 0
2613 # emit the heading for the heatmap 2635 # emit the heading for the heatmap
2778 } 2800 }
2779 par(op) 2801 par(op)
2780 } 2802 }
2781 2803
2782 # invoke hm_call inner function 2804 # invoke hm_call inner function
2783 if (sum(rowSums(!is.na(m_hm)) < 2)) 2805 if (row_scaling != "row" || sum(rowSums(!is.na(m_hm)) < 2))
2784 hm_call( 2806 hm_call(
2785 m_hm, 2807 m_hm,
2786 "none", 2808 "none",
2787 "log(intensities), unscaled, unimputed, and unnormalized" 2809 "log(intensities), unimputed, and unnormalized"
2788 ) 2810 )
2789 else 2811 else
2790 hm_call( 2812 hm_call(
2791 m_hm, 2813 m_hm,
2792 "row", 2814 "row",
2800 { 2822 {
2801 if (nrow(m_hm) > 1) 2823 if (nrow(m_hm) > 1)
2802 hm_call( 2824 hm_call(
2803 m_hm, 2825 m_hm,
2804 "none", 2826 "none",
2805 paste( 2827 "log(intensities), zero-imputed, unnormalized"
2806 "log(intensities), unscaled,",
2807 "zero-imputed, unnormalized"
2808 )
2809 ) 2828 )
2810 else 2829 else
2811 cat("\nThere are too few peptides to produce a heatmap.\n") 2830 cat("\nThere are too few peptides to produce a heatmap.\n")
2812 }, 2831 },
2813 error = function(r) { 2832 error = function(r) {
3713 3732
3714 imp_smry_pot_peptides_after <- sum(good_rows) 3733 imp_smry_pot_peptides_after <- sum(good_rows)
3715 imp_smry_rejected_after <- sum(!good_rows) 3734 imp_smry_rejected_after <- sum(!good_rows)
3716 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) 3735 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ]))
3717 3736
3718 # From ?`%in%`, %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0 3737 # From ?`%in%`:
3719 3738 # %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0
3720 sink(stderr())
3721 print("`%in%`:")
3722 print(`%in%`)
3723 sink()
3724 3739
3725 stock_in <- 3740 stock_in <-
3726 names(good_rows) %in% 3741 names(good_rows) %in%
3727 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) 3742 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count])
3728 if (print_nb_messages) nbe(see_variable(stock_in), "\n") 3743 if (print_nb_messages) nbe(see_variable(stock_in), "\n")
5092 grouping_factor = 5107 grouping_factor =
5093 as.factor(grouping_factor$level), # anova_func arg2 5108 as.factor(grouping_factor$level), # anova_func arg2
5094 one_way_f = one_way_two_categories, # anova_func arg3 5109 one_way_f = one_way_two_categories, # anova_func arg3
5095 simplify = TRUE # TRUE is the default for simplify 5110 simplify = TRUE # TRUE is the default for simplify
5096 ) 5111 )
5097 contrast_data_adj_p_values <- p.adjust( 5112
5098 p = p_value_data_contrast_ps, 5113 if (!is.null(p_value_data_contrast_ps)) {
5099 method = "fdr", 5114 contrast_data_adj_p_values <-
5100 n = length(p_value_data_contrast_ps) # this is the default, length(p) 5115 p.adjust(
5101 ) 5116 p = p_value_data_contrast_ps,
5102 # - compute the fold-change 5117 method = "fdr",
5103 contrast_p_df <- 5118 n = length(p_value_data_contrast_ps) # this is the default, length(p)
5104 data.frame( 5119 )
5105 contrast = contrast, 5120
5106 phosphopep = contrast_cast$phosphopep, 5121 # - compute the fold-change
5107 p_value_raw = p_value_data_contrast_ps, 5122 contrast_p_df <-
5108 p_value_adj = contrast_data_adj_p_values 5123 data.frame(
5109 ) 5124 contrast = contrast,
5110 db_write_table_overwrite <- (contrast < 2) 5125 phosphopep = contrast_cast$phosphopep,
5111 db_write_table_append <- !db_write_table_overwrite 5126 p_value_raw = p_value_data_contrast_ps,
5112 RSQLite::dbWriteTable( 5127 p_value_adj = contrast_data_adj_p_values
5113 conn = db, 5128 )
5114 name = "contrast_ppep_p_val", 5129 db_write_table_overwrite <- (contrast < 2)
5115 value = contrast_p_df, 5130 db_write_table_append <- !db_write_table_overwrite
5116 append = db_write_table_append 5131 RSQLite::dbWriteTable(
5117 ) 5132 conn = db,
5118 # Create UK for insert 5133 name = "contrast_ppep_p_val",
5119 ddl_exec(db, " 5134 value = contrast_p_df,
5120 CREATE UNIQUE INDEX IF NOT EXISTS contrast_ppep_p_val__uk__idx 5135 append = db_write_table_append
5121 ON contrast_ppep_p_val(phosphopep, contrast); 5136 )
5122 " 5137 # Create UK for insert
5123 ) 5138 ddl_exec(db, "
5139 CREATE UNIQUE INDEX IF NOT EXISTS contrast_ppep_p_val__uk__idx
5140 ON contrast_ppep_p_val(phosphopep, contrast);
5141 "
5142 )
5143 }
5124 } 5144 }
5125 # Perhaps this could be done more elegantly using unique keys 5145 # Perhaps this could be done more elegantly using unique keys
5126 # or creating the tables before saving data to them, but this 5146 # or creating the tables before saving data to them, but this
5127 # is fast and, if the database exists on disk rather than in 5147 # is fast and, if the database exists on disk rather than in
5128 # memory, it doesn't stress memory. 5148 # memory, it doesn't stress memory.
6093 hm_main_title 6113 hm_main_title
6094 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", 6114 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates",
6095 suppress_row_dendrogram = FALSE, 6115 suppress_row_dendrogram = FALSE,
6096 master_cex = 0.35, 6116 master_cex = 0.35,
6097 sepcolor = "black", 6117 sepcolor = "black",
6098 colsep = sample_colsep 6118 colsep = sample_colsep,
6119 row_scaling = "none"
6099 ) 6120 )
6100 if (number_of_peptides_found > 1) { 6121 if (number_of_peptides_found > 1) {
6101 6122
6102 tryCatch( 6123 tryCatch(
6103 { 6124 {