Mercurial > repos > artbio > pathifier
diff pathifier.R @ 1:0960bd1161fa draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/pathifier commit f7363f7c391bd501fc4d1c10aaf372ec2bd82732
author | artbio |
---|---|
date | Mon, 12 Feb 2024 23:57:01 +0000 |
parents | fec313f5c889 |
children |
line wrap: on
line diff
--- a/pathifier.R Mon Apr 12 09:55:24 2021 +0000 +++ b/pathifier.R Mon Feb 12 23:57:01 2024 +0000 @@ -4,10 +4,10 @@ ################################################################################################## -options(show.error.messages = F, error = function() { - cat(geterrmessage(), file = stderr()); q("no", 1, F) - } -) +options(show.error.messages = FALSE, error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) +}) # we need that to not crash galaxy with an UTF8 error on German LC settings. loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") @@ -18,106 +18,112 @@ library(circlize) option_list <- list( - make_option( - "--exp", - type = "character", - help = "Expression matrix"), - make_option( - "--sep", - type = "character", - default = "\t", - help = "File separator [default : '%default']" - ), - make_option( - "--genes", - type = "character", - help = "Gene sets Pathways : gmt format (one pathway per line : Name, description, genes (one by column), tab separated)"), - make_option( - "--is_normal", - default = F, - type = "logical", - help = "Define normals cells in your data"), - make_option( - "--normals", - type = "character", - help = "A vector of sample status : 1 = Healthy, 0 = Tumor. Must be in the same order as in expression data"), - make_option( - "--logfile", - type = "character", - default = "log.txt", - help = "Log file name [default : '%default']" - ), - make_option( - "--max_stability", - type = "logical", - default = T, - help = "If true, throw away components leading to low stability of sampling noise [default : '%default']" - ), - make_option( - "--attempts", - type = "integer", - default = 10, - help = "Number of runs to determine stability. [default : '%default']" - ), - make_option( - "--min_std", - type = "character", - default = "0.4", - help = "Minimum of standard deviation to filter out low variable genes. + make_option( + "--exp", + type = "character", + help = "Expression matrix" + ), + make_option( + "--sep", + type = "character", + default = "\t", + help = "File separator [default : '%default']" + ), + make_option( + "--genes", + type = "character", + help = "Gene sets Pathways : gmt format (one pathway per line : Name, description, genes (one by column), tab separated)" + ), + make_option( + "--is_normal", + default = FALSE, + type = "logical", + help = "Define normals cells in your data" + ), + make_option( + "--normals", + type = "character", + help = "A vector of sample status : 1 = Healthy, 0 = Tumor. Must be in the same order as in expression data" + ), + make_option( + "--logfile", + type = "character", + default = "log.txt", + help = "Log file name [default : '%default']" + ), + make_option( + "--max_stability", + type = "logical", + default = TRUE, + help = "If true, throw away components leading to low stability of sampling noise [default : '%default']" + ), + make_option( + "--attempts", + type = "integer", + default = 10, + help = "Number of runs to determine stability. [default : '%default']" + ), + make_option( + "--min_std", + type = "character", + default = "0.4", + help = "Minimum of standard deviation to filter out low variable genes. Use --min.std data, to use the minimum std of your data [default : '%default']" - ), - make_option( - "--min_exp", - type = "character", - default = "4", - help = "Minimum of gene expression to filter out low expressed genes. + ), + make_option( + "--min_exp", + type = "character", + default = "4", + help = "Minimum of gene expression to filter out low expressed genes. Use --min.exp data, to use the minimum expression of your data [default : '%default']" - ), - make_option( - "--pds", - type = "character", - default = "PDS.tsv", - help = "Output PDS (Pathway deregulation score) of Pathifier in tabular file [default : '%default']" - ), - make_option( - "--heatmap_cluster_cells", - type = "logical", - default = TRUE, - help = "Cluster columns (cells) in the heatmap [default : '%default']" - ), - make_option( - "--heatmap_cluster_pathways", - type = "logical", - default = TRUE, - help = "Cluster rows (pathways) in the heatmap [default : '%default']" - ), - make_option( - "--heatmap_show_cell_labels", - type = "logical", - default = FALSE, - help = "Print column names (cells) on the heatmap [default : '%default']" - ), - make_option( - "--heatmap_show_pathway_labels", - type = "logical", - default = FALSE, - help = "Print row names (pathways) on the heatmap [default : '%default']" - ), - make_option( - "--plot", - type = "character", - default = "./plot.pdf", - help = "Pathifier visualization [default : '%default']" - ), - make_option( - "--rdata", - type = "character", - default = "./results.rdata", - help = "Pathifier object (S4) [default : '%default']")) + ), + make_option( + "--pds", + type = "character", + default = "PDS.tsv", + help = "Output PDS (Pathway deregulation score) of Pathifier in tabular file [default : '%default']" + ), + make_option( + "--heatmap_cluster_cells", + type = "logical", + default = TRUE, + help = "Cluster columns (cells) in the heatmap [default : '%default']" + ), + make_option( + "--heatmap_cluster_pathways", + type = "logical", + default = TRUE, + help = "Cluster rows (pathways) in the heatmap [default : '%default']" + ), + make_option( + "--heatmap_show_cell_labels", + type = "logical", + default = FALSE, + help = "Print column names (cells) on the heatmap [default : '%default']" + ), + make_option( + "--heatmap_show_pathway_labels", + type = "logical", + default = FALSE, + help = "Print row names (pathways) on the heatmap [default : '%default']" + ), + make_option( + "--plot", + type = "character", + default = "./plot.pdf", + help = "Pathifier visualization [default : '%default']" + ), + make_option( + "--rdata", + type = "character", + default = "./results.rdata", + help = "Pathifier object (S4) [default : '%default']" + ) +) parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) args <- parse_args(parser) if (args$sep == "tab") { - args$sep <- "\t" + args$sep <- "\t" } @@ -125,11 +131,13 @@ set.seed(123) # Load expression data for PATHIFIER -exp_matrix <- as.matrix(read.delim(file = args$exp, - as.is = T, - row.names = 1, - sep = args$sep, - check.names = F)) +exp_matrix <- as.matrix(read.delim( + file = args$exp, + as.is = TRUE, + row.names = 1, + sep = args$sep, + check.names = FALSE +)) # Load Genesets annotation gene_sets_file <- file(args$genes, open = "r") @@ -146,19 +154,23 @@ # Prepare data and parameters ################################################## # Extract information from binary phenotypes. 1 = Normal, 0 = Tumor -if (args$is_normal == T) { - normals <- read.delim(file = args$normals, h = F) - normals <- as.logical(normals[, 2]) - n_exp_matrix <- exp_matrix[, normals] -} else n_exp_matrix <- exp_matrix +if (args$is_normal == TRUE) { + normals <- read.delim(file = args$normals, h = FALSE) + normals <- as.logical(normals[, 2]) + n_exp_matrix <- exp_matrix[, normals] +} else { + n_exp_matrix <- exp_matrix +} # Calculate MIN_STD rsd <- apply(n_exp_matrix, 1, sd) min_std <- quantile(rsd, 0.25) # Calculate MIN_EXP -min_exp <- quantile(as.vector(as.matrix(exp_matrix)), - 0.1) # Percentile 10 of data +min_exp <- quantile( + as.vector(as.matrix(exp_matrix)), + 0.1 +) # Percentile 10 of data # Filter low value genes. At least 10% of samples with values over min_exp # Set expression levels < MIN_EXP to MIN_EXP @@ -169,19 +181,23 @@ exp_matrix_filtered[exp_matrix_filtered < min_exp] <- min_exp # Set maximum 5000 genes with more variance -variable_genes <- names(sort(apply(exp_matrix_filtered, 1, var), decreasing = T))[1:5000] +variable_genes <- names(sort(apply(exp_matrix_filtered, 1, var), decreasing = TRUE))[1:5000] variable_genes <- variable_genes[!is.na(variable_genes)] exp_matrix_filtered <- exp_matrix_filtered[variable_genes, ] allgenes <- as.vector(rownames(exp_matrix_filtered)) if (args$min_std == "data") { - args$min_std <- min_std -} else args$min_std <- as.numeric(args$min_std) + args$min_std <- min_std +} else { + args$min_std <- as.numeric(args$min_std) +} if (args$min_exp == "data") { - args$min_exp <- min_exp -} else args$min_exp <- as.numeric(args$min_exp) + args$min_exp <- min_exp +} else { + args$min_exp <- as.numeric(args$min_exp) +} # Open pdf @@ -191,89 +207,108 @@ col_score_fun <- colorRamp2(c(0, 0.5, 1), c("#4575B4", "#FFFFBF", "#D73027")) # Run Pathifier -if (args$is_normal == T) { - pds <- quantify_pathways_deregulation(exp_matrix_filtered, - allgenes, - gs, - pathwaynames, - normals, - maximize_stability = args$max_stability, - attempts = args$attempts, - logfile = args$logfile, - min_std = args$min_std, - min_exp = args$min_exp) - for (i in pathwaynames) { - df <- data.frame(pds$curves[[i]][, 1:3], - normal = normals, - PDS = as.numeric(pds$scores[[i]]), - curve_order = as.vector(pds$curves_order[[i]])) - ordered <- df[df$curve_order, ] +if (args$is_normal == TRUE) { + pds <- quantify_pathways_deregulation(exp_matrix_filtered, + allgenes, + gs, + pathwaynames, + normals, + maximize_stability = args$max_stability, + attempts = args$attempts, + logfile = args$logfile, + min_std = args$min_std, + min_exp = args$min_exp + ) + for (i in pathwaynames) { + df <- data.frame(pds$curves[[i]][, 1:3], + normal = normals, + PDS = as.numeric(pds$scores[[i]]), + curve_order = as.vector(pds$curves_order[[i]]) + ) + ordered <- df[df$curve_order, ] - layout(cbind(1:2, 1:2), heights = c(7, 1)) - sc3 <- scatterplot3d(ordered[, 1:3], - main = paste("Principal curve of", i), - box = F, pch = 19, type = "l") - sc3$points3d(ordered[, 1:3], box = F, pch = 19, - col = col_score_fun(ordered$PDS)) + layout(cbind(1:2, 1:2), heights = c(7, 1)) + sc3 <- scatterplot3d(ordered[, 1:3], + main = paste("Principal curve of", i), + box = FALSE, pch = 19, type = "l" + ) + sc3$points3d(ordered[, 1:3], + box = FALSE, pch = 19, + col = col_score_fun(ordered$PDS) + ) - # Plot color scale legend - par(mar = c(5, 3, 0, 3)) - plot(seq(min(ordered$PDS), max(ordered$PDS), length = 100), rep(0, 100), pch = 15, - axes = TRUE, yaxt = "n", xlab = "Color scale of PDS", ylab = "", bty = "n", - col = col_score_fun(seq(min(ordered$PDS), max(ordered$PDS), length = 100))) + # Plot color scale legend + par(mar = c(5, 3, 0, 3)) + plot(seq(min(ordered$PDS), max(ordered$PDS), length = 100), rep(0, 100), + pch = 15, + axes = TRUE, yaxt = "n", xlab = "Color scale of PDS", ylab = "", bty = "n", + col = col_score_fun(seq(min(ordered$PDS), max(ordered$PDS), length = 100)) + ) - cols_status <- ifelse(ordered$normal, "blue", "red") - sc3 <- scatterplot3d(ordered[, 1:3], - main = paste("Principal curve of", i), - box = F, pch = "", type = "l") - sc3$points3d(ordered[, 1:3], box = F, - pch = ifelse(ordered$normal, 19, 8), - col = ifelse(ordered$normal, "blue", "red")) - legend("topright", pch = c(19, 8), yjust = 0, - legend = c("normal", "cancer"), - col = c("blue", "red"), cex = 1.1) + cols_status <- ifelse(ordered$normal, "blue", "red") + sc3 <- scatterplot3d(ordered[, 1:3], + main = paste("Principal curve of", i), + box = FALSE, pch = "", type = "l" + ) + sc3$points3d(ordered[, 1:3], + box = FALSE, + pch = ifelse(ordered$normal, 19, 8), + col = ifelse(ordered$normal, "blue", "red") + ) + legend("topright", + pch = c(19, 8), yjust = 0, + legend = c("normal", "cancer"), + col = c("blue", "red"), cex = 1.1 + ) - ## annotation for heatmap - sample_status <- data.frame(Status = factor(ifelse(df$normal, "normal", "tumor"))) - rownames(sample_status) <- colnames(exp_matrix_filtered) - color_status_heatmap <- list(Status = c(normal = "blue", tumor = "red")) - } -} else{ - pds <- quantify_pathways_deregulation(exp_matrix_filtered, - allgenes, - gs, - pathwaynames, - maximize_stability = args$max_stability, - attempts = args$attempts, - logfile = args$logfile, - min_std = args$min_std, - min_exp = args$min_exp) - for (i in pathwaynames) { - df <- data.frame(pds$curves[[i]][, 1:3], - PDS = as.numeric(pds$scores[[i]]), - curve_order = as.vector(pds$curves_order[[i]])) - ordered <- df[df$curve_order, ] + ## annotation for heatmap + sample_status <- data.frame(Status = factor(ifelse(df$normal, "normal", "tumor"))) + rownames(sample_status) <- colnames(exp_matrix_filtered) + color_status_heatmap <- list(Status = c(normal = "blue", tumor = "red")) + } +} else { + pds <- quantify_pathways_deregulation(exp_matrix_filtered, + allgenes, + gs, + pathwaynames, + maximize_stability = args$max_stability, + attempts = args$attempts, + logfile = args$logfile, + min_std = args$min_std, + min_exp = args$min_exp + ) + for (i in pathwaynames) { + df <- data.frame(pds$curves[[i]][, 1:3], + PDS = as.numeric(pds$scores[[i]]), + curve_order = as.vector(pds$curves_order[[i]]) + ) + ordered <- df[df$curve_order, ] - layout(cbind(1:2, 1:2), heights = c(7, 1)) - sc3 <- scatterplot3d(ordered[, 1:3], - main = paste("Principal curve of", i), - box = F, pch = 19, type = "l") - sc3$points3d(ordered[, 1:3], box = F, pch = 19, - col = col_score_fun(ordered$PDS)) + layout(cbind(1:2, 1:2), heights = c(7, 1)) + sc3 <- scatterplot3d(ordered[, 1:3], + main = paste("Principal curve of", i), + box = FALSE, pch = 19, type = "l" + ) + sc3$points3d(ordered[, 1:3], + box = FALSE, pch = 19, + col = col_score_fun(ordered$PDS) + ) - # Plot color scale legend - par(mar = c(5, 3, 0, 3)) - plot(seq(min(ordered$PDS), max(ordered$PDS), length = 100), rep(0, 100), pch = 15, - axes = TRUE, yaxt = "n", xlab = "Color scale of PDS", ylab = "", bty = "n", - col = col_score_fun(seq(min(ordered$PDS), max(ordered$PDS), length = 100))) + # Plot color scale legend + par(mar = c(5, 3, 0, 3)) + plot(seq(min(ordered$PDS), max(ordered$PDS), length = 100), rep(0, 100), + pch = 15, + axes = TRUE, yaxt = "n", xlab = "Color scale of PDS", ylab = "", bty = "n", + col = col_score_fun(seq(min(ordered$PDS), max(ordered$PDS), length = 100)) + ) - ## annotation for heatmap (for the moment none for this situation) - sample_status <- NA - color_status_heatmap <- NA - } + ## annotation for heatmap (for the moment none for this situation) + sample_status <- NA + color_status_heatmap <- NA + } } ## Create dataframe from Pathifier list and round score to 4 digits @@ -283,29 +318,31 @@ ## plot heatmap if (ncol(pds_scores) > 1) { pheatmap(t(pds_scores), - main = "Heatmap of Pathway Deregulation Score", # heat map title - cluster_rows = args$heatmap_cluster_pathways, # apply clustering method - cluster_cols = args$heatmap_cluster_cells, # apply clustering method + main = "Heatmap of Pathway Deregulation Score", # heat map title + cluster_rows = args$heatmap_cluster_pathways, # apply clustering method + cluster_cols = args$heatmap_cluster_cells, # apply clustering method - #Additional Options - ## Color labeled columns - annotation_col = sample_status, - annotation_colors = color_status_heatmap, - show_rownames = args$heatmap_show_pathway_labels, - show_colnames = args$heatmap_show_cell_labels, - border_color = NA, - legend = TRUE) + # Additional Options + ## Color labeled columns + annotation_col = sample_status, + annotation_colors = color_status_heatmap, + show_rownames = args$heatmap_show_pathway_labels, + show_colnames = args$heatmap_show_cell_labels, + border_color = NA, + legend = TRUE + ) } dev.off() ## write table write.table(pds_scores, - args$pds, - row.names = T, - col.names = T, - quote = F, - sep = "\t") + args$pds, + row.names = TRUE, + col.names = TRUE, + quote = FALSE, + sep = "\t" +) ## write S4 pathifier object save(pds, file = args$rdata)