Mercurial > repos > goeckslab > cleaning_spatialge
comparison spatialGE_single_input.R @ 0:c84663d92248 draft default tip
planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
| author | goeckslab |
|---|---|
| date | Wed, 13 Aug 2025 19:32:05 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c84663d92248 |
|---|---|
| 1 # ------------- | |
| 2 # Data Cleaning | |
| 3 # ------------- | |
| 4 | |
| 5 # SINGLE INPUT SCRIPT: | |
| 6 # Accepts single raw data sample and single cosmx sample | |
| 7 # Does not accept single visium sample due to spatial subdirectory | |
| 8 | |
| 9 # Purpose: | |
| 10 # Transform data into STlist, perform QC, log transform | |
| 11 | |
| 12 library(spatialGE) | |
| 13 library(optparse) | |
| 14 library(ggplot2) | |
| 15 library(tools) | |
| 16 library(fs) | |
| 17 | |
| 18 | |
| 19 ### Command line options | |
| 20 | |
# Command-line interface of the single-input cleaning script.
#
# Groups of options:
#   * inputs:        --counts, --spots, --meta, --names
#   * plotting:      --plotmeta, --samples, --distplot, --filterplot
#   * spot filters:  --sminreads, --smaxreads, --smingenes, --smaxgenes
#   * gene filters:  --gminreads, --gmaxreads, --gminspots, --gmaxspots
#   * processing:    --filter, --type
#
# Upper-bound thresholds default to NULL (no upper limit requested); lower
# bounds default to 0.
option_list <- list(
  make_option(
    c("-c", "--counts"),
    action = "store", default = NA, type = "character",
    help = "Path to count data file(s)"
  ),
  make_option(
    c("-s", "--spots"),
    action = "store", default = NULL, type = "character",
    help = "Path to cell coordinates file(s), not required for Visium or Xenium"
  ),
  make_option(
    c("-m", "--meta"),
    action = "store", default = NA, type = "character",
    help = "Path to metadata file"
  ),
  make_option(
    c("-n", "--names"),
    action = "store", default = NA, type = "character",
    help = "Specific sample names"
  ),
  make_option(
    c("--plotmeta"),
    action = "store", default = NULL, type = "character",
    help = "Plots counts per cell or genes per cell"
  ),
  make_option(
    c("--samples"),
    action = "store", default = NULL, type = "character",
    help = "Samples to include in plots, defaults to all"
  ),
  make_option(
    c("--sminreads"),
    action = "store", default = 0, type = "integer",
    help = "Minimum number of total reads for a spot to be retained"
  ),
  make_option(
    c("--smaxreads"),
    action = "store", default = NULL, type = "integer",
    help = "Maximum number of total reads for a spot to be retained"
  ),
  make_option(
    c("--smingenes"),
    action = "store", default = 0, type = "integer",
    help = "Minimum number of non-zero counts for a spot to be retained"
  ),
  make_option(
    c("--smaxgenes"),
    action = "store", default = NULL, type = "integer",
    help = "Maximum number of non-zero counts for a spot to be retained"
  ),
  make_option(
    c("--gminreads"),
    action = "store", default = 0, type = "integer",
    help = "Minimum number of total reads for a gene to be retained"
  ),
  make_option(
    c("--gmaxreads"),
    action = "store", default = NULL, type = "integer",
    help = "Maximum number of total reads for a gene to be retained"
  ),
  make_option(
    c("--gminspots"),
    action = "store", default = 0, type = "integer",
    help = "Minimum number of spots with non-zero counts for a gene to be retained"
  ),
  make_option(
    c("--gmaxspots"),
    action = "store", default = NULL, type = "integer",
    help = "Maximum number of spots with non-zero counts for a gene to be retained"
  ),
  make_option(
    c("--distplot"),
    action = "store_true", type = "logical", default = FALSE,
    help = "If set, generate unfiltered distribution plot"
  ),
  make_option(
    c("--filter"),
    action = "store_true", type = "logical", default = FALSE,
    help = "If set, apply filtering before transformation"
  ),
  make_option(
    c("--filterplot"),
    action = "store_true", type = "logical", default = FALSE,
    help = "If set, generate filtered distribution plot"
  ),
  # `--type` carries a value ("log" or "sct"), so it needs the "store" action.
  # It was previously declared as "store_true", which describes a valueless
  # boolean flag and therefore cannot accept `--type sct` as intended.
  make_option(
    c("-t", "--type"),
    action = "store", default = "log", type = "character",
    help = "Type of transformation to apply: log or sct"
  )
)
| 59 | |
| 60 ### Main | |
| 61 | |
# Parse the command line into `opt`, a list keyed by long option name.
opt <- parse_args(OptionParser(option_list = option_list))

# `--counts` defaults to NA; fail early with a readable message instead of
# handing a missing path to STlist().
if (is.na(opt$counts)) {
  stop("Please provide a count data file with --counts", call. = FALSE)
}

# Exactly one sample descriptor must be supplied:
#   * --meta  (metadata file) for raw data
#   * --names (sample names)  for CosMx data
# Supplying both or neither is rejected. The message names the real flag
# (`--meta`); it previously referred to a non-existent `--metadata` option.
has_meta <- !is.na(opt$meta)
has_names <- !is.na(opt$names)
if (has_meta == has_names) {
  stop("Please specify exactly one of --meta OR --names", call. = FALSE)
}
samples_input <- if (has_meta) opt$meta else opt$names

# Build the STlist from the single count file, the optional coordinates file,
# and whichever sample descriptor was given.
st_data <- STlist(
  rnacounts = opt$counts,
  spotcoords = opt$spots,
  samples = samples_input
)

message("STlist has been created")
| 79 | |
| 80 #distribution plot | |
| 81 | |
# Unfiltered distribution plot: only produced when --distplot is set.
if (opt$distplot) {

  # --samples may hold a comma-separated list; NULL means "all samples".
  sample_names <- NULL
  if (!is.null(opt$samples) && opt$samples != "") {
    sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
  }

  # Draw the distribution of the requested metadata column on the raw data.
  dist_plot <- distribution_plots(
    x = st_data,
    plot_meta = opt$plotmeta,
    samples = sample_names,
    ptsize = 1
  )

  # Name the PNG after the counts file (directory and extension stripped).
  filename <- paste0(
    "unfiltered_",
    file_path_sans_ext(basename(opt$counts)),
    ".png"
  )

  # Output directory for the unfiltered distribution plots.
  unfiltered_dir <- "./unfiltered_distribution_plots"
  dir.create(unfiltered_dir, showWarnings = FALSE, recursive = TRUE)

  # No `plot =` is given, so ggsave() writes ggplot2's most recent plot.
  ggsave(
    filename = filename,
    path = unfiltered_dir,
    width = 12,
    bg = "white"
  )

  message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots")
}
| 115 | |
| 116 #spot/cell filtering | |
| 117 | |
# Spot/gene filtering: only applied when --filter is set.
if (opt$filter) {

  # Forward every threshold exposed on the command line. The gene-level
  # --gmaxreads, --gminspots and --gmaxspots options were previously parsed
  # but never passed on, so they had no effect. Their defaults (NULL, 0, NULL)
  # match filter_data()'s own defaults, so runs that omit them are unchanged.
  st_data <- filter_data(
    x = st_data,
    spot_minreads = opt$sminreads,
    spot_maxreads = opt$smaxreads,
    spot_mingenes = opt$smingenes,
    spot_maxgenes = opt$smaxgenes,
    gene_minreads = opt$gminreads,
    gene_maxreads = opt$gmaxreads,
    gene_minspots = opt$gminspots,
    gene_maxspots = opt$gmaxspots
  )

  message("Data filtering completed & saved to STlist")
}
| 127 | |
| 128 #filtered data plot | |
| 129 | |
# Filtered distribution plot: only produced when --filterplot is set.
# It plots whatever `st_data` holds at this point, i.e. the filtered data
# when --filter was also given.
if (opt$filterplot) {

  # --samples may hold a comma-separated list; NULL means "all samples".
  sample_names <- NULL
  if (!is.null(opt$samples) && opt$samples != "") {
    sample_names <- strsplit(opt$samples, split = ",", fixed = TRUE)[[1]]
  }

  # Draw the distribution of the requested metadata column.
  filter_dist_plot <- distribution_plots(
    x = st_data,
    plot_meta = opt$plotmeta,
    samples = sample_names,
    ptsize = 1
  )

  # Name the PNG after the counts file (directory and extension stripped).
  filename_2 <- paste0(
    "filtered_",
    file_path_sans_ext(basename(opt$counts)),
    ".png"
  )

  # Output directory for the filtered distribution plots.
  filtered_dir <- "./filtered_distribution_plots"
  dir.create(filtered_dir, showWarnings = FALSE, recursive = TRUE)

  # No `plot =` is given, so ggsave() writes ggplot2's most recent plot.
  ggsave(
    filename = filename_2,
    path = filtered_dir,
    width = 12,
    bg = "white"
  )

  message("Filtered distribution plot saved to ./filtered_distribution_plots")
}
| 163 | |
# Transform the (optionally filtered) STlist with the method chosen via
# --type; the option's default is "log".
STobj <- transform_data(
  x = st_data,
  method = opt$type
)
message("Data has been log transformed, unless otherwise specified")

# Persist the transformed STlist in the working directory.
saveRDS(object = STobj, file = "STobj.rds")
message("STlist has been saved as .rds file")
