Mercurial > repos > goeckslab > cleaning_spatialge
comparison spatialGE_multiple_input.R @ 0:c84663d92248 draft default tip
planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
| author | goeckslab |
|---|---|
| date | Wed, 13 Aug 2025 19:32:05 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c84663d92248 |
|---|---|
# -------------
# Data Cleaning
# -------------

# MULTIPLE INPUT SCRIPT:
# Accepts multiple sample input for raw data and cosmx
# Accepts single and multiple sample input for Visium, due to spatial
# subdirectory

# Purpose:
# Build an STlist from the count (and optional coordinate) inputs, optionally
# plot and filter it, transform it (log by default) and save it as STobj.rds

library(spatialGE)
library(optparse)
library(ggplot2)
library(tools)
library(fs)


### Command Line Options

option_list <- list(
  make_option(c("-c", "--counts"), action = "store", default = NA,
              type = "character",
              help = "Path to count data file(s)"),
  make_option(c("-s", "--spots"), action = "store", default = NULL,
              type = "character",
              help = "Path to cell coordinates file(s), not required for Visium or Xenium"),
  make_option(c("-m", "--meta"), action = "store", default = NA,
              type = "character",
              help = "Path to metadata file"),
  make_option(c("-n", "--names"), action = "store", default = NA,
              type = "character",
              help = "Specific sample names"),
  make_option(c("--plotmeta"), action = "store", default = NULL,
              type = "character",
              help = "Plots counts per cell or genes per cell"),
  make_option(c("--samples"), action = "store", default = NULL,
              type = "character",
              help = "Samples to include in plots, defaults to all"),
  make_option(c("--sminreads"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of total reads for a spot to be retained"),
  make_option(c("--smaxreads"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of total reads for a spot to be retained"),
  make_option(c("--smingenes"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of non-zero counts for a spot to be retained"),
  make_option(c("--smaxgenes"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of non-zero counts for a spot to be retained"),
  make_option(c("--gminreads"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of total reads for a gene to be retained"),
  make_option(c("--gmaxreads"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of total reads for a gene to be retained"),
  make_option(c("--gminspots"), action = "store", default = 0,
              type = "integer",
              help = "Minimum number of spots with non-zero counts for a gene to be retained"),
  make_option(c("--gmaxspots"), action = "store", default = NULL,
              type = "integer",
              help = "Maximum number of spots with non-zero counts for a gene to be retained"),
  make_option(c("--distplot"), action = "store_true", type = "logical",
              default = FALSE,
              help = "If set, generate unfiltered distribution plot"),
  make_option(c("--filter"), action = "store_true", type = "logical",
              default = FALSE,
              help = "If set, apply filtering before transformation"),
  make_option(c("--filterplot"), action = "store_true", type = "logical",
              default = FALSE,
              help = "If set, generate filtered distribution plot"),
  # "store" (not "store_true") so that a value such as `-t sct` is accepted;
  # a flag action can never carry the method name
  make_option(c("-t", "--type"), action = "store", default = "log",
              type = "character",
              help = "Type of transformation to apply: log or sct")
)

### Helpers

# Split the comma-separated --samples value into a character vector.
# Returns NULL (which the plotting call receives as "no sample restriction")
# when the option was not given or is an empty string.
parse_sample_names <- function(samples_arg) {
  if (is.null(samples_arg) || samples_arg == "") {
    return(NULL)
  }
  strsplit(samples_arg, split = ",", fixed = TRUE)[[1]]
}

# Generate a distribution plot for `st_data` and save it as
# <out_dir>/<prefix><basename of --counts without extension>.png.
#
# st_data: STlist to plot
# opt:     parsed command line options (uses plotmeta, samples, counts)
# prefix:  file name prefix, e.g. "unfiltered_"
# out_dir: directory the PNG is written to; created if missing
save_distribution_plot <- function(st_data, opt, prefix, out_dir) {
  dist_plot <- distribution_plots(
    x = st_data,
    plot_meta = opt$plotmeta,
    samples = parse_sample_names(opt$samples),
    ptsize = 1
  )

  # name the file after the counts input so each run gets its own plot
  base_name <- file_path_sans_ext(basename(opt$counts))
  filename <- paste0(prefix, base_name, ".png")

  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

  # no `plot` argument is given, so ggsave() writes its default, the most
  # recently created ggplot
  # NOTE(review): confirm that is the intended plot if distribution_plots()
  # produces more than one
  ggsave(
    path = out_dir,
    filename = filename,
    bg = "white",
    width = 12
  )

  invisible(dist_plot)
}

### Main

# parse args
opt <- parse_args(OptionParser(option_list = option_list))

# the counts path is listed with fs::dir_ls() below, so fail early with a
# clear message instead of an obscure error from listing NA
if (is.na(opt$counts)) {
  stop("Please specify --counts", call. = FALSE)
}

# check if metadata or sample names were provided
# need metadata for raw and visium data, sample names for cosmx
has_meta <- !is.na(opt$meta)
has_names <- !is.na(opt$names)

if (has_meta && !has_names) {
  samples_input <- opt$meta
} else if (has_names && !has_meta) {
  samples_input <- unlist(strsplit(opt$names, split = ","))
} else {
  # reached when both or neither were supplied
  stop("Please only specify either --meta OR --names", call. = FALSE)
}

# list the count inputs; coordinates are only listed when --spots was given
# (visium input carries its coordinates in the spatial subdirectory)
count_file <- fs::dir_ls(as.character(opt$counts))

if (!is.null(opt$spots)) {
  coord_file <- fs::dir_ls(as.character(opt$spots))
  st_data <- STlist(
    rnacounts = count_file,
    spotcoords = coord_file,
    samples = samples_input
  )
} else {
  st_data <- STlist(rnacounts = count_file, samples = samples_input)
}

message("STList has been created")

# distribution plot

# create distribution plot if flag is included
if (opt$distplot) {
  save_distribution_plot(
    st_data, opt,
    prefix = "unfiltered_",
    out_dir = "./unfiltered_distribution_plots"
  )
  message("Unfiltered distribution plot saved to ./unfiltered_distribution_plots")
}

# spot/cell filtering

# filter spots if flag is included
if (opt$filter) {
  # filter out spots or genes based on minimum and maximum counts;
  # every threshold exposed on the command line is forwarded
  st_data <- filter_data(
    x = st_data,
    spot_minreads = opt$sminreads,
    spot_maxreads = opt$smaxreads,
    spot_mingenes = opt$smingenes,
    spot_maxgenes = opt$smaxgenes,
    gene_minreads = opt$gminreads,
    gene_maxreads = opt$gmaxreads,
    gene_minspots = opt$gminspots,
    gene_maxspots = opt$gmaxspots
  )
  message("Data filtering completed & saved to STlist")
}

# filtered data plot

# create filtered distribution plot if flag is included
if (opt$filterplot) {
  save_distribution_plot(
    st_data, opt,
    prefix = "filtered_",
    out_dir = "./filtered_distribution_plots"
  )
  message("Filtered distribution plot saved to ./filtered_distribution_plots")
}

# transform data, defaults to log transformation

STobj <- transform_data(x = st_data, method = opt$type)

message("Data has been log transformed, unless otherwise specified")

# save transformed data to .rds

saveRDS(STobj, file = "STobj.rds")

message("STlist has been saved as .rds file")
