Mercurial > repos > perssond > naivestates
comparison main.R @ 0:1fb6181c2c64 draft
"planemo upload for repository https://github.com/ohsu-comp-bio/naivestates commit 392f57d212a7499bf1d3e421112a32a56635bc67-dirty"
author | perssond |
---|---|
date | Fri, 12 Mar 2021 00:20:13 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1fb6181c2c64 |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 suppressMessages( library(tidyverse) ) | |
4 library( optparse ) | |
5 library( naivestates ) | |
6 | |
7 ## Identify directory of the script | |
8 wd <- commandArgs( trailingOnly=FALSE ) %>% | |
9 keep( ~grepl("--file=", .x) ) %>% | |
10 str_replace( "--file=", "" ) %>% dirname() | |
11 cat( "Running the script from", wd, "\n" ) | |
12 | |
13 ## Parse command-line arugments | |
14 option_list <- list( | |
15 make_option(c("-i", "--in"), type="character", help="Input file"), | |
16 make_option(c("-o", "--out"), type="character", default="/data", | |
17 help="Output directory"), | |
18 make_option(c("-m", "--markers"), type="character", default="auto", | |
19 help="Markers to model"), | |
20 make_option(c("-p", "--plots"), type="character", default="off", | |
21 help="Generate plots showing the fit"), | |
22 make_option("--mct", type="character", default="", | |
23 help="Marker -> cell type map in .csv format"), | |
24 make_option("--id", type="character", default="CellID", | |
25 help="Column containing cell IDs"), | |
26 make_option("--log", type="character", default="auto", | |
27 help="Whether to apply a log transform <yes|no|auto>"), | |
28 make_option("--sfx", type="character", default="", | |
29 help="Common suffix on marker columns (e.g., _cellMask)"), | |
30 make_option("--umap", action="store_true", default=FALSE, | |
31 help="Generate UMAP plots") | |
32 ) | |
33 opt <- parse_args(OptionParser(option_list=option_list)) | |
34 | |
35 ## Argument verification | |
36 if( !("in" %in% names(opt)) ) | |
37 stop( "Please provide an input file name with -i" ) | |
38 if( !(opt$log %in% c("yes","no","auto")) ) | |
39 stop( "--log must be one of <yes|no|auto>" ) | |
40 if( !(opt$plots %in% c("off", "pdf", "png")) ) | |
41 stop( "--plots must be one of <off|pdf|png>" ) | |
42 | |
43 ## Identify the sample name | |
44 sn <- basename( opt$`in` ) %>% str_split( "\\." ) %>% | |
45 pluck( 1, 1 ) | |
46 cat( "Inferred sample name:", sn, "\n" ) | |
47 | |
48 ## Read the data matrix | |
49 X <- read_csv( opt$`in`, col_types=cols() ) | |
50 cat( "Read", nrow(X), "entries\n" ) | |
51 | |
52 ## Fix potential capitalization mismatch of --id | |
53 if( !(opt$id %in% colnames(X)) ) | |
54 { | |
55 ## Attempt to find a singular case-insensitive match | |
56 i <- grep( tolower(opt$id), tolower(colnames(X)) ) | |
57 if( length(i) == 1 ) | |
58 { | |
59 warning( " No such column ", opt$id, | |
60 "; using ", colnames(X)[i], " instead" ) | |
61 opt$id <- colnames(X)[i] | |
62 } | |
63 else stop( "No such column ", opt$id, | |
64 "; use --id to specify which column contains cell IDs" ) | |
65 } | |
66 | |
67 ## Identify markers in the matrix | |
68 mrkv <- findMarkers(setdiff(colnames(X), opt$id), opt$markers, | |
69 opt$sfx, TRUE, TRUE) | |
70 | |
71 ## Handle log transformation of the data | |
72 if( opt$log == "yes" || | |
73 (opt$log == "auto" && max(X[mrkv], na.rm=TRUE) > 1000) ) | |
74 { | |
75 cat( "Applying a log10 transform\n" ) | |
76 X <- X %>% mutate_at( unname(mrkv), ~log10(.x+1) ) | |
77 } | |
78 | |
79 ## Fit Gaussian mixture models | |
80 GMM <- GMMfit(X, opt$id, !!!mrkv) | |
81 fnMdl <- file.path( opt$out, str_c(sn, "-models.csv") ) | |
82 cat( "Saving models to", fnMdl, "\n" ) | |
83 GMMmodels(GMM) %>% write_csv( fnMdl ) | |
84 | |
85 ## Reshape the matrix back to cells-by-marker format | |
86 Y <- GMMreshape(GMM) | |
87 | |
88 cat( "------\n" ) | |
89 | |
90 ## Find the default cell type map | |
91 if( opt$mct != "" ) { | |
92 | |
93 ## Load marker -> cell type associations | |
94 cat( "Loading cell type map from", opt$mct, "\n" ) | |
95 mct <- read_csv( opt$mct, col_types=cols() ) %>% | |
96 distinct() %>% filter(Marker %in% colnames(Y)) | |
97 | |
98 if( nrow(mct) == 0 ) { | |
99 warning( "No usable marker -> cell type mappings detected" ) | |
100 Y <- findDominant(Y, opt$id) | |
101 } else { | |
102 cat( "Using the following marker -> cell type map:\n" ) | |
103 walk2( mct$Marker, mct$State, ~cat(.x, "->", .y, "\n") ) | |
104 Y <- callStates(Y, opt$id, mct) | |
105 } | |
106 } else { | |
107 cat( "No marker -> cell type mapping provided\n" ) | |
108 Y <- findDominant(Y, opt$id) | |
109 } | |
110 | |
111 cat( "------\n" ) | |
112 | |
113 ## Identify the output location(s) | |
114 fnOut <- file.path( opt$out, str_c(sn, "-states.csv") ) | |
115 cat( "Saving probabilities and calls to", fnOut, "\n") | |
116 Y %>% write_csv( fnOut ) | |
117 | |
118 ## Generates plots as necessary | |
119 if( opt$plots != "off" ) | |
120 { | |
121 ## Create a separate directory for plots | |
122 dirPlot <- file.path( opt$out, "plots", sn ) | |
123 dir.create(dirPlot, recursive=TRUE, showWarnings=FALSE) | |
124 | |
125 ## Fit overview | |
126 fn <- file.path( file.path(opt$out, "plots"), str_c(sn, "-allfits.", opt$plots) ) | |
127 ggf <- plotFitOverview(GMM) | |
128 suppressMessages(ggsave( fn, ggf, width=12, height=8 )) | |
129 | |
130 ## Compute a UMAP projection | |
131 if( opt$umap ) { | |
132 cat( "Computing a UMAP projection...\n" ) | |
133 U <- umap( Y, c(opt$id, "State", "Dominant") ) | |
134 | |
135 ## Generate and write a summary plot | |
136 gg <- plotSummary( U ) | |
137 fn <- file.path( file.path(opt$out, "plots"), str_c(sn, "-summary.", opt$plots) ) | |
138 suppressMessages(ggsave( fn, gg, width=9, height=7 )) | |
139 cat( "Plotted summary to", fn, "\n" ) | |
140 | |
141 ## Generate and write faceted probabilities plot | |
142 gg <- plotProbs( U, c(opt$id, "State", "Dominant") ) | |
143 fn <- file.path( file.path(opt$out, "plots"), str_c(sn, "-probs.", opt$plots) ) | |
144 suppressMessages(ggsave( fn, gg, width=9, height=7 )) | |
145 cat( "Plotted probabilities to", fn, "\n" ) | |
146 } | |
147 | |
148 ## Generate and write out plots for individual marker fits | |
149 for( i in names(mrkv) ) | |
150 { | |
151 gg <- plotMarker(GMM, i) | |
152 fn <- file.path( dirPlot, str_c(i,".",opt$plots) ) | |
153 suppressMessages(ggsave( fn, gg )) | |
154 cat( "Wrote", fn, "\n" ) | |
155 } | |
156 } |