comparison ruvseq.R @ 1:c24765926774 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ruvseq commit 9ed3d83cc447ee897af867362bf1dd67af8a11c2
author iuc
date Tue, 26 Mar 2019 06:25:38 -0400
parents 61dffb20b6f9
children fed9d0350d72
comparison
equal deleted inserted replaced
0:61dffb20b6f9 1:c24765926774
81 ) 81 )
82 } 82 }
83 83
84 create_seq_expression_set <- function (dds, min_mean_count) { 84 create_seq_expression_set <- function (dds, min_mean_count) {
85 count_values <- counts(dds) 85 count_values <- counts(dds)
86 print(paste0("feature count before filtering :",nrow(count_values),"\n"))
87 print(paste0("Filtering features which mean expression is less or eq. than ", min_mean_count, " counts\n"))
86 filter <- apply(count_values, 1, function(x) mean(x) > min_mean_count) 88 filter <- apply(count_values, 1, function(x) mean(x) > min_mean_count)
87 filtered <- count_values[filter,] 89 filtered <- count_values[filter,]
88 set = newSeqExpressionSet(as.matrix(count_values), 90 print(paste0("feature count after filtering :",nrow(filtered),"\n"))
91 set = newSeqExpressionSet(as.matrix(filtered),
89 phenoData = data.frame(colData(dds)$condition, row.names=colnames(filtered))) 92 phenoData = data.frame(colData(dds)$condition, row.names=colnames(filtered)))
90 plot_pca_rle(set = set, title = 'raw data') 93 plot_pca_rle(set = set, title = 'raw data')
91 set <- betweenLaneNormalization(set, which="upper") 94 set <- betweenLaneNormalization(set, which="upper")
92 plot_pca_rle(set = set, title = 'upper quartile normalized') 95 plot_pca_rle(set = set, title = 'upper quartile normalized')
93 return(set) 96 return(set)
101 y <- estimateGLMCommonDisp(y, design) 104 y <- estimateGLMCommonDisp(y, design)
102 y <- estimateGLMTagwiseDisp(y, design) 105 y <- estimateGLMTagwiseDisp(y, design)
103 fit <- glmFit(y, design) 106 fit <- glmFit(y, design)
104 lrt <- glmLRT(fit, coef=2) 107 lrt <- glmLRT(fit, coef=2)
105 top <- topTags(lrt, n=nrow(set))$table 108 top <- topTags(lrt, n=nrow(set))$table
106 top_rows <- rownames(top)[which(top$PValue > cutoff_p)] 109 top_rows <- rownames(top)[which(top$PValue < cutoff_p)]
107 empirical <- rownames(set)[which(!(rownames(set) %in% top_rows))] 110 empirical <- rownames(set)[which(!(rownames(set) %in% top_rows))]
108 return(empirical) 111 return(empirical)
109 } 112 }
110 113
111 ruv_control_gene_method <- function(set, k, control_genes='empirical', cutoff_p=0.2) { 114 ruv_control_gene_method <- function(set, k, control_genes='empirical', cutoff_p=0.2) {
152 155
153 opt <- setup_cmdline_options() 156 opt <- setup_cmdline_options()
154 alpha <- opt$alpha 157 alpha <- opt$alpha
155 min_k <- opt$min_k 158 min_k <- opt$min_k
156 max_k <- opt$max_k 159 max_k <- opt$max_k
160 min_c <- opt$min_mean_count
157 sample_json <- fromJSON(opt$sample_json) 161 sample_json <- fromJSON(opt$sample_json)
158 sample_paths <- sample_json$path 162 sample_paths <- sample_json$path
159 sample_names <- sample_json$label 163 sample_names <- sample_json$label
160 condition <- as.factor(sample_json$condition) 164 condition <- as.factor(sample_json$condition)
161 sampleTable <- data.frame(samplename=sample_names, 165 sampleTable <- data.frame(samplename=sample_names,
167 if (!is.null(opt$plots)) { 171 if (!is.null(opt$plots)) {
168 pdf(opt$plots) 172 pdf(opt$plots)
169 } 173 }
170 174
171 # Run through the ruvseq variants 175 # Run through the ruvseq variants
172 set <- create_seq_expression_set(dds, min_mean_count = opt$min_mean_count) 176 set <- create_seq_expression_set(dds, min_mean_count = min_c)
173 result <- list(no_correction = set) 177 result <- list(no_correction = set)
174 for (k in seq(min_k, max_k)) { 178 for (k in seq(min_k, max_k)) {
175 result[[paste0('residual_method_k', k)]] <- ruv_residual_method(set, k=k) 179 result[[paste0('residual_method_k', k)]] <- ruv_residual_method(set, k=k)
176 result[[paste0('replicate_method_k', k)]] <- ruv_replicate_method(set, k=k) 180 result[[paste0('replicate_method_k', k)]] <- ruv_replicate_method(set, k=k)
177 result[[paste0('control_method_k', k)]] <- ruv_control_gene_method(set, k=k, cutoff_p=0.5) 181 result[[paste0('control_method_k', k)]] <- ruv_control_gene_method(set, k=k, cutoff_p=0.5)