Mercurial > repos > crs4 > exomedepth

--- a/exomedepth.R	Fri Nov 08 13:25:44 2019 -0500
+++ b/exomedepth.R	Wed Nov 25 18:37:13 2020 +0000
@@ -2,94 +2,107 @@
 suppressMessages(library(ExomeDepth))

 # Import parameters from xml wrapper (args_file)
-args  <- commandArgs(trailingOnly=TRUE)
-param <- read.table(args[1],sep="=", as.is=TRUE)
+args <- commandArgs(trailingOnly = TRUE)
+param <- read.table(args[1], sep = "=", as.is = TRUE)

 # Set common parameters
-target      <- param[match("target",param[,1]),2]
-trans_prob  <- as.numeric(param[match("trans_prob",param[,1]),2])
-output      <- param[match("output",param[,1]),2]
-test_vs_ref <- as.logical(param[match("test_vs_ref",param[,1]),2])
+target <- param[match("target", param[, 1]), 2]
+trans_prob <- as.numeric(param[match("trans_prob", param[, 1]), 2])
+output <- param[match("output", param[, 1]), 2]
+test_vs_ref <- as.logical(param[match("test_vs_ref", param[, 1]), 2])

-# Create symbolic links for multiple bam and bai
-bam         <- param[param[,1]=="bam",2]
-bam_bai     <- param[param[,1]=="bam_bai",2]
-bam_label   <- param[param[,1]=="bam_label",2]
-bam_label   <- gsub(" ", "_", bam_label)
+# Create symbolic links for multiple bam and bai
+bam <- param[param[, 1] == "bam", 2]
+bam_bai <- param[param[, 1] == "bam_bai", 2]
+bam_label <- param[param[, 1] == "bam_label", 2]
+bam_label <- gsub(" ", "_", bam_label)

-for(i in 1:length(bam)){
-  stopifnot(file.symlink(bam[i], paste(bam_label[i], "bam", sep=".")))
-  stopifnot(file.symlink(bam_bai[i], paste(bam_label[i], "bam.bai", sep=".")))
+for (i in seq_len(length(bam))) {
+    stopifnot(file.symlink(bam[i], paste(bam_label[i], "bam", sep = ".")))
+    stopifnot(file.symlink(bam_bai[i], paste(bam_label[i], "bam.bai", sep = ".")))
 }

 # Generate read count data
-BAMFiles <- paste(bam_label, "bam", sep=".")
+bam_files <- paste(bam_label, "bam", sep = ".")
 sink("/dev/null")
-ExomeCount <- suppressMessages(getBamCounts(bed.file=target, bam.files = BAMFiles))
+exome_count <- suppressMessages(getBamCounts(bed.file = target, bam.files = bam_files))
 sink()

 # Convert counts in a data frame
-ExomeCount.dafr <- as(ExomeCount[, colnames(ExomeCount)], 'data.frame')
+exome_count_dafr <- as(exome_count[, colnames(exome_count)], "data.frame")

 # Prepare the main matrix of read count data
-ExomeCount.mat <- as.matrix(ExomeCount.dafr[, grep(names(ExomeCount.dafr), pattern='.bam')])
+exome_count_mat <- as.matrix(exome_count_dafr[, grep(names(exome_count_dafr), pattern = ".bam")])

 # Remove .bam from sample name
-colnames(ExomeCount.mat) <- gsub(".bam", "", colnames(ExomeCount.mat))
+colnames(exome_count_mat) <- gsub(".bam", "", colnames(exome_count_mat))

 # Set nsamples == 1 if mode is test vs reference, assuming test is sample 1
-nsamples <- ifelse(test_vs_ref, 1, ncol(ExomeCount.mat))
+nsamples <- ifelse(test_vs_ref, 1, ncol(exome_count_mat))

 # Loop over samples
-for (i in 1:nsamples){
+for (i in 1:nsamples) {
+    # Create the aggregate reference set for this sample
+    my_choice <- suppressWarnings(suppressMessages(select.reference.set(
+        test.counts = exome_count_mat[, i],
+        reference.counts = subset(exome_count_mat, select = -i),
+        bin.length = (exome_count_dafr$end - exome_count_dafr$start) / 1000,
+        n.bins.reduced = 10000
+    )))

-	# Create the aggregate reference set for this sample
-	my.choice <- suppressWarnings(suppressMessages(
-					select.reference.set(test.counts = ExomeCount.mat[,i],
-                           				 reference.counts = subset(ExomeCount.mat, select=-i),
-                           			     bin.length = (ExomeCount.dafr$end - ExomeCount.dafr$start)/1000,
-                           		         n.bins.reduced = 10000)))
+    my_reference_selected <- apply(
+        X = exome_count_mat[, my_choice$reference.choice, drop = FALSE],
+        MAR = 1,
+        FUN = sum
+    )

-	my.reference.selected <- apply(X = ExomeCount.mat[, my.choice$reference.choice, drop=FALSE],
-                               	   MAR = 1,
-                               	   FUN = sum)
-
-	# Now create the ExomeDepth object for the CNVs call
-	all.exons<-suppressWarnings(suppressMessages(new('ExomeDepth',
-               test = ExomeCount.mat[,i],
-               reference = my.reference.selected,
-               formula = 'cbind(test,reference)~1')))
+    # Now create the ExomeDepth object for the CNVs call
+    all.exons <- suppressWarnings(suppressMessages(
+        new("ExomeDepth",
+        test = exome_count_mat[, i],
+        reference = my_reference_selected,
+        formula = "cbind(test,reference)~1"
+    )))

+    # Now call the CNVs
+    result <- try(all.exons <- suppressMessages(CallCNVs(
+        x = all.exons,
+        transition.probability = trans_prob,
+        chromosome = exome_count_dafr$space,
+        start = exome_count_dafr$start,
+        end = exome_count_dafr$end,
+        name = exome_count_dafr$names
+    )), silent = T)

-	# Now call the CNVs
-	result <- try(all.exons<-suppressMessages(CallCNVs(x=all.exons,
-                            transition.probability = trans_prob,
-                            chromosome = ExomeCount.dafr$space,
-                            start = ExomeCount.dafr$start,
-                            end = ExomeCount.dafr$end,
-                            name = ExomeCount.dafr$names)), silent=T)
+    # Next if CNVs are not detected
+    if (class(result) == "try-error") {
+        next
+    }
+
+    # Compute correlation between ref and test
+    my_cor <- cor(all.exons@reference, all.exons@test)

-	# Next if CNVs are not detected
-	if (class(result)=="try-error"){
-		next
-	}
+    # Write results
+    my_results <- cbind(
+        all.exons@CNV.calls[, c(7, 5, 6, 3)],
+        sample = colnames(exome_count_mat)[i],
+        corr = my_cor,
+        all.exons@CNV.calls[, c(4, 9, 12)]
+    )

-	# Compute correlation between ref and test
-	my.cor <- cor(all.exons@reference, all.exons@test)
-	n.call <- nrow(all.exons@CNV.calls)
+    # Re-order by chr and position
+    chr_order <- c(paste("chr", 1:22, sep = ""), "chrX", "chrY", "chrM")
+    my_results[, 1] <- factor(my_results[, 1], levels = chr_order)
+    my_results <- my_results[order(my_results[, 1], my_results[, 2], my_results[, 3]), ]

-	# Write results
-	my.results <- cbind(all.exons@CNV.calls[,c(7,5,6,3)],
-            		 	sample=colnames(ExomeCount.mat)[i],
-            			corr=my.cor,
-            			all.exons@CNV.calls[,c(4,9,12)])
-
-    	# Re-order by chr and position
-    	chrOrder<-c(paste("chr",1:22,sep=""),"chrX","chrY","chrM")
-    	my.results[,1] <- factor(my.results[,1], levels=chrOrder)
-    	my.results <- my.results[order(my.results[,1], my.results[,2], my.results[,3]),]
-
-	write.table(sep='\t', quote=FALSE, file = output,
-            		x = my.results,
-       			row.names = FALSE, col.names = FALSE, dec=".", append=TRUE)
+    write.table(
+        sep = "\t",
+        quote = FALSE,
+        file = output,
+        x = my_results,
+        row.names = FALSE,
+        col.names = FALSE,
+        dec = ".",
+        append = TRUE
+    )
 }
--- a/exomedepth.xml	Fri Nov 08 13:25:44 2019 -0500
+++ b/exomedepth.xml	Wed Nov 25 18:37:13 2020 +0000
@@ -103,7 +103,7 @@

 ExomeDepth uses read depth data to call CNVs from exome sequencing experiments. A key idea is that the test
 exome should be compared to a matched aggregate reference set. This aggregate reference set should combine
-exomes from the same batch and it should also be optimized for each exome. It will certainly diﬀer from one exome
+exomes from the same batch and it should also be optimized for each exome. It will certainly differ from one exome
 to the next.

 Importantly, ExomeDepth assumes that the CNV of interest is absent from the aggregate reference set. Hence
@@ -111,8 +111,8 @@
 common CNVs, if the call is also present in the aggregate reference. ExomeDepth is really suited to detect rare
 CNV calls (typically for rare Mendelian disorder analysis).

-The ideas used in this package are of course not speciﬁc to exome sequencing and could be applied to other
-targeted sequencing datasets, as long as they contain a suﬃciently large number of exons to estimate the parameters
+The ideas used in this package are of course not specific to exome sequencing and could be applied to other
+targeted sequencing datasets, as long as they contain a sufficiently large number of exons to estimate the parameters
 (at least 20 genes, say, but probably more would be useful). Also note that PCR based enrichment studies are often
 not well suited for this type of read depth analysis. The reason is that as the number of cycles is often set to a high
 number in order to equalize the representation of each amplicon, which can discard the CNV information.