comparison scpipe.R @ 3:7397e6badc11 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 8007f71281553ddfa45e6f8e1172952d956bb000"
author iuc
date Thu, 11 Jun 2020 07:18:37 -0400
parents 5c4bca9dd4a2
children
comparison
equal deleted inserted replaced
2:5c4bca9dd4a2 3:7397e6badc11
1 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 1 err_foo <- function() {
2 cat(geterrmessage(), file = stderr());
3 q("no", 1, F)
4 }
5 options(show.error.messages = F, error = err_foo)
2 6
3 # we need that to not crash galaxy with an UTF8 error on German LC settings. 7 # we need that to not crash galaxy with an UTF8 error on German LC settings.
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
5 9
6 suppressPackageStartupMessages({ 10 suppressPackageStartupMessages({
16 library(scales) 20 library(scales)
17 library(Rtsne) 21 library(Rtsne)
18 }) 22 })
19 23
20 option_list <- list( 24 option_list <- list(
21 make_option(c("-bam","--bam"), type="character", help="BAM file"), 25 make_option(c("-bam", "--bam"), type = "character", help = "BAM file"),
22 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), 26 make_option(c("-fasta", "--fasta"), type = "character", help = "Genome fasta file"),
23 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), 27 make_option(c("-exons", "--exons"), type = "character", help = "Exon annotation gff3 file"),
24 make_option(c("-organism","--organism"), type="character", help="Organism e.g. hsapiens_gene_ensembl"), 28 make_option(c("-organism", "--organism"), type = "character", help = "Organism e.g. hsapiens_gene_ensembl"),
25 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), 29 make_option(c("-barcodes", "--barcodes"), type = "character", help = "Cell barcodes csv file"),
26 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), 30 make_option(c("-read1", "--read1"), type = "character", help = "Read 1 fastq.gz"),
27 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), 31 make_option(c("-read2", "--read2"), type = "character", help = "Read 2 fastq.gz"),
28 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"), 32 make_option(c("-samplename", "--samplename"), type = "character", help = "Name to use for sample"),
29 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"), 33 make_option(c("-bs1", "--bs1"), type = "integer", help = "Barcode start in Read 1"),
30 make_option(c("-bl1","--bl1"), type="integer", help="Barcode length in Read 1"), 34 make_option(c("-bl1", "--bl1"), type = "integer", help = "Barcode length in Read 1"),
31 make_option(c("-bs2","--bs2"), type="integer", help="Barcode start in Read 2"), 35 make_option(c("-bs2", "--bs2"), type = "integer", help = "Barcode start in Read 2"),
32 make_option(c("-bl2","--bl2"), type="integer", help="Barcode length in Read 2"), 36 make_option(c("-bl2", "--bl2"), type = "integer", help = "Barcode length in Read 2"),
33 make_option(c("-us","--us"), type="integer", help="UMI start in Read 2"), 37 make_option(c("-us", "--us"), type = "integer", help = "UMI start in Read 2"),
34 make_option(c("-ul","--ul"), type="integer", help="UMI length in Read 2"), 38 make_option(c("-ul", "--ul"), type = "integer", help = "UMI length in Read 2"),
35 make_option(c("-rmlow","--rmlow"), type="logical", help="Remove reads with N in barcode or UMI"), 39 make_option(c("-rmlow", "--rmlow"), type = "logical", help = "Remove reads with N in barcode or UMI"),
36 make_option(c("-rmN","--rmN"), type="logical", help="Remove reads with low quality"), 40 make_option(c("-rmN", "--rmN"), type = "logical", help = "Remove reads with low quality"),
37 make_option(c("-minq","--minq"), type="integer", help="Minimum read quality"), 41 make_option(c("-minq", "--minq"), type = "integer", help = "Minimum read quality"),
38 make_option(c("-numbq","--numbq"), type="integer", help="Maximum number of bases below minq"), 42 make_option(c("-numbq", "--numbq"), type = "integer", help = "Maximum number of bases below minq"),
39 make_option(c("-stnd","--stnd"), type="logical", help="Perform strand-specific mapping"), 43 make_option(c("-stnd", "--stnd"), type = "logical", help = "Perform strand-specific mapping"),
40 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"), 44 make_option(c("-max_mis", "--max_mis"), type = "integer", help = "Maximum mismatch allowed in barcode"),
41 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"), 45 make_option(c("-UMI_cor", "--UMI_cor"), type = "integer", help = "Correct UMI sequence error"),
42 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), 46 make_option(c("-gene_fl", "--gene_fl"), type = "logical", help = "Remove low abundant genes"),
43 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), 47 make_option(c("-max_reads", "--max_reads"), type = "integer", help = "Maximum reads processed"),
44 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), 48 make_option(c("-min_count", "--min_count"), type = "integer", help = "Minimum count to keep"),
45 make_option(c("-metrics_matrix","--metrics_matrix"), type="logical", help="QC metrics matrix"), 49 make_option(c("-metrics_matrix", "--metrics_matrix"), type = "logical", help = "QC metrics matrix"),
46 make_option(c("-keep_outliers","--keep_outliers"), type="logical", help="Keep outlier cells"), 50 make_option(c("-keep_outliers", "--keep_outliers"), type = "logical", help = "Keep outlier cells"),
47 make_option(c("-report","--report"), type="logical", help="HTML report of plots"), 51 make_option(c("-report", "--report"), type = "logical", help = "HTML report of plots"),
48 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), 52 make_option(c("-rdata", "--rdata"), type = "logical", help = "Output RData file"),
49 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") 53 make_option(c("-nthreads", "--nthreads"), type = "integer", help = "Number of threads")
50 )
51
52 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
53 args = parse_args(parser)
54
55 bam = args$bam
56 fa_fn = args$fasta
57 anno_fn = args$exons
58 fq_R1 = args$read1
59 fq_R2 = args$read2
60 read_structure = list(
61 bs1 = args$bs1, # barcode start position in fq_R1, -1 indicates no barcode
62 bl1 = args$bl1, # barcode length in fq_R1, 0 since no barcode present
63 bs2 = args$bs2, # barcode start position in fq_R2
64 bl2 = args$bl2, # barcode length in fq_R2
65 us = args$us, # UMI start position in fq_R2
66 ul = args$ul # UMI length
67 ) 54 )
68 55
56 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
57 args <- parse_args(parser)
58
59 bam <- args$bam
60 fa_fn <- args$fasta
61 anno_fn <- args$exons
62 fq_r1 <- args$read1
63 fq_r2 <- args$read2
64 read_structure <- list(bs1 = args$bs1, # barcode start position in fq_r1, -1 indicates no barcode
65 bl1 = args$bl1, # barcode length in fq_r1, 0 since no barcode present
66 bs2 = args$bs2, # barcode start position in fq_r2
67 bl2 = args$bl2, # barcode length in fq_r2
68 us = args$us, # UMI start position in fq_r2
69 ul = args$ul # UMI length
70 )
71
69 if (args$us == -1) { 72 if (args$us == -1) {
70 has_umi = FALSE 73 has_umi <- FALSE
71 } else { 74 } else {
72 has_umi = TRUE 75 has_umi <- TRUE
73 } 76 }
74 77
75 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq) 78 filter_settings <- list(rmlow = args$rmlow,
79 rmN = args$rmN,
80 minq = args$minq,
81 numbq = args$numbq)
76 82
77 # Outputs 83 # Outputs
78 out_dir = "." 84 out_dir <- "."
79 mapped_bam = file.path(out_dir, "aligned.mapped.bam") 85 mapped_bam <- file.path(out_dir, "aligned.mapped.bam")
80 86
81 # if input is fastqs 87 # if input is fastqs
82 if (!is.null(fa_fn)) { 88 if (!is.null(fa_fn)) {
83 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) 89 fasta_index <- file.path(out_dir, paste0(fa_fn, ".fasta_index"))
84 combined_fastq = file.path(out_dir, "combined.fastq") 90 combined_fastq <- file.path(out_dir, "combined.fastq")
85 aligned_bam = file.path(out_dir, "aligned.bam") 91 aligned_bam <- file.path(out_dir, "aligned.bam")
86 92
87 print("Trimming barcodes") 93 print("Trimming barcodes")
88 sc_trim_barcode(combined_fastq, 94 sc_trim_barcode(combined_fastq,
89 fq_R1, 95 fq_r1,
90 fq_R2, 96 fq_r2,
91 read_structure=read_structure, 97 read_structure = read_structure,
92 filter_settings=filter_settings) 98 filter_settings = filter_settings)
93 99
94 print("Building genome index") 100 print("Building genome index")
95 Rsubread::buildindex(basename=fasta_index, reference=fa_fn) 101 Rsubread::buildindex(basename = fasta_index, reference = fa_fn)
96 102
97 print("Aligning reads to genome") 103 print("Aligning reads to genome")
98 Rsubread::align(index=fasta_index, 104 Rsubread::align(index = fasta_index,
99 readfile1=combined_fastq, 105 readfile1 = combined_fastq,
100 output_file=aligned_bam, 106 output_file = aligned_bam,
101 nthreads=args$nthreads) 107 nthreads = args$nthreads)
102 108
103 if (!is.null(args$barcodes)) { 109 if (!is.null(args$barcodes)) {
104 barcode_anno=args$barcodes 110 barcode_anno <- args$barcodes
105 } else { 111 } else {
106 print("Detecting barcodes") 112 print("Detecting barcodes")
107 # detect 10X barcodes and generate sample_index.csv file 113 # detect 10X barcodes and generate sample_index.csv file
108 barcode_anno = "sample_index.csv" 114 barcode_anno <- "sample_index.csv"
109 sc_detect_bc( 115 sc_detect_bc(infq = combined_fastq,
110 infq=combined_fastq, 116 outcsv = barcode_anno,
111 outcsv=barcode_anno, 117 bc_len = read_structure$bl2,
112 bc_len=read_structure$bl2, 118 max_reads = args$max_reads,
113 max_reads=args$max_reads, 119 min_count = args$min_count,
114 min_count=args$min_count, 120 max_mismatch = args$max_mis
115 max_mismatch=args$max_mis
116 ) 121 )
117 } 122 }
118 } else { 123 } else {
119 aligned_bam = file.path(out_dir, bam) 124 aligned_bam <- file.path(out_dir, bam)
120 barcode_anno=args$barcodes 125 barcode_anno <- args$barcodes
121 } 126 }
122 127
123 print("Assigning reads to exons") 128 print("Assigning reads to exons")
124 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd) 129 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len = read_structure$bl2, UMI_len = read_structure$ul, stnd = args$stnd)
125 130
126 print("De-multiplexing data") 131 print("De-multiplexing data")
127 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi) 132 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI = has_umi)
128 133
129 print("Counting genes") 134 print("Counting genes")
130 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) 135 sc_gene_counting(out_dir, barcode_anno, UMI_cor = args$UMI_cor, gene_fl = args$gene_fl)
131 136
132 print("Performing QC") 137 print("Performing QC")
133 sce <- create_sce_by_dir(out_dir) 138 sce <- create_sce_by_dir(out_dir)
134 pdf("plots.pdf") 139 pdf("plots.pdf")
135 plot_demultiplex(sce) 140 plot_demultiplex(sce)
136 if (has_umi) { 141 if (has_umi) {
137 p = plot_UMI_dup(sce) 142 p <- plot_UMI_dup(sce)
138 print(p) 143 print(p)
139 } 144 }
140 sce = calculate_QC_metrics(sce) 145 sce <- calculate_QC_metrics(sce)
141 sce = detect_outlier(sce) 146 sce <- detect_outlier(sce)
142 p = plot_mapping(sce, percentage=TRUE, dataname=args$samplename) 147 p <- plot_mapping(sce, percentage = TRUE, dataname = args$samplename)
143 print(p) 148 print(p)
144 p = plot_QC_pairs(sce) 149 p <- plot_QC_pairs(sce)
145 print(p) 150 print(p)
146 dev.off() 151 dev.off()
147 152
148 print("Removing outliers") 153 print("Removing outliers")
149 if (is.null(args$keep_outliers)) { 154 if (is.null(args$keep_outliers)) {
150 sce = remove_outliers(sce) 155 sce <- remove_outliers(sce)
151 gene_counts <- counts(sce) 156 gene_counts <- counts(sce)
152 write.table(data.frame("gene_id"=rownames(gene_counts), gene_counts), file="gene_count.tsv", sep="\t", quote=FALSE, row.names=FALSE) 157 write.table(data.frame("gene_id" = rownames(gene_counts), gene_counts), file = "gene_count.tsv", sep = "\t", quote = FALSE, row.names = FALSE)
153 } 158 }
154 159
155 if (!is.null(args$metrics_matrix)) { 160 if (!is.null(args$metrics_matrix)) {
156 metrics <- colData(sce, internal=TRUE) 161 metrics <- colData(sce, internal = TRUE)
157 write.table(data.frame("cell_id"=rownames(metrics), metrics), file="metrics_matrix.tsv", sep="\t", quote=FALSE, row.names=FALSE) 162 write.table(data.frame("cell_id" = rownames(metrics), metrics), file = "metrics_matrix.tsv", sep = "\t", quote = FALSE, row.names = FALSE)
158 } 163 }
159 164
160 if (!is.null(args$report) & (!is.null(fa_fn))) { 165 if (!is.null(args$report) & (!is.null(fa_fn))) {
161 print("Creating report") 166 print("Creating report")
162 create_report(sample_name=args$samplename, 167 create_report(sample_name = args$samplename,
163 outdir=out_dir, 168 outdir = out_dir,
164 r1=fq_R1, 169 r1 = fq_r1,
165 r2=fq_R2, 170 r2 = fq_r2,
166 outfq=combined_fastq, 171 outfq = combined_fastq,
167 read_structure=read_structure, 172 read_structure = read_structure,
168 filter_settings=filter_settings, 173 filter_settings = filter_settings,
169 align_bam=aligned_bam, 174 align_bam = aligned_bam,
170 genome_index=fasta_index, 175 genome_index = fasta_index,
171 map_bam=mapped_bam, 176 map_bam = mapped_bam,
172 exon_anno=anno_fn, 177 exon_anno = anno_fn,
173 stnd=args$stnd, 178 stnd = args$stnd,
174 fix_chr=FALSE, 179 fix_chr = FALSE,
175 barcode_anno=barcode_anno, 180 barcode_anno = barcode_anno,
176 max_mis=args$max_mis, 181 max_mis = args$max_mis,
177 UMI_cor=args$UMI_cor, 182 UMI_cor = args$UMI_cor,
178 gene_fl=args$gene_fl, 183 gene_fl = args$gene_fl,
179 organism=args$organism, 184 organism = args$organism,
180 gene_id_type="ensembl_gene_id") 185 gene_id_type = "ensembl_gene_id")
181 } 186 }
182 187
183 if (!is.null(args$rdata) ) { 188 if (!is.null(args$rdata)) {
184 save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) 189 save(sce, file = file.path(out_dir, "scPipe_analysis.RData"))
185 } 190 }
186 191
187 sessionInfo() 192 sessionInfo()