comparison scpipe.R @ 2:5c4bca9dd4a2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 60e2a9e9129a22924c55b11b218b39d913c7e686
author iuc
date Mon, 14 Jan 2019 08:06:47 -0500
parents 32e1bfc6b7b2
children 7397e6badc11
comparison
equal deleted inserted replaced
1:4ec6717872b1 2:5c4bca9dd4a2
16 library(scales) 16 library(scales)
17 library(Rtsne) 17 library(Rtsne)
18 }) 18 })
19 19
20 option_list <- list( 20 option_list <- list(
21 make_option(c("-bam","--bam"), type="character", help="BAM file"),
21 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), 22 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"),
22 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), 23 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"),
24 make_option(c("-organism","--organism"), type="character", help="Organism e.g. hsapiens_gene_ensembl"),
23 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), 25 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"),
24 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), 26 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"),
25 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), 27 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"),
26 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"), 28 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"),
27 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"), 29 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"),
38 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"), 40 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"),
39 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"), 41 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"),
40 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), 42 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"),
41 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), 43 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"),
42 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), 44 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"),
45 make_option(c("-metrics_matrix","--metrics_matrix"), type="logical", help="QC metrics matrix"),
46 make_option(c("-keep_outliers","--keep_outliers"), type="logical", help="Keep outlier cells"),
43 make_option(c("-report","--report"), type="logical", help="HTML report of plots"), 47 make_option(c("-report","--report"), type="logical", help="HTML report of plots"),
44 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), 48 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"),
45 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") 49 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads")
46 ) 50 )
47 51
48 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) 52 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
49 args = parse_args(parser) 53 args = parse_args(parser)
50 54
55 bam = args$bam
51 fa_fn = args$fasta 56 fa_fn = args$fasta
52 anno_fn = args$exons 57 anno_fn = args$exons
53 fq_R1 = args$read1 58 fq_R1 = args$read1
54 fq_R2 = args$read2 59 fq_R2 = args$read2
55 read_structure = list( 60 read_structure = list(
69 74
70 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq) 75 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq)
71 76
72 # Outputs 77 # Outputs
73 out_dir = "." 78 out_dir = "."
74 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index"))
75 combined_fastq = file.path(out_dir, "combined.fastq")
76 aligned_bam = file.path(out_dir, "aligned.bam")
77 mapped_bam = file.path(out_dir, "aligned.mapped.bam") 79 mapped_bam = file.path(out_dir, "aligned.mapped.bam")
78 80
81 # if input is fastqs
82 if (!is.null(fa_fn)) {
83 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index"))
84 combined_fastq = file.path(out_dir, "combined.fastq")
85 aligned_bam = file.path(out_dir, "aligned.bam")
79 86
80 print("Trimming barcodes") 87 print("Trimming barcodes")
81 sc_trim_barcode(combined_fastq, 88 sc_trim_barcode(combined_fastq,
82 fq_R1, 89 fq_R1,
83 fq_R2, 90 fq_R2,
84 read_structure=read_structure, 91 read_structure=read_structure,
85 filter_settings=filter_settings) 92 filter_settings=filter_settings)
86 93
87 print("Building genome index") 94 print("Building genome index")
88 Rsubread::buildindex(basename=fasta_index, reference=fa_fn) 95 Rsubread::buildindex(basename=fasta_index, reference=fa_fn)
89 96
90 print("Aligning reads to genome") 97 print("Aligning reads to genome")
91 Rsubread::align(index=fasta_index, 98 Rsubread::align(index=fasta_index,
92 readfile1=combined_fastq, 99 readfile1=combined_fastq,
93 output_file=aligned_bam, 100 output_file=aligned_bam,
94 nthreads=args$nthreads) 101 nthreads=args$nthreads)
95 102
96 if (!is.null(args$barcodes)) { 103 if (!is.null(args$barcodes)) {
97 barcode_anno=args$barcodes 104 barcode_anno=args$barcodes
105 } else {
106 print("Detecting barcodes")
107 # detect 10X barcodes and generate sample_index.csv file
108 barcode_anno = "sample_index.csv"
109 sc_detect_bc(
110 infq=combined_fastq,
111 outcsv=barcode_anno,
112 bc_len=read_structure$bl2,
113 max_reads=args$max_reads,
114 min_count=args$min_count,
115 max_mismatch=args$max_mis
116 )
117 }
98 } else { 118 } else {
99 print("Detecting barcodes") 119 aligned_bam = file.path(out_dir, bam)
100 # detect 10X barcodes and generate sample_index.csv file 120 barcode_anno=args$barcodes
101 barcode_anno = "sample_index.csv"
102 sc_detect_bc(
103 infq=combined_fastq,
104 outcsv=barcode_anno,
105 bc_len=read_structure$bl2,
106 max_reads=args$max_reads,
107 min_count=args$min_count,
108 max_mismatch=args$max_mis
109 )
110 } 121 }
111 122
112 print("Assigning reads to exons") 123 print("Assigning reads to exons")
113 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd) 124 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd)
114 125
116 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi) 127 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi)
117 128
118 print("Counting genes") 129 print("Counting genes")
119 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) 130 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl)
120 131
121 print("Creating SingleCellExperiment object") 132 print("Performing QC")
122 sce <- create_sce_by_dir(out_dir) 133 sce <- create_sce_by_dir(out_dir)
134 pdf("plots.pdf")
135 plot_demultiplex(sce)
136 if (has_umi) {
137 p = plot_UMI_dup(sce)
138 print(p)
139 }
140 sce = calculate_QC_metrics(sce)
141 sce = detect_outlier(sce)
142 p = plot_mapping(sce, percentage=TRUE, dataname=args$samplename)
143 print(p)
144 p = plot_QC_pairs(sce)
145 print(p)
146 dev.off()
123 147
124 if (!is.null(args$report)) { 148 print("Removing outliers")
125 print("Creating report") 149 if (is.null(args$keep_outliers)) {
126 create_report(sample_name=args$samplename, 150 sce = remove_outliers(sce)
127 outdir=out_dir, 151 gene_counts <- counts(sce)
128 r1=fq_R1, 152 write.table(data.frame("gene_id"=rownames(gene_counts), gene_counts), file="gene_count.tsv", sep="\t", quote=FALSE, row.names=FALSE)
129 r2=fq_R2, 153 }
130 outfq=combined_fastq, 154
131 read_structure=read_structure, 155 if (!is.null(args$metrics_matrix)) {
132 filter_settings=filter_settings, 156 metrics <- colData(sce, internal=TRUE)
133 align_bam=aligned_bam, 157 write.table(data.frame("cell_id"=rownames(metrics), metrics), file="metrics_matrix.tsv", sep="\t", quote=FALSE, row.names=FALSE)
134 genome_index=fasta_index, 158 }
135 map_bam=mapped_bam, 159
136 exon_anno=anno_fn, 160 if (!is.null(args$report) & (!is.null(fa_fn))) {
137 stnd=args$stnd, 161 print("Creating report")
138 fix_chr=FALSE, 162 create_report(sample_name=args$samplename,
139 barcode_anno=barcode_anno, 163 outdir=out_dir,
140 max_mis=args$max_mis, 164 r1=fq_R1,
141 UMI_cor=args$UMI_cor, 165 r2=fq_R2,
142 gene_fl=args$gene_fl) 166 outfq=combined_fastq,
167 read_structure=read_structure,
168 filter_settings=filter_settings,
169 align_bam=aligned_bam,
170 genome_index=fasta_index,
171 map_bam=mapped_bam,
172 exon_anno=anno_fn,
173 stnd=args$stnd,
174 fix_chr=FALSE,
175 barcode_anno=barcode_anno,
176 max_mis=args$max_mis,
177 UMI_cor=args$UMI_cor,
178 gene_fl=args$gene_fl,
179 organism=args$organism,
180 gene_id_type="ensembl_gene_id")
143 } 181 }
144 182
145 if (!is.null(args$rdata) ) { 183 if (!is.null(args$rdata) ) {
146 save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) 184 save(sce, file = file.path(out_dir,"scPipe_analysis.RData"))
147 } 185 }
148 186
149 sessionInfo() 187 sessionInfo()