Mercurial > repos > iuc > scpipe
comparison scpipe.R @ 2:5c4bca9dd4a2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 60e2a9e9129a22924c55b11b218b39d913c7e686
author | iuc |
---|---|
date | Mon, 14 Jan 2019 08:06:47 -0500 |
parents | 32e1bfc6b7b2 |
children | 7397e6badc11 |
comparison
equal
deleted
inserted
replaced
1:4ec6717872b1 | 2:5c4bca9dd4a2 |
---|---|
16 library(scales) | 16 library(scales) |
17 library(Rtsne) | 17 library(Rtsne) |
18 }) | 18 }) |
19 | 19 |
20 option_list <- list( | 20 option_list <- list( |
21 make_option(c("-bam","--bam"), type="character", help="BAM file"), | |
21 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), | 22 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), |
22 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), | 23 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), |
24 make_option(c("-organism","--organism"), type="character", help="Organism e.g. hsapiens_gene_ensembl"), | |
23 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), | 25 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), |
24 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), | 26 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), |
25 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), | 27 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), |
26 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"), | 28 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"), |
27 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"), | 29 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"), |
38 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"), | 40 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"), |
39 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"), | 41 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"), |
40 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), | 42 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), |
41 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), | 43 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), |
42 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), | 44 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), |
45 make_option(c("-metrics_matrix","--metrics_matrix"), type="logical", help="QC metrics matrix"), | |
46 make_option(c("-keep_outliers","--keep_outliers"), type="logical", help="Keep outlier cells"), | |
43 make_option(c("-report","--report"), type="logical", help="HTML report of plots"), | 47 make_option(c("-report","--report"), type="logical", help="HTML report of plots"), |
44 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), | 48 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), |
45 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") | 49 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") |
46 ) | 50 ) |
47 | 51 |
48 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | 52 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) |
49 args = parse_args(parser) | 53 args = parse_args(parser) |
50 | 54 |
55 bam = args$bam | |
51 fa_fn = args$fasta | 56 fa_fn = args$fasta |
52 anno_fn = args$exons | 57 anno_fn = args$exons |
53 fq_R1 = args$read1 | 58 fq_R1 = args$read1 |
54 fq_R2 = args$read2 | 59 fq_R2 = args$read2 |
55 read_structure = list( | 60 read_structure = list( |
69 | 74 |
70 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq) | 75 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq) |
71 | 76 |
72 # Outputs | 77 # Outputs |
73 out_dir = "." | 78 out_dir = "." |
74 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) | |
75 combined_fastq = file.path(out_dir, "combined.fastq") | |
76 aligned_bam = file.path(out_dir, "aligned.bam") | |
77 mapped_bam = file.path(out_dir, "aligned.mapped.bam") | 79 mapped_bam = file.path(out_dir, "aligned.mapped.bam") |
78 | 80 |
81 # if input is fastqs | |
82 if (!is.null(fa_fn)) { | |
83 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) | |
84 combined_fastq = file.path(out_dir, "combined.fastq") | |
85 aligned_bam = file.path(out_dir, "aligned.bam") | |
79 | 86 |
80 print("Trimming barcodes") | 87 print("Trimming barcodes") |
81 sc_trim_barcode(combined_fastq, | 88 sc_trim_barcode(combined_fastq, |
82 fq_R1, | 89 fq_R1, |
83 fq_R2, | 90 fq_R2, |
84 read_structure=read_structure, | 91 read_structure=read_structure, |
85 filter_settings=filter_settings) | 92 filter_settings=filter_settings) |
86 | 93 |
87 print("Building genome index") | 94 print("Building genome index") |
88 Rsubread::buildindex(basename=fasta_index, reference=fa_fn) | 95 Rsubread::buildindex(basename=fasta_index, reference=fa_fn) |
89 | 96 |
90 print("Aligning reads to genome") | 97 print("Aligning reads to genome") |
91 Rsubread::align(index=fasta_index, | 98 Rsubread::align(index=fasta_index, |
92 readfile1=combined_fastq, | 99 readfile1=combined_fastq, |
93 output_file=aligned_bam, | 100 output_file=aligned_bam, |
94 nthreads=args$nthreads) | 101 nthreads=args$nthreads) |
95 | 102 |
96 if (!is.null(args$barcodes)) { | 103 if (!is.null(args$barcodes)) { |
97 barcode_anno=args$barcodes | 104 barcode_anno=args$barcodes |
105 } else { | |
106 print("Detecting barcodes") | |
107 # detect 10X barcodes and generate sample_index.csv file | |
108 barcode_anno = "sample_index.csv" | |
109 sc_detect_bc( | |
110 infq=combined_fastq, | |
111 outcsv=barcode_anno, | |
112 bc_len=read_structure$bl2, | |
113 max_reads=args$max_reads, | |
114 min_count=args$min_count, | |
115 max_mismatch=args$max_mis | |
116 ) | |
117 } | |
98 } else { | 118 } else { |
99 print("Detecting barcodes") | 119 aligned_bam = file.path(out_dir, bam) |
100 # detect 10X barcodes and generate sample_index.csv file | 120 barcode_anno=args$barcodes |
101 barcode_anno = "sample_index.csv" | |
102 sc_detect_bc( | |
103 infq=combined_fastq, | |
104 outcsv=barcode_anno, | |
105 bc_len=read_structure$bl2, | |
106 max_reads=args$max_reads, | |
107 min_count=args$min_count, | |
108 max_mismatch=args$max_mis | |
109 ) | |
110 } | 121 } |
111 | 122 |
112 print("Assigning reads to exons") | 123 print("Assigning reads to exons") |
113 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd) | 124 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd) |
114 | 125 |
116 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi) | 127 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi) |
117 | 128 |
118 print("Counting genes") | 129 print("Counting genes") |
119 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) | 130 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) |
120 | 131 |
121 print("Creating SingleCellExperiment object") | 132 print("Performing QC") |
122 sce <- create_sce_by_dir(out_dir) | 133 sce <- create_sce_by_dir(out_dir) |
134 pdf("plots.pdf") | |
135 plot_demultiplex(sce) | |
136 if (has_umi) { | |
137 p = plot_UMI_dup(sce) | |
138 print(p) | |
139 } | |
140 sce = calculate_QC_metrics(sce) | |
141 sce = detect_outlier(sce) | |
142 p = plot_mapping(sce, percentage=TRUE, dataname=args$samplename) | |
143 print(p) | |
144 p = plot_QC_pairs(sce) | |
145 print(p) | |
146 dev.off() | |
123 | 147 |
124 if (!is.null(args$report)) { | 148 print("Removing outliers") |
125 print("Creating report") | 149 if (is.null(args$keep_outliers)) { |
126 create_report(sample_name=args$samplename, | 150 sce = remove_outliers(sce) |
127 outdir=out_dir, | 151 gene_counts <- counts(sce) |
128 r1=fq_R1, | 152 write.table(data.frame("gene_id"=rownames(gene_counts), gene_counts), file="gene_count.tsv", sep="\t", quote=FALSE, row.names=FALSE) |
129 r2=fq_R2, | 153 } |
130 outfq=combined_fastq, | 154 |
131 read_structure=read_structure, | 155 if (!is.null(args$metrics_matrix)) { |
132 filter_settings=filter_settings, | 156 metrics <- colData(sce, internal=TRUE) |
133 align_bam=aligned_bam, | 157 write.table(data.frame("cell_id"=rownames(metrics), metrics), file="metrics_matrix.tsv", sep="\t", quote=FALSE, row.names=FALSE) |
134 genome_index=fasta_index, | 158 } |
135 map_bam=mapped_bam, | 159 |
136 exon_anno=anno_fn, | 160 if (!is.null(args$report) & (!is.null(fa_fn))) { |
137 stnd=args$stnd, | 161 print("Creating report") |
138 fix_chr=FALSE, | 162 create_report(sample_name=args$samplename, |
139 barcode_anno=barcode_anno, | 163 outdir=out_dir, |
140 max_mis=args$max_mis, | 164 r1=fq_R1, |
141 UMI_cor=args$UMI_cor, | 165 r2=fq_R2, |
142 gene_fl=args$gene_fl) | 166 outfq=combined_fastq, |
167 read_structure=read_structure, | |
168 filter_settings=filter_settings, | |
169 align_bam=aligned_bam, | |
170 genome_index=fasta_index, | |
171 map_bam=mapped_bam, | |
172 exon_anno=anno_fn, | |
173 stnd=args$stnd, | |
174 fix_chr=FALSE, | |
175 barcode_anno=barcode_anno, | |
176 max_mis=args$max_mis, | |
177 UMI_cor=args$UMI_cor, | |
178 gene_fl=args$gene_fl, | |
179 organism=args$organism, | |
180 gene_id_type="ensembl_gene_id") | |
143 } | 181 } |
144 | 182 |
145 if (!is.null(args$rdata) ) { | 183 if (!is.null(args$rdata) ) { |
146 save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) | 184 save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) |
147 } | 185 } |
148 | 186 |
149 sessionInfo() | 187 sessionInfo() |