Mercurial > repos > iuc > scpipe
comparison scpipe.R @ 0:32e1bfc6b7b2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 8908da9cdd112ae0943dbf1eccb221e84cd99ca7
author | iuc |
---|---|
date | Wed, 15 Aug 2018 13:54:40 -0400 |
parents | |
children | 5c4bca9dd4a2 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:32e1bfc6b7b2 |
---|---|
1 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
2 | |
3 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
5 | |
6 suppressPackageStartupMessages({ | |
7 library(scPipe) | |
8 library(SingleCellExperiment) | |
9 library(optparse) | |
10 library(readr) | |
11 library(ggplot2) | |
12 library(plotly) | |
13 library(DT) | |
14 library(scater) | |
15 library(scran) | |
16 library(scales) | |
17 library(Rtsne) | |
18 }) | |
19 | |
20 option_list <- list( | |
21 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), | |
22 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), | |
23 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), | |
24 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), | |
25 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), | |
26 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"), | |
27 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"), | |
28 make_option(c("-bl1","--bl1"), type="integer", help="Barcode length in Read 1"), | |
29 make_option(c("-bs2","--bs2"), type="integer", help="Barcode start in Read 2"), | |
30 make_option(c("-bl2","--bl2"), type="integer", help="Barcode length in Read 2"), | |
31 make_option(c("-us","--us"), type="integer", help="UMI start in Read 2"), | |
32 make_option(c("-ul","--ul"), type="integer", help="UMI length in Read 2"), | |
33 make_option(c("-rmlow","--rmlow"), type="logical", help="Remove reads with N in barcode or UMI"), | |
34 make_option(c("-rmN","--rmN"), type="logical", help="Remove reads with low quality"), | |
35 make_option(c("-minq","--minq"), type="integer", help="Minimum read quality"), | |
36 make_option(c("-numbq","--numbq"), type="integer", help="Maximum number of bases below minq"), | |
37 make_option(c("-stnd","--stnd"), type="logical", help="Perform strand-specific mapping"), | |
38 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"), | |
39 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"), | |
40 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), | |
41 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), | |
42 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), | |
43 make_option(c("-report","--report"), type="logical", help="HTML report of plots"), | |
44 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), | |
45 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") | |
46 ) | |
47 | |
48 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | |
49 args = parse_args(parser) | |
50 | |
51 fa_fn = args$fasta | |
52 anno_fn = args$exons | |
53 fq_R1 = args$read1 | |
54 fq_R2 = args$read2 | |
55 read_structure = list( | |
56 bs1 = args$bs1, # barcode start position in fq_R1, -1 indicates no barcode | |
57 bl1 = args$bl1, # barcode length in fq_R1, 0 since no barcode present | |
58 bs2 = args$bs2, # barcode start position in fq_R2 | |
59 bl2 = args$bl2, # barcode length in fq_R2 | |
60 us = args$us, # UMI start position in fq_R2 | |
61 ul = args$ul # UMI length | |
62 ) | |
63 | |
64 if (args$us == -1) { | |
65 has_umi = FALSE | |
66 } else { | |
67 has_umi = TRUE | |
68 } | |
69 | |
70 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq) | |
71 | |
72 # Outputs | |
73 out_dir = "." | |
74 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) | |
75 combined_fastq = file.path(out_dir, "combined.fastq") | |
76 aligned_bam = file.path(out_dir, "aligned.bam") | |
77 mapped_bam = file.path(out_dir, "aligned.mapped.bam") | |
78 | |
79 | |
80 print("Trimming barcodes") | |
81 sc_trim_barcode(combined_fastq, | |
82 fq_R1, | |
83 fq_R2, | |
84 read_structure=read_structure, | |
85 filter_settings=filter_settings) | |
86 | |
87 print("Building genome index") | |
88 Rsubread::buildindex(basename=fasta_index, reference=fa_fn) | |
89 | |
90 print("Aligning reads to genome") | |
91 Rsubread::align(index=fasta_index, | |
92 readfile1=combined_fastq, | |
93 output_file=aligned_bam, | |
94 nthreads=args$nthreads) | |
95 | |
96 if (!is.null(args$barcodes)) { | |
97 barcode_anno=args$barcodes | |
98 } else { | |
99 print("Detecting barcodes") | |
100 # detect 10X barcodes and generate sample_index.csv file | |
101 barcode_anno = "sample_index.csv" | |
102 sc_detect_bc( | |
103 infq=combined_fastq, | |
104 outcsv=barcode_anno, | |
105 bc_len=read_structure$bl2, | |
106 max_reads=args$max_reads, | |
107 min_count=args$min_count, | |
108 max_mismatch=args$max_mis | |
109 ) | |
110 } | |
111 | |
112 print("Assigning reads to exons") | |
113 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd) | |
114 | |
115 print("De-multiplexing data") | |
116 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi) | |
117 | |
118 print("Counting genes") | |
119 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) | |
120 | |
121 print("Creating SingleCellExperiment object") | |
122 sce <- create_sce_by_dir(out_dir) | |
123 | |
124 if (!is.null(args$report)) { | |
125 print("Creating report") | |
126 create_report(sample_name=args$samplename, | |
127 outdir=out_dir, | |
128 r1=fq_R1, | |
129 r2=fq_R2, | |
130 outfq=combined_fastq, | |
131 read_structure=read_structure, | |
132 filter_settings=filter_settings, | |
133 align_bam=aligned_bam, | |
134 genome_index=fasta_index, | |
135 map_bam=mapped_bam, | |
136 exon_anno=anno_fn, | |
137 stnd=args$stnd, | |
138 fix_chr=FALSE, | |
139 barcode_anno=barcode_anno, | |
140 max_mis=args$max_mis, | |
141 UMI_cor=args$UMI_cor, | |
142 gene_fl=args$gene_fl) | |
143 } | |
144 | |
145 if (!is.null(args$rdata) ) { | |
146 save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) | |
147 } | |
148 | |
149 sessionInfo() |