comparison scpipe.R @ 0:32e1bfc6b7b2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 8908da9cdd112ae0943dbf1eccb221e84cd99ca7
author iuc
date Wed, 15 Aug 2018 13:54:40 -0400
parents
children 5c4bca9dd4a2
comparison
equal deleted inserted replaced
-1:000000000000 0:32e1bfc6b7b2
1 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
2
3 # we need that to not crash galaxy with an UTF8 error on German LC settings.
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
5
6 suppressPackageStartupMessages({
7 library(scPipe)
8 library(SingleCellExperiment)
9 library(optparse)
10 library(readr)
11 library(ggplot2)
12 library(plotly)
13 library(DT)
14 library(scater)
15 library(scran)
16 library(scales)
17 library(Rtsne)
18 })
19
20 option_list <- list(
21 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"),
22 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"),
23 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"),
24 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"),
25 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"),
26 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"),
27 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"),
28 make_option(c("-bl1","--bl1"), type="integer", help="Barcode length in Read 1"),
29 make_option(c("-bs2","--bs2"), type="integer", help="Barcode start in Read 2"),
30 make_option(c("-bl2","--bl2"), type="integer", help="Barcode length in Read 2"),
31 make_option(c("-us","--us"), type="integer", help="UMI start in Read 2"),
32 make_option(c("-ul","--ul"), type="integer", help="UMI length in Read 2"),
33 make_option(c("-rmlow","--rmlow"), type="logical", help="Remove reads with N in barcode or UMI"),
34 make_option(c("-rmN","--rmN"), type="logical", help="Remove reads with low quality"),
35 make_option(c("-minq","--minq"), type="integer", help="Minimum read quality"),
36 make_option(c("-numbq","--numbq"), type="integer", help="Maximum number of bases below minq"),
37 make_option(c("-stnd","--stnd"), type="logical", help="Perform strand-specific mapping"),
38 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"),
39 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"),
40 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"),
41 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"),
42 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"),
43 make_option(c("-report","--report"), type="logical", help="HTML report of plots"),
44 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"),
45 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads")
46 )
47
48 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
49 args = parse_args(parser)
50
51 fa_fn = args$fasta
52 anno_fn = args$exons
53 fq_R1 = args$read1
54 fq_R2 = args$read2
55 read_structure = list(
56 bs1 = args$bs1, # barcode start position in fq_R1, -1 indicates no barcode
57 bl1 = args$bl1, # barcode length in fq_R1, 0 since no barcode present
58 bs2 = args$bs2, # barcode start position in fq_R2
59 bl2 = args$bl2, # barcode length in fq_R2
60 us = args$us, # UMI start position in fq_R2
61 ul = args$ul # UMI length
62 )
63
64 if (args$us == -1) {
65 has_umi = FALSE
66 } else {
67 has_umi = TRUE
68 }
69
70 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq)
71
72 # Outputs
73 out_dir = "."
74 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index"))
75 combined_fastq = file.path(out_dir, "combined.fastq")
76 aligned_bam = file.path(out_dir, "aligned.bam")
77 mapped_bam = file.path(out_dir, "aligned.mapped.bam")
78
79
80 print("Trimming barcodes")
81 sc_trim_barcode(combined_fastq,
82 fq_R1,
83 fq_R2,
84 read_structure=read_structure,
85 filter_settings=filter_settings)
86
87 print("Building genome index")
88 Rsubread::buildindex(basename=fasta_index, reference=fa_fn)
89
90 print("Aligning reads to genome")
91 Rsubread::align(index=fasta_index,
92 readfile1=combined_fastq,
93 output_file=aligned_bam,
94 nthreads=args$nthreads)
95
96 if (!is.null(args$barcodes)) {
97 barcode_anno=args$barcodes
98 } else {
99 print("Detecting barcodes")
100 # detect 10X barcodes and generate sample_index.csv file
101 barcode_anno = "sample_index.csv"
102 sc_detect_bc(
103 infq=combined_fastq,
104 outcsv=barcode_anno,
105 bc_len=read_structure$bl2,
106 max_reads=args$max_reads,
107 min_count=args$min_count,
108 max_mismatch=args$max_mis
109 )
110 }
111
112 print("Assigning reads to exons")
113 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd)
114
115 print("De-multiplexing data")
116 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi)
117
118 print("Counting genes")
119 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl)
120
121 print("Creating SingleCellExperiment object")
122 sce <- create_sce_by_dir(out_dir)
123
124 if (!is.null(args$report)) {
125 print("Creating report")
126 create_report(sample_name=args$samplename,
127 outdir=out_dir,
128 r1=fq_R1,
129 r2=fq_R2,
130 outfq=combined_fastq,
131 read_structure=read_structure,
132 filter_settings=filter_settings,
133 align_bam=aligned_bam,
134 genome_index=fasta_index,
135 map_bam=mapped_bam,
136 exon_anno=anno_fn,
137 stnd=args$stnd,
138 fix_chr=FALSE,
139 barcode_anno=barcode_anno,
140 max_mis=args$max_mis,
141 UMI_cor=args$UMI_cor,
142 gene_fl=args$gene_fl)
143 }
144
145 if (!is.null(args$rdata) ) {
146 save(sce, file = file.path(out_dir,"scPipe_analysis.RData"))
147 }
148
149 sessionInfo()