Mercurial > repos > iuc > scpipe
diff scpipe.R @ 2:5c4bca9dd4a2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 60e2a9e9129a22924c55b11b218b39d913c7e686
author | iuc |
---|---|
date | Mon, 14 Jan 2019 08:06:47 -0500 |
parents | 32e1bfc6b7b2 |
children | 7397e6badc11 |
line wrap: on
line diff
--- a/scpipe.R Fri Aug 17 08:22:49 2018 -0400 +++ b/scpipe.R Mon Jan 14 08:06:47 2019 -0500 @@ -18,8 +18,10 @@ }) option_list <- list( + make_option(c("-bam","--bam"), type="character", help="BAM file"), make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), + make_option(c("-organism","--organism"), type="character", help="Organism e.g. hsapiens_gene_ensembl"), make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), @@ -40,6 +42,8 @@ make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), + make_option(c("-metrics_matrix","--metrics_matrix"), type="logical", help="QC metrics matrix"), + make_option(c("-keep_outliers","--keep_outliers"), type="logical", help="Keep outlier cells"), make_option(c("-report","--report"), type="logical", help="HTML report of plots"), make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") @@ -48,6 +52,7 @@ parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) args = parse_args(parser) +bam = args$bam fa_fn = args$fasta anno_fn = args$exons fq_R1 = args$read1 @@ -71,42 +76,48 @@ # Outputs out_dir = "." -fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) -combined_fastq = file.path(out_dir, "combined.fastq") -aligned_bam = file.path(out_dir, "aligned.bam") mapped_bam = file.path(out_dir, "aligned.mapped.bam") +# if input is fastqs +if (!is.null(fa_fn)) { + fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) + combined_fastq = file.path(out_dir, "combined.fastq") + aligned_bam = file.path(out_dir, "aligned.bam") -print("Trimming barcodes") -sc_trim_barcode(combined_fastq, - fq_R1, - fq_R2, - read_structure=read_structure, - filter_settings=filter_settings) + print("Trimming barcodes") + sc_trim_barcode(combined_fastq, + fq_R1, + fq_R2, + read_structure=read_structure, + filter_settings=filter_settings) -print("Building genome index") -Rsubread::buildindex(basename=fasta_index, reference=fa_fn) + print("Building genome index") + Rsubread::buildindex(basename=fasta_index, reference=fa_fn) -print("Aligning reads to genome") -Rsubread::align(index=fasta_index, - readfile1=combined_fastq, - output_file=aligned_bam, - nthreads=args$nthreads) + print("Aligning reads to genome") + Rsubread::align(index=fasta_index, + readfile1=combined_fastq, + output_file=aligned_bam, + nthreads=args$nthreads) -if (!is.null(args$barcodes)) { - barcode_anno=args$barcodes + if (!is.null(args$barcodes)) { + barcode_anno=args$barcodes + } else { + print("Detecting barcodes") + # detect 10X barcodes and generate sample_index.csv file + barcode_anno = "sample_index.csv" + sc_detect_bc( + infq=combined_fastq, + outcsv=barcode_anno, + bc_len=read_structure$bl2, + max_reads=args$max_reads, + min_count=args$min_count, + max_mismatch=args$max_mis + ) + } } else { - print("Detecting barcodes") - # detect 10X barcodes and generate sample_index.csv file - barcode_anno = "sample_index.csv" - sc_detect_bc( - infq=combined_fastq, - outcsv=barcode_anno, - bc_len=read_structure$bl2, - max_reads=args$max_reads, - min_count=args$min_count, - max_mismatch=args$max_mis - ) + aligned_bam = file.path(out_dir, bam) + barcode_anno=args$barcodes } print("Assigning reads to exons") @@ -118,32 +129,59 @@ print("Counting genes") sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) -print("Creating SingleCellExperiment object") +print("Performing QC") sce <- create_sce_by_dir(out_dir) +pdf("plots.pdf") +plot_demultiplex(sce) +if (has_umi) { + p = plot_UMI_dup(sce) + print(p) +} +sce = calculate_QC_metrics(sce) +sce = detect_outlier(sce) +p = plot_mapping(sce, percentage=TRUE, dataname=args$samplename) +print(p) +p = plot_QC_pairs(sce) +print(p) +dev.off() + +print("Removing outliers") +if (is.null(args$keep_outliers)) { + sce = remove_outliers(sce) + gene_counts <- counts(sce) + write.table(data.frame("gene_id"=rownames(gene_counts), gene_counts), file="gene_count.tsv", sep="\t", quote=FALSE, row.names=FALSE) +} -if (!is.null(args$report)) { -print("Creating report") -create_report(sample_name=args$samplename, - outdir=out_dir, - r1=fq_R1, - r2=fq_R2, - outfq=combined_fastq, - read_structure=read_structure, - filter_settings=filter_settings, - align_bam=aligned_bam, - genome_index=fasta_index, - map_bam=mapped_bam, - exon_anno=anno_fn, - stnd=args$stnd, - fix_chr=FALSE, - barcode_anno=barcode_anno, - max_mis=args$max_mis, - UMI_cor=args$UMI_cor, - gene_fl=args$gene_fl) +if (!is.null(args$metrics_matrix)) { + metrics <- colData(sce, internal=TRUE) + write.table(data.frame("cell_id"=rownames(metrics), metrics), file="metrics_matrix.tsv", sep="\t", quote=FALSE, row.names=FALSE) +} + +if (!is.null(args$report) & (!is.null(fa_fn))) { + print("Creating report") + create_report(sample_name=args$samplename, + outdir=out_dir, + r1=fq_R1, + r2=fq_R2, + outfq=combined_fastq, + read_structure=read_structure, + filter_settings=filter_settings, + align_bam=aligned_bam, + genome_index=fasta_index, + map_bam=mapped_bam, + exon_anno=anno_fn, + stnd=args$stnd, + fix_chr=FALSE, + barcode_anno=barcode_anno, + max_mis=args$max_mis, + UMI_cor=args$UMI_cor, + gene_fl=args$gene_fl, + organism=args$organism, + gene_id_type="ensembl_gene_id") } if (!is.null(args$rdata) ) { - save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) + save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) } sessionInfo()