comparison scpipe.xml @ 2:5c4bca9dd4a2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 60e2a9e9129a22924c55b11b218b39d913c7e686
author iuc
date Mon, 14 Jan 2019 08:06:47 -0500
parents 4ec6717872b1
children 3ffca09599ca
comparison
equal deleted inserted replaced
1:4ec6717872b1 2:5c4bca9dd4a2
1 <tool id="scpipe" name="scPipe" version="1.0.0"> 1 <tool id="scpipe" name="scPipe" version="1.0.0+galaxy1">
2 <description>- preprocessing pipeline for single cell RNA-seq</description> 2 <description>- preprocessing pipeline for single cell RNA-seq</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.0.0">bioconductor-scpipe</requirement> 4 <requirement type="package" version="1.0.0">bioconductor-scpipe</requirement>
5 <requirement type="package" version="1.28.1">bioconductor-rsubread</requirement> 5 <requirement type="package" version="1.28.1">bioconductor-rsubread</requirement>
6 <!-- rhtslib can be removed with a newer scpipe package -->
7 <requirement type="package" version="1.10.0">bioconductor-rhtslib</requirement>
6 <requirement type="package" version="1.20">r-knitr</requirement> 8 <requirement type="package" version="1.20">r-knitr</requirement>
7 <requirement type="package" version="1.10">r-rmarkdown</requirement> 9 <requirement type="package" version="1.10">r-rmarkdown</requirement>
8 <requirement type="package" version="1.1.1">r-readr</requirement> 10 <requirement type="package" version="1.1.1">r-readr</requirement>
9 <requirement type="package" version="4.7.1">r-plotly</requirement> 11 <requirement type="package" version="4.7.1">r-plotly</requirement>
10 <requirement type="package" version="0.4">r-dt</requirement> 12 <requirement type="package" version="0.4">r-dt</requirement>
11 <requirement type="package" version="1.6.0">bioconductor-scater</requirement> 13 <requirement type="package" version="1.6.0">bioconductor-scater</requirement>
12 <requirement type="package" version="1.6.2">bioconductor-scran</requirement> 14 <requirement type="package" version="1.6.2">bioconductor-scran</requirement>
13 <requirement type="package" version="0.13">r-rtsne</requirement> 15 <requirement type="package" version="0.13">r-rtsne</requirement>
14 <!-- Using older version of ggplot2 as getting error like this with 3.0.0: 16 <!-- Using older version of ggplot2 as getting error like this with 3.0.0:
15 https://github.com/ggobi/ggally/issues/263 --> 17 https://github.com/ggobi/ggally/issues/263 -->
16 <requirement type="package" version="2.2.1">r-ggplot2</requirement> 18 <requirement type="package" version="2.2.1">r-ggplot2</requirement>
17 <requirement type="package" version="1.6.0">r-optparse</requirement> 19 <requirement type="package" version="1.6.0">r-optparse</requirement>
18 </requirements> 20 </requirements>
19 <version_command><![CDATA[ 21 <version_command><![CDATA[
20 echo $(R --version | grep version | grep -v GNU)", scPipe version" $(R --vanilla --slave -e "library(scPipe); cat(sessionInfo()\$otherPkgs\$scPipe\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", Rsubread version" $(R --vanilla --slave -e "library(Rsubread); cat(sessionInfo()\$otherPkgs\$Rsubread\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", knitr version" $(R --vanilla --slave -e "library(knitr); cat(sessionInfo()\$otherPkgs\$knitr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmarkdown version" $(R --vanilla --slave -e "library(rmarkdown); cat(sessionInfo()\$otherPkgs\$rmarkdown\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", readr version" $(R --vanilla --slave -e "library(readr); cat(sessionInfo()\$otherPkgs\$readr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", plotly version" $(R --vanilla --slave -e "library(plotly); cat(sessionInfo()\$otherPkgs\$plotly\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", DT version" $(R --vanilla --slave -e "library(DT); cat(sessionInfo()\$otherPkgs\$DT\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scater version" $(R --vanilla --slave -e "library(scater); cat(sessionInfo()\$otherPkgs\$scater\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scran version" $(R --vanilla --slave -e "library(scran); cat(sessionInfo()\$otherPkgs\$scran\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtsne version" $(R --vanilla --slave -e "library(Rtsne); cat(sessionInfo()\$otherPkgs\$Rtsne\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ") 22 echo $(R --version | grep version | grep -v GNU)", scPipe version" $(R --vanilla --slave -e "library(scPipe); cat(sessionInfo()\$otherPkgs\$scPipe\$Version)" 2> 
/dev/null | grep -v -i "WARNING: ")", Rsubread version" $(R --vanilla --slave -e "library(Rsubread); cat(sessionInfo()\$otherPkgs\$Rsubread\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", knitr version" $(R --vanilla --slave -e "library(knitr); cat(sessionInfo()\$otherPkgs\$knitr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmarkdown version" $(R --vanilla --slave -e "library(rmarkdown); cat(sessionInfo()\$otherPkgs\$rmarkdown\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", readr version" $(R --vanilla --slave -e "library(readr); cat(sessionInfo()\$otherPkgs\$readr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", plotly version" $(R --vanilla --slave -e "library(plotly); cat(sessionInfo()\$otherPkgs\$plotly\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", DT version" $(R --vanilla --slave -e "library(DT); cat(sessionInfo()\$otherPkgs\$DT\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scater version" $(R --vanilla --slave -e "library(scater); cat(sessionInfo()\$otherPkgs\$scater\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scran version" $(R --vanilla --slave -e "library(scran); cat(sessionInfo()\$otherPkgs\$scran\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtsne version" $(R --vanilla --slave -e "library(Rtsne); cat(sessionInfo()\$otherPkgs\$Rtsne\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
21 ]]></version_command> 23 ]]></version_command>
22 <command detect_errors="exit_code"><![CDATA[ 24 <command detect_errors="exit_code"><![CDATA[
23 #import re 25 #import re
24 26
25 ## Link input files 27 ## Link input files
26 28
27 #if $ref_fasta.fasta_source == "history": 29 #if $samples.format_select == "bam":
28 #set $fasta_name = re.sub('[^\w\-\s]', '_', str($ref_fasta.ref_fa_hist.element_identifier)) 30 #set $bam_name = re.sub('[^\w\-\s]', '_', str($samples.bam.element_identifier))
29 ln -s '$ref_fasta.ref_fa_hist' '$fasta_name' && 31 ln -s '$samples.bam' '$bam_name' &&
32 ln -s '$samples.bam.metadata.bam_index' '${bam_name}.bai' &&
30 #else: 33 #else:
31 #set $fasta_name = os.path.basename(str($ref_fasta.ref_fa_builtin.fields.path)) 34
32 ln -s '$ref_fasta.ref_fa_builtin.fields.path' '$fasta_name' && 35 ## FASTA ##
33 #end if 36
37 #if $samples.ref_fasta.fasta_source == "history":
38 #set $fasta_name = re.sub('[^\w\-\s]', '_', str($samples.ref_fasta.ref_fa_hist.element_identifier))
39 ln -s '$samples.ref_fasta.ref_fa_hist' '$fasta_name' &&
40 #else:
41 #set $fasta_name = os.path.basename(str($samples.ref_fasta.ref_fa_builtin.fields.path))
42 ln -s '$samples.ref_fasta.ref_fa_builtin.fields.path' '$fasta_name' &&
43 #end if
44
45 ## Reads ##
46
47 #if $samples.paired_format.paired_format_selector == 'paired_collection':
48 #set $in1 = $samples.paired_format.paired_input.forward
49 #set $in2 = $samples.paired_format.paired_input.reverse
50 #set $in1_name = re.sub('[^\w\-\s]', '_', str($samples.paired_format.paired_input.name))
51 #set $in2_name = re.sub('[^\w\-\s]', '_', str("%s_%s" % ($samples.paired_format.paired_input.name, "R2")))
52 ln -s '$in1' '$in1_name' &&
53 ln -s '$in2' '$in2_name' &&
54 #elif $samples.paired_format.paired_format_selector == 'paired':
55 #set $in1 = $samples.paired_format.in1
56 #set $in2 = $samples.paired_format.in2
57 #set $in1_name = re.sub('[^\w\-\s]', '_', str($samples.paired_format.in1.element_identifier))
58 #set $in2_name = re.sub('[^\w\-\s]', '_', str($samples.paired_format.in2.element_identifier))
59 ln -s '$in1' '$in1_name' &&
60 ln -s '$in2' '$in2_name' &&
61 #end if
62 #end if
63
64 ## GFF3 ##
34 65
35 #set $anno_name = re.sub('[^\w\-\s]', '_', str($exons.element_identifier)) 66 #set $anno_name = re.sub('[^\w\-\s]', '_', str($exons.element_identifier))
36 #set $anno_name = $anno_name + ".gff3" 67 #set $anno_name = $anno_name + ".gff3"
37 ln -s '${exons}' '$anno_name' && 68 ln -s '${exons}' '$anno_name' &&
38 69
39 #if $paired_format.paired_format_selector == 'paired_collection': 70 #if $out.rscript:
40 #set $in1 = $paired_format.paired_input.forward 71 cp '$__tool_directory__/scpipe.R' '$out_rscript' &&
41 #set $in2 = $paired_format.paired_input.reverse 72 #end if
42 #set $in1_name = re.sub('[^\w\-\s]', '_', str($paired_format.paired_input.name)) 73
43 #set $in2_name = re.sub('[^\w\-\s]', '_', str("%s_%s" % ($paired_format.paired_input.name, "R2"))) 74 TAB=\$(printf '\t') &&
44 #set out1 = $output_paired_coll.forward 75
45 #set out2 = $output_paired_coll.reverse 76 #if $samples.barcodes:
46 ln -s '$in1' '$in1_name' && 77 sed -i.bak -e "s/\${TAB}/,/g" '$samples.barcodes' &&
47 ln -s '$in2' '$in2_name' && 78 #end if
48 #else 79
49 #set $in1_name = re.sub('[^\w\-\s]', '_', str($in1.element_identifier)) 80 ## Run scPipe
50 ln -s '$in1' '$in1_name' && 81
51 82 Rscript '$__tool_directory__/scpipe.R'
52 #if str($paired_format.paired_format_selector) == 'paired': 83
53 #set $in2_name = re.sub('[^\w\-\s]', '_', str($in2.element_identifier)) 84 #if $samples.format_select == "bam":
54 ln -s '$in2' '$in2_name' && 85 --bam '$bam_name'
86 --samplename '$bam_name'
87 --barcodes '$samples.barcodes'
88 #else:
89 --fasta '$fasta_name'
90 --read1 '$in1_name'
91 --read2 '$in2_name'
92 --samplename '$in1_name'
93 #if $samples.barcodes:
94 --barcodes '$samples.barcodes'
55 #end if 95 #end if
56 #end if 96 #end if
57 97
58 #if $rscript:
59 cp '$__tool_directory__/scpipe.R' '$out_rscript' &&
60 #end if
61
62 TAB=\$(printf '\t') &&
63
64 #if $barcodes:
65 sed -i.bak -e "s/\${TAB}/,/g" '$barcodes' &&
66 #end if
67
68 ## Run scPipe
69
70 Rscript '$__tool_directory__/scpipe.R'
71
72 --fasta '$fasta_name'
73 --exons '$anno_name' 98 --exons '$anno_name'
74 --samplename '$in1_name' 99 --organism '$organism'
75 --read1 '$in1_name' 100
76 --read2 '$in2_name'
77 --bs1 $bs1 101 --bs1 $bs1
78 --bl1 $bl1 102 --bl1 $bl1
79 --bs2 $bs2 103 --bs2 $bs2
80 --bl2 $bl2 104 --bl2 $bl2
81 --us $us 105 --us $us
82 --ul $ul 106 --ul $ul
83 107
84 #if $barcodes: 108 #if $out.metrics_matrix:
85 --barcodes '$barcodes' 109 --metrics_matrix '$out.metrics_matrix'
86 #end if 110 #end if
87 111
88 #if $report: 112 #if $out.report:
89 --report '$report' 113 --report '$out.report'
90 #end if 114 #end if
91 115
92 #if $rdata: 116 #if $out.rdata:
93 --rdata '$rdata' 117 --rdata '$out.rdata'
94 #end if 118 #end if
95 119
96 --rmlow $adv.rmlow 120 --rmlow $adv.f.rmlow
97 --rmN $adv.rmN 121 --rmN $adv.f.rmN
98 --minq $adv.minq 122 --minq $adv.f.minq
99 --numbq $adv.numbq 123 --numbq $adv.f.numbq
124 --max_mis $adv.f.max_mis
125 --max_reads $adv.f.max_reads
126 --min_count $adv.f.min_count
127
128 --UMI_cor $adv.UMI_cor
100 --stnd $adv.stnd 129 --stnd $adv.stnd
101 --max_mis $adv.max_mis
102 --UMI_cor $adv.UMI_cor
103 --gene_fl $adv.gene_fl 130 --gene_fl $adv.gene_fl
104 --max_reads $adv.max_reads 131
105 --min_count $adv.min_count
106 --nthreads \${GALAXY_SLOTS:-2} 132 --nthreads \${GALAXY_SLOTS:-2}
107 133
108 && 134 #if $keep_outliers:
109 sed -e "s/,/\${TAB}/g" gene_count.csv > gene_count.tsv 135 --keep_outliers '$keep_outliers'
136 && sed -e "s/,/\${TAB}/g" gene_count.csv > gene_count.tsv
137 #end if
110 138
111 ]]></command> 139 ]]></command>
112 140
113 <inputs> 141 <inputs>
114 <conditional name="ref_fasta"> 142 <conditional name="samples">
115 <param name="fasta_source" type="select" label="Reference genome FASTA"> 143 <param name="format_select" type="select" label="FASTQs or BAM" help="Select the format of the input sample">
116 <option value="cached" selected="true">Use a built-in genome</option> 144 <option value="fastq" selected="True">FASTQ</option>
117 <option value="history">Use a FASTA from history</option> 145 <option value="bam">BAM</option>
118 </param> 146 </param>
119 <when value="cached"> 147 <when value="bam">
120 <param name="ref_fa_builtin" type="select" label="Select a built-in FASTA" help="If your genome of interest is not listed, contact your Galaxy administrator"> 148 <param name="bam" type="data" format="bam" label="BAM files"/>
121 <options from_data_table="all_fasta"> 149 <param name="barcodes" type="data" format="tabular,tsv" label="Cell barcodes file" help="File of cell barcodes. Should contain at least two columns, where the first column has the cell id and the second column contains the barcode sequence."/>
122 <filter type="sort_by" column="2" />
123 <validator type="no_options" message="No FASTA is available for the selected input dataset" />
124 </options>
125 </param>
126 </when> 150 </when>
127 <when value="history"> 151 <when value="fastq">
128 <param name="ref_fa_hist" type="data" format="fasta" label="Select a history FASTA" /> 152 <conditional name="ref_fasta">
153 <param name="fasta_source" type="select" label="Reference genome FASTA">
154 <option value="cached" selected="true">Use a built-in genome</option>
155 <option value="history">Use a FASTA from history</option>
156 </param>
157 <when value="cached">
158 <param name="ref_fa_builtin" type="select" label="Select a built-in FASTA" help="If your genome of interest is not listed, contact your Galaxy administrator">
159 <options from_data_table="all_fasta">
160 <filter type="sort_by" column="2" />
161 <validator type="no_options" message="No FASTA is available for the selected input dataset" />
162 </options>
163 </param>
164 </when>
165 <when value="history">
166 <param name="ref_fa_hist" type="data" format="fasta" label="Select a history FASTA" />
167 </when>
168 </conditional>
169 <conditional name="paired_format">
170 <param name="paired_format_selector" type="select" label="Paired reads or Paired collection">
171 <option value="paired">Paired</option>
172 <option value="paired_collection">Paired Collection</option>
173 </param>
174 <when value="paired">
175 <param name="in1" type="data" format="fastq.gz,fastq" label="Input Read 1" help="Read 1 should contain the transcripts in fastq.gz format"/>
176 <param name="in2" type="data" format="fastq.gz,fastq" label="Input Read 2" help="Read 2 should contain UMI and barcodes in fastq.gz format"/>
177 </when>
178 <when value="paired_collection">
179 <param name="paired_input" type="data_collection" collection_type="paired" format="fastq.gz,fastq" label="Select paired collection(s)"/>
180 </when>
181 </conditional>
182 <param name="barcodes" type="data" format="tabular,tsv" optional="True" label="Cell barcodes file" help="Optional file of cell barcodes. If not provided the barcodes will be detected from the reads. Should contain at least two columns, where the first column has the cell id and the second column contains the barcode sequence."/>
129 </when> 183 </when>
130 </conditional> 184 </conditional>
131 <param name="exons" type="data" format="gff3" label="Exon annotation GFF3 file" help="Current supported sources: ENSEMBL, GENCODE and RefSeq"/> 185 <param name="exons" type="data" format="gff3" label="Exon annotation GFF3 file" help="Current supported source is ENSEMBL"/>
132 186 <param name="organism" type="text" label="Species gene id" help="This must be in biomaRt ENSEMBL listDatasets() format e.g. hsapiens_gene_ensembl. See the biomaRt user guide here: https://www.bioconductor.org/packages/release/bioc/vignettes/biomaRt/inst/doc/biomaRt.html">
133 <conditional name="paired_format"> 187 <validator type="empty_field" />
134 <param name="paired_format_selector" type="select" label="Paired reads or Paired collection"> 188 <validator type="regex" message="Only letters, digits, underscores and parentheses are allowed">^[\(\w\)]+$</validator>
135 <option value="paired">Paired</option> 189 </param>
136 <option value="paired_collection">Paired Collection</option>
137 </param>
138 <when value="paired">
139 <param name="in1" type="data" format="fastq.gz,fastq" label="Input Read 1" help="Read 1 should contain the transcripts in fastq.gz format"/>
140 <param name="in2" type="data" format="fastq.gz,fastq" label="Input Read 2" help="Read 2 should contain UMI and barcodes in fastq.gz format"/>
141 </when>
142 <when value="paired_collection">
143 <param name="paired_input" type="data_collection" collection_type="paired" format="fastq.gz,fastq" label="Select paired collection(s)"/>
144 </when>
145 </conditional>
146 <param name="barcodes" type="data" format="tabular,tsv" optional="True" label="Cell barcodes file" help="Optional file of cell barcodes. Should contain at least two columns, where the first column has the cell id and the second column contains the barcode sequence."/>
147 <param argument="--bs1" type="integer" min="-1" value="-1" label="Barcode start Read 1" help="Barcode start position in Read 1. Positions are 0-indexed so the first base is considered base 0, -1 indicates no barcode. Default: -1" /> 190 <param argument="--bs1" type="integer" min="-1" value="-1" label="Barcode start Read 1" help="Barcode start position in Read 1. Positions are 0-indexed so the first base is considered base 0, -1 indicates no barcode. Default: -1" />
148 <param argument="--bl1" type="integer" min="0" value="0" label="Barcode length Read 1" help="Barcode length in Read 1, 0 if no barcode present. Default: 0" /> 191 <param argument="--bl1" type="integer" min="0" value="0" label="Barcode length Read 1" help="Barcode length in Read 1, 0 if no barcode present. Default: 0" />
149 <param argument="--bs2" type="integer" min="-1" value="6" label="Barcode start Read 2" help="Barcode start position in Read 2. Positions are 0-indexed so the first base is considered base 0, -1 indicates no barcode. Default: 6" /> 192 <param argument="--bs2" type="integer" min="-1" value="6" label="Barcode start Read 2" help="Barcode start position in Read 2. Positions are 0-indexed so the first base is considered base 0, -1 indicates no barcode. Default: 6" />
150 <param argument="--bl2" type="integer" min="0" value="8" label="Barcode length Read 2" help="Barcode length in Read 2, 0 if no barcode present. Default: 8" /> 193 <param argument="--bl2" type="integer" min="0" value="8" label="Barcode length Read 2" help="Barcode length in Read 2, 0 if no barcode present. Default: 8" />
151 <param argument="--us" type="integer" min="-1" value="0" label="UMI start Read 2" help="UMI start position in Read 2. Positions are 0-indexed so the first base is considered base 0, -1 indicates no UMI. Default: 0" /> 194 <param argument="--us" type="integer" min="-1" value="0" label="UMI start Read 2" help="UMI start position in Read 2. Positions are 0-indexed so the first base is considered base 0, -1 indicates no UMI. Default: 0" />
152 <param argument="--ul" type="integer" min="0" value="6" label="UMI length Read 2" help="UMI length in Read 2, 0 if no UMI present. Default: 6" /> 195 <param argument="--ul" type="integer" min="0" value="6" label="UMI length Read 2" help="UMI length in Read 2, 0 if no UMI present. Default: 6" />
153 <param name="report" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Output HTML Report?" help="If this option is set to Yes, a HTML report containing QC metrics will be output. Default: Yes" /> 196 <param name="keep_outliers" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Keep outliers?" help="If this option is set to Yes, outlier cells will not be removed from the gene count matrix. Default: No" />
154 <param name="rscript" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used to annotate the IDs will be provided as a text file in the output. Default: No" /> 197 <section name="out" title="Output Options">
155 <param name="rdata" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output RData file?" 198 <param name="plots" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Output PDF with plots?" help="If this option is set to Yes, a PDF containing QC plots will be output. Default: Yes" />
156 help="Output all the data used by R to construct the tables and plots, can be loaded into R. Default: No"> 199 <param name="metrics_matrix" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output QC metrics matrix?" help="If this option is set a matrix of QC metrics will be output. Default: No" />
157 </param> 200 <param name="report" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output HTML Report?" help="Only valid if FASTQs are input. If this option is set to Yes, a HTML report containing QC metrics will be output. Default: No" />
201 <param name="rscript" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used to annotate the IDs will be provided as a text file in the output. Default: No" />
202 <param name="rdata" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output RData file?"
203 help="Output all the data used by R to construct the tables and plots, can be loaded into R. Default: No">
204 </param>
205 </section>
158 <section name="adv" title="Advanced Options"> 206 <section name="adv" title="Advanced Options">
159 <param argument="--rmlow" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Remove reads with low quality" help="Default: Yes" /> 207 <section name="f" title="FASTQ input only">
160 <param argument="--rmN" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Remove reads with N in barcode or UMI" help="Default: Yes" /> 208 <param argument="--rmlow" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Remove reads with low quality" help="Default: Yes" />
161 <param argument="--minq" type="integer" min="0" value="20" label="Minimum read quality" help="Default: 20" /> 209 <param argument="--rmN" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Remove reads with N in barcode or UMI" help="Default: Yes" />
162 <param argument="--numbq" type="integer" min="0" value="2" label="Maximum number of bases below minq" help="Default: 2" /> 210 <param argument="--minq" type="integer" min="0" value="20" label="Minimum read quality" help="Default: 20" />
163 <param argument="--stnd" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Perform strand-specific mapping" help="Default: Yes" /> 211 <param argument="--numbq" type="integer" min="0" value="2" label="Maximum number of bases below minq" help="Default: 2" />
164 <param argument="--max_mis" type="integer" min="0" value="1" label="Maximum mismatch allowed in barcode" help="Default: 1" /> 212 <param argument="--max_mis" type="integer" min="0" value="1" label="Maximum mismatch allowed in barcode" help="Default: 1" />
165 <param argument="--UMI_cor" type="integer" min="0" value="1" label="Correct UMI sequence error" help="0 means no correction, 1 means simple correction and merge UMI with distance 1. Default: 1" /> 213
166 <param argument="--gene_fl" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Remove low abundant genes" help="Low abundant is defined as only one copy of one UMI for this gene. Default: No" /> 214 <param argument="--max_reads" type="integer" min="0" value="1000000" label="Maximum reads processed" help="Maximum reads processed if detecting barcodes. Default: 1,000,000" />
167 <param argument="--max_reads" type="integer" min="0" value="1000000" label="Maximum reads processed" help="Maximum reads processed if detecting barcodes. Default: 1,000,000" /> 215 <param argument="--min_count" type="integer" min="0" value="10" label="Minimum count to keep" help="Minimum count to keep if detecting barcodes. Barcode will be discarded if it has lower count. This should be set according to --max_reads. Default: 10" />
168 <param argument="--min_count" type="integer" min="0" value="10" label="Minimum count to keep" help="Minimum count to keep if detecting barcodes. Barcode will be discarded if it has lower count. This should be set according to --max_reads. Default: 10" /> 216 </section>
217 <param argument="--UMI_cor" type="integer" min="0" value="1" label="Correct UMI sequence error" help="0 means no correction, 1 means simple correction and merge UMI with distance 1. Default: 1" />
218 <param argument="--stnd" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Perform strand-specific mapping" help="Default: Yes" />
219 <param argument="--gene_fl" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Remove low abundant genes" help="Low abundant is defined as only one copy of one UMI for this gene. Default: No" />
169 </section> 220 </section>
170 </inputs> 221 </inputs>
171 222
172 <outputs> 223 <outputs>
173 <data name="out_matrix" format="tabular" from_work_dir="gene_count.tsv" label="${tool.name} on ${on_string}: Count Matrix" /> 224 <data name="out_matrix" format="tabular" from_work_dir="gene_count.tsv" label="${tool.name} on ${on_string}: Count Matrix" />
225 <data name="out_plots" format="pdf" from_work_dir="plots.pdf" label="${tool.name} on ${on_string}: Plots">
226 <filter>plots</filter>
227 </data>
228 <data name="out_metrics_matrix" format="tabular" from_work_dir="metrics_matrix.tsv" label="${tool.name} on ${on_string}: QC metrics matrix">
229 <filter>metrics_matrix</filter>
230 </data>
174 <data name="out_report" format="html" from_work_dir="report.nb.html" label="${tool.name} on ${on_string}: HTML Report" > 231 <data name="out_report" format="html" from_work_dir="report.nb.html" label="${tool.name} on ${on_string}: HTML Report" >
175 <filter>report</filter> 232 <filter>report</filter>
176 </data> 233 </data>
177 <data name="out_rscript" format="txt" from_work_dir="out_rscript.txt" label="${tool.name} on ${on_string}: Rscript"> 234 <data name="out_rscript" format="txt" from_work_dir="out_rscript.txt" label="${tool.name} on ${on_string}: Rscript">
178 <filter>rscript</filter> 235 <filter>rscript</filter>
183 </outputs> 240 </outputs>
184 241
185 <tests> 242 <tests>
186 <!-- Ensure outputs work --> 243 <!-- Ensure outputs work -->
187 <test> 244 <test>
245 <param name="format_select" value="fastq" />
188 <param name="fasta_source" value="history"/> 246 <param name="fasta_source" value="history"/>
189 <param name="ref_fa_hist" ftype="fasta" value="mm10_MT19.fa.gz"/> 247 <param name="ref_fa_hist" ftype="fasta" value="mm10_MT19.fa.gz"/>
190 <param name="exons" ftype="gff3" value="mm10_MT19.gff3.gz"/> 248 <param name="exons" ftype="gff3" value="mm10_MT19.gff3.gz"/>
249 <param name="organism" value="mmusculus_gene_ensembl"/>
191 <param name="paired_format_selector" value="paired" /> 250 <param name="paired_format_selector" value="paired" />
192 <param name="in1" ftype="fastqsanger.gz" value="CB51_MT19_R1.gz"/> 251 <param name="in1" ftype="fastqsanger.gz" value="CB51_MT19_R1.gz"/>
193 <param name="in2" ftype="fastqsanger.gz" value="CB51_MT19_R2.gz"/> 252 <param name="in2" ftype="fastqsanger.gz" value="CB51_MT19_R2.gz"/>
194 <param name="us" value="-1"/> 253 <param name="us" value="-1"/>
195 <param name="max_reads" value="5000000"/> 254 <param name="max_reads" value="5000000"/>
206 </assert_contents> 265 </assert_contents>
207 </output> 266 </output>
208 </test> 267 </test>
209 <!-- Ensure built-in fasta works --> 268 <!-- Ensure built-in fasta works -->
210 <test> 269 <test>
270 <param name="format_select" value="fastq" />
211 <param name="fasta_source" value="cached"/> 271 <param name="fasta_source" value="cached"/>
212 <param name="exons" ftype="gff3" value="mm10_MT19.gff3.gz"/> 272 <param name="exons" ftype="gff3" value="mm10_MT19.gff3.gz"/>
273 <param name="organism" value="mmusculus_gene_ensembl"/>
213 <param name="paired_format_selector" value="paired" /> 274 <param name="paired_format_selector" value="paired" />
214 <param name="in1" ftype="fastqsanger.gz" dbkey="mm10" value="CB51_MT19_R1.gz"/> 275 <param name="in1" ftype="fastqsanger.gz" dbkey="mm10" value="CB51_MT19_R1.gz"/>
215 <param name="in2" ftype="fastqsanger.gz" dbkey="mm10" value="CB51_MT19_R2.gz"/> 276 <param name="in2" ftype="fastqsanger.gz" dbkey="mm10" value="CB51_MT19_R2.gz"/>
216 <param name="us" value="-1"/> 277 <param name="us" value="-1"/>
217 <param name="max_reads" value="5000000"/> 278 <param name="max_reads" value="5000000"/>
218 <param name="min_count" value="100"/> 279 <param name="min_count" value="100"/>
219 <param name="report" value="False" />
220 <output name="out_matrix" > 280 <output name="out_matrix" >
221 <assert_contents> 281 <assert_contents>
222 <has_text text="ENSMUSG00000064351" /> 282 <has_text text="ENSMUSG00000064351" />
223 </assert_contents> 283 </assert_contents>
224 </output> 284 </output>
225 </test> 285 </test>
286 <!-- Ensure BAM input works -->
287 <test>
288 <param name="format_select" value="bam" />
289 <param name="bam" ftype="bam" value="aligned.mapped.bam"/>
290 <param name="barcodes" ftype="tabular" value="barcode_anno.tsv"/>
291 <param name="exons" ftype="gff3" value="mm10_MT19.gff3.gz"/>
292 <param name="organism" value="mmusculus_gene_ensembl"/>
293 <param name="us" value="-1"/>
294 <output name="out_matrix" >
295 <assert_contents>
296 <has_text text="ENSMUSG00000064351" />
297 </assert_contents>
298 </output>
299 </test>
300 <!-- Ensure BAM input with QC outputs works -->
301 <test>
302 <param name="format_select" value="bam" />
303 <param name="bam" ftype="bam" value="aligned.mapped.bam"/>
304 <param name="barcodes" ftype="tabular" value="barcode_anno.tsv"/>
305 <param name="exons" ftype="gff3" value="mm10_MT19.gff3.gz"/>
306 <param name="organism" value="mmusculus_gene_ensembl"/>
307 <param name="us" value="-1"/>
308 <param name="plots" value="True"/>
309 <param name="metrics_matrix" value="True"/>
310 <output name="out_matrix" >
311 <assert_contents>
312 <has_text text="ENSMUSG00000064351" />
313 </assert_contents>
314 </output>
315 <output name="out_metrics_matrix" >
316 <assert_contents>
317 <has_line_matching expression="cell_id.*unaligned.*aligned_unmapped.*mapped_to_exon.*mapped_to_intron.*ambiguous_mapping.*mapped_to_ERCC.*mapped_to_MT.*number_of_genes.*total_count_per_cell.*non_mt_percent\toutliers" />
318 </assert_contents>
319 </output>
320 <output name="out_plots" ftype="pdf" value="plots.pdf" compare="sim_size" />
321 </test>
322
226 </tests> 323 </tests>
227 <help><![CDATA[ 324 <help><![CDATA[
228 .. class:: infomark 325 .. class:: infomark
229 326
230 **What it does** 327 **What it does**
231 328
232 scPipe_ is an `R/Bioconductor package`_ that integrates barcode demultiplexing, read alignment, UMI-aware gene-level quantification and quality control of raw sequencing data generated by multiple protocols that include CEL-seq, MARS-seq, Chromium 10X, Drop-seq and Smart-seq. scPipe produces a count matrix that is essential for downstream analysis along with an HTML report that summarises data quality. These results can be used as input for downstream analyses including normalization, visualization and statistical testing. 329 scPipe_ is an `R/Bioconductor package`_ that integrates barcode demultiplexing, read alignment, UMI-aware gene-level quantification and quality control of raw sequencing data generated by multiple protocols that include CEL-seq, MARS-seq, Chromium 10X, Drop-seq and Smart-seq. scPipe produces a count matrix that is essential for downstream analysis along with QC metrics and a HTML report that summarises data quality. These results can be used as input for downstream analyses including normalization, visualization and statistical testing.
233 Examples of the report output can be found here_. 330 The scPipe workflow is described in this vignette_ and examples of the report output can be found here_. Note that outlier cells are detected and removed by default but they can be kept if "Keep outliers?" is selected.
234 331
235 ----- 332 -----
236 333
237 **Inputs** 334 **Inputs**
238 335
336 Either
239 * Reference genome in FASTA format 337 * Reference genome in FASTA format
240 * Exon annotation in GFF3 format
241 * Paired-end FASTQ.GZ reads 338 * Paired-end FASTQ.GZ reads
242 * Cell barcodes TAB-separated file (Optional) 339 * Cell barcodes TAB-separated file (Optional)
340 OR
341 * BAM file
342 * Cell barcodes TAB-separated file
343 AND
344 * Exon annotation in ENSEMBL GFF3 format
243 345
244 *Read Structure* 346 *Read Structure*
245 347
246 The default read structure represents CEL-seq 348 The default read structure represents CEL-seq
247 paired-ended reads, with one cell barcode in Read 2 Start from 349 paired-ended reads, with one cell barcode in Read 2 Start from
264 366
265 * Count matrix of genes in Tabular format 367 * Count matrix of genes in Tabular format
266 368
267 Optionally you can choose to output 369 Optionally you can choose to output
268 370
269 * HTML report (default is Yes) 371 * PDF of QC Plots (default is Yes)
372 * QC metrics matrix
373 * HTML report (if FASTQs are input)
270 * Rscript 374 * Rscript
271 * RData 375 * RData
272 376
273 .. _scPipe: http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006361 377 .. _scPipe: http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006361
274 .. _R/Bioconductor package: https://bioconductor.org/packages/release/bioc/html/scPipe.html 378 .. _R/Bioconductor package: https://bioconductor.org/packages/release/bioc/html/scPipe.html
379 .. _vignette: https://bioconductor.org/packages/release/bioc/vignettes/scPipe/inst/doc/scPipe_tutorial.html
275 .. _here: http://bioinf.wehi.edu.au/scPipe/ 380 .. _here: http://bioinf.wehi.edu.au/scPipe/
276 381
277 ]]></help> 382 ]]></help>
278 <citations> 383 <citations>
279 <citation type="doi">10.1371/journal.pcbi.1006361</citation> 384 <citation type="doi">10.1371/journal.pcbi.1006361</citation>