comparison scpipe.xml @ 0:32e1bfc6b7b2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 8908da9cdd112ae0943dbf1eccb221e84cd99ca7
author iuc
date Wed, 15 Aug 2018 13:54:40 -0400
parents
children 4ec6717872b1
comparison
equal deleted inserted replaced
-1:000000000000 0:32e1bfc6b7b2
1 <tool id="scpipe" name="scPipe" version="1.0.0">
2 <description>- preprocessing pipeline for single cell RNA-seq</description>
3 <requirements>
4 <requirement type="package" version="1.0.0">bioconductor-scpipe</requirement>
5 <requirement type="package" version="1.28.1">bioconductor-rsubread</requirement>
6 <requirement type="package" version="1.20">r-knitr</requirement>
7 <requirement type="package" version="1.10">r-rmarkdown</requirement>
8 <requirement type="package" version="1.1.1">r-readr</requirement>
9 <requirement type="package" version="4.7.1">r-plotly</requirement>
10 <requirement type="package" version="0.4">r-dt</requirement>
11 <requirement type="package" version="1.6.0">bioconductor-scater</requirement>
12 <requirement type="package" version="1.6.2">bioconductor-scran</requirement>
13 <requirement type="package" version="0.13">r-rtsne</requirement>
14 <!-- ggplot2 is deliberately pinned to the older 2.2.1 release: version 3.0.0 gave an error like the one reported at
15 https://github.com/ggobi/ggally/issues/263 -->
16 <requirement type="package" version="2.2.1">r-ggplot2</requirement>
17 <requirement type="package" version="1.6.0">r-optparse</requirement>
18 </requirements>
19 <!-- Prints the R version line followed by the installed version of each R package listed in the requirements --><version_command><![CDATA[
20 echo $(R --version | grep version | grep -v GNU)", scPipe version" $(R --vanilla --slave -e "library(scPipe); cat(sessionInfo()\$otherPkgs\$scPipe\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", Rsubread version" $(R --vanilla --slave -e "library(Rsubread); cat(sessionInfo()\$otherPkgs\$Rsubread\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", knitr version" $(R --vanilla --slave -e "library(knitr); cat(sessionInfo()\$otherPkgs\$knitr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmarkdown version" $(R --vanilla --slave -e "library(rmarkdown); cat(sessionInfo()\$otherPkgs\$rmarkdown\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", readr version" $(R --vanilla --slave -e "library(readr); cat(sessionInfo()\$otherPkgs\$readr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", plotly version" $(R --vanilla --slave -e "library(plotly); cat(sessionInfo()\$otherPkgs\$plotly\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", DT version" $(R --vanilla --slave -e "library(DT); cat(sessionInfo()\$otherPkgs\$DT\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scater version" $(R --vanilla --slave -e "library(scater); cat(sessionInfo()\$otherPkgs\$scater\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scran version" $(R --vanilla --slave -e "library(scran); cat(sessionInfo()\$otherPkgs\$scran\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtsne version" $(R --vanilla --slave -e "library(Rtsne); cat(sessionInfo()\$otherPkgs\$Rtsne\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
21 ]]></version_command>
22 <command detect_errors="exit_code"><![CDATA[
23 #import re
24
25 ## Link input files into the working directory under sanitised names
26 ## (a built-in genome is a select parameter, so its selected value supplies the link name)
27 #if $ref_fasta.fasta_source == "history":
28 #set $fasta_name = re.sub('[^\w\-\s]', '_', str($ref_fasta.ref_fa_hist.element_identifier))
29 ln -s '$ref_fasta.ref_fa_hist' '$fasta_name' &&
30 #else:
31 #set $fasta_name = re.sub('[^\w\-\s]', '_', str($ref_fasta.ref_fa_builtin))
32 ln -s '$ref_fasta.ref_fa_builtin.fields.path' '$fasta_name' &&
33 #end if
34
35 #set $anno_name = re.sub('[^\w\-\s]', '_', str($exons.element_identifier))
36 #set $anno_name = $anno_name + ".gff3"
37 ln -s '${exons}' '$anno_name' &&
38 ## Reads nested in the paired_format conditional are referenced by their fully-qualified names
39 #if $paired_format.paired_format_selector == 'paired_collection':
40 #set $in1 = $paired_format.paired_input.forward
41 #set $in2 = $paired_format.paired_input.reverse
42 #set $in1_name = re.sub('[^\w\-\s]', '_', str($paired_format.paired_input.name))
43 #set $in2_name = re.sub('[^\w\-\s]', '_', str("%s_%s" % ($paired_format.paired_input.name, "R2")))
44 ## This tool declares no collection output, so only the two input links are set up here
45
46 ln -s '$in1' '$in1_name' &&
47 ln -s '$in2' '$in2_name' &&
48 #else
49 #set $in1_name = re.sub('[^\w\-\s]', '_', str($paired_format.in1.element_identifier))
50 ln -s '$paired_format.in1' '$in1_name' &&
51
52 #if str($paired_format.paired_format_selector) == 'paired':
53 #set $in2_name = re.sub('[^\w\-\s]', '_', str($paired_format.in2.element_identifier))
54 ln -s '$paired_format.in2' '$in2_name' &&
55 #end if
56 #end if
57
58 #if $rscript:
59 cp '$__tool_directory__/scpipe.R' '$out_rscript' &&
60 #end if
61
62 TAB=\$(printf '\t') &&
63 ## Write the comma-separated barcodes to a working-directory copy so the input dataset is left untouched
64 #if $barcodes:
65 sed -e "s/\${TAB}/,/g" '$barcodes' > galaxy_barcodes_input.csv &&
66 #end if
67
68 ## Run scPipe
69
70 Rscript '$__tool_directory__/scpipe.R'
71
72 --fasta '$fasta_name'
73 --exons '$anno_name'
74 --samplename '$in1_name'
75 --read1 '$in1_name'
76 --read2 '$in2_name'
77 --bs1 $bs1
78 --bl1 $bl1
79 --bs2 $bs2
80 --bl2 $bl2
81 --us $us
82 --ul $ul
83
84 #if $barcodes:
85 --barcodes galaxy_barcodes_input.csv
86 #end if
87
88 #if $report:
89 --report '$report'
90 #end if
91
92 #if $rdata:
93 --rdata '$rdata'
94 #end if
95
96 --rmlow $adv.rmlow
97 --rmN $adv.rmN
98 --minq $adv.minq
99 --numbq $adv.numbq
100 --stnd $adv.stnd
101 --max_mis $adv.max_mis
102 --UMI_cor $adv.UMI_cor
103 --gene_fl $adv.gene_fl
104 --max_reads $adv.max_reads
105 --min_count $adv.min_count
106 --nthreads \${GALAXY_SLOTS:-2}
107
108 &&
109 sed -e "s/,/\${TAB}/g" gene_count.csv > gene_count.tsv
110
111 ]]></command>
112
113 <inputs>
114 <conditional name="ref_fasta">
115 <param name="fasta_source" type="select" label="Reference genome FASTA">
116 <option value="cached" selected="true">Use a built-in genome</option>
117 <option value="history">Use a FASTA from history</option>
118 </param>
119 <when value="cached">
120 <param name="ref_fa_builtin" type="select" label="Select a built-in FASTA" help="If your genome of interest is not listed, contact your Galaxy administrator">
121 <options from_data_table="all_fasta">
122 <filter type="sort_by" column="2" />
123 <validator type="no_options" message="No built-in FASTA genomes are available" />
124 </options>
125 </param>
126 </when>
127 <when value="history">
128 <param name="ref_fa_hist" type="data" format="fasta" label="Select a history FASTA" />
129 </when>
130 </conditional>
131 <param name="exons" type="data" format="gff3" label="Exon annotation GFF3 file" help="Current supported sources: ENSEMBL, GENCODE and RefSeq"/>
132
133 <conditional name="paired_format">
134 <param name="paired_format_selector" type="select" label="Paired reads or Paired collection">
135 <option value="paired">Paired</option>
136 <option value="paired_collection">Paired Collection</option>
137 </param>
138 <when value="paired">
139 <param name="in1" type="data" format="fastq.gz,fastq" label="Input Read 1" help="Read 1 should contain the transcripts, in FASTQ or FASTQ.GZ format"/>
140 <param name="in2" type="data" format="fastq.gz,fastq" label="Input Read 2" help="Read 2 should contain UMI and barcodes, in FASTQ or FASTQ.GZ format"/>
141 </when>
142 <when value="paired_collection">
143 <param name="paired_input" type="data_collection" collection_type="paired" format="fastq.gz,fastq" label="Select paired collection(s)"/>
144 </when>
145 </conditional>
146 <param name="barcodes" type="data" format="tabular,tsv" optional="True" label="Cell barcodes file" help="Optional file of cell barcodes. Should contain at least two columns, where the first column has the cell id and the second column contains the barcode sequence."/>
147 <param argument="--bs1" type="integer" min="-1" value="-1" label="Barcode start Read 1" help="Barcode start position in Read 1. Positions are 0-indexed so the first base is considered base 0, -1 indicates no barcode. Default: -1" />
148 <param argument="--bl1" type="integer" min="0" value="0" label="Barcode length Read 1" help="Barcode length in Read 1, 0 if no barcode present. Default: 0" />
149 <param argument="--bs2" type="integer" min="-1" value="6" label="Barcode start Read 2" help="Barcode start position in Read 2. Positions are 0-indexed so the first base is considered base 0, -1 indicates no barcode. Default: 6" />
150 <param argument="--bl2" type="integer" min="0" value="8" label="Barcode length Read 2" help="Barcode length in Read 2, 0 if no barcode present. Default: 8" />
151 <param argument="--us" type="integer" min="-1" value="0" label="UMI start Read 2" help="UMI start position in Read 2. Positions are 0-indexed so the first base is considered base 0, -1 indicates no UMI. Default: 0" />
152 <param argument="--ul" type="integer" min="0" value="6" label="UMI length Read 2" help="UMI length in Read 2, 0 if no UMI present. Default: 6" />
153 <param name="report" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Output HTML Report?" help="If this option is set to Yes, a HTML report containing QC metrics will be output. Default: Yes" />
154 <param name="rscript" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output Rscript?" help="If this option is set to Yes, the R script used to run scPipe will be provided as a text file in the output. Default: No" />
155 <param name="rdata" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output RData file?"
156 help="Output all the data used by R to construct the tables and plots, can be loaded into R. Default: No">
157 </param>
158 <section name="adv" title="Advanced Options">
159 <param argument="--rmlow" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Remove reads with low quality. Default: Yes" />
160 <param argument="--rmN" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Remove reads with N in barcode or UMI. Default: Yes" />
161 <param argument="--minq" type="integer" min="0" value="20" label="Minimum read quality. Default: 20" />
162 <param argument="--numbq" type="integer" min="0" value="2" label="Maximum number of bases below minq. Default: 2" />
163 <param argument="--stnd" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Perform strand-specific mapping. Default: Yes" />
164 <param argument="--max_mis" type="integer" min="0" value="1" label="Maximum mismatch allowed in barcode. Default: 1" />
165 <param argument="--UMI_cor" type="integer" min="0" value="1" label="Correct UMI sequence error" help="0 means no correction, 1 means simple correction and merge UMI with distance 1. Default: 1" />
166 <param argument="--gene_fl" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Remove low abundant genes" help="Low abundant is defined as only one copy of one UMI for this gene. Default: No" />
167 <param argument="--max_reads" type="integer" min="0" value="1000000" label="Maximum reads processed" help="Maximum reads processed if detecting barcodes. Default: 1,000,000" />
168 <param argument="--min_count" type="integer" min="0" value="10" label="Minimum count to keep" help="Minimum count to keep if detecting barcodes. Barcode will be discarded if it has lower count. This should be set according to --max_reads. Default: 10" />
169 </section>
170 </inputs>
171
172 <outputs>
173 <data name="out_matrix" format="tabular" from_work_dir="gene_count.tsv" label="${tool.name} on ${on_string}: Count Matrix" />
174 <data name="out_report" format="html" from_work_dir="report.nb.html" label="${tool.name} on ${on_string}: HTML Report" >
175 <filter>report</filter>
176 </data>
177 <data name="out_rscript" format="txt" label="${tool.name} on ${on_string}: Rscript"> <!-- written directly by the command via $out_rscript, so no from_work_dir -->
178 <filter>rscript</filter>
179 </data>
180 <data name="out_rdata" format="rdata" from_work_dir="scPipe_analysis.RData" label="${tool.name} on ${on_string}: RData file">
181 <filter>rdata</filter>
182 </data>
183 </outputs>
184
185 <tests>
186 <!-- Paired FASTQ inputs with a history FASTA: check the count matrix and the HTML report contents -->
187 <test>
188 <param name="fasta_source" value="history"/>
189 <param name="ref_fa_hist" ftype="fasta" value="mm10_MT19.fa.gz"/>
190 <param name="exons" ftype="gff3" value="mm10_MT19.gff3.gz"/>
191 <param name="paired_format_selector" value="paired" />
192 <param name="in1" ftype="fastqsanger.gz" value="CB51_MT19_R1.gz"/>
193 <param name="in2" ftype="fastqsanger.gz" value="CB51_MT19_R2.gz"/>
194 <param name="us" value="-1"/>
195 <param name="max_reads" value="5000000"/>
196 <param name="min_count" value="100"/>
197 <param name="report" value="True" />
198 <output name="out_matrix" >
199 <assert_contents>
200 <has_text text="ENSMUSG00000024940" />
201 </assert_contents>
202 </output>
203 <output name="out_report" >
204 <assert_contents>
205 <has_text text="scPipe report for sample" />
206 </assert_contents>
207 </output>
208 </test>
209 </tests>
210 <help><![CDATA[
211 .. class:: infomark
212
213 **What it does**
214
215 scPipe_ is an `R/Bioconductor package`_ that integrates barcode demultiplexing, read alignment, UMI-aware gene-level quantification and quality control of raw sequencing data generated by multiple protocols that include CEL-seq, MARS-seq, Chromium 10X, Drop-seq and Smart-seq. scPipe produces a count matrix that is essential for downstream analysis along with an HTML report that summarises data quality. These results can be used as input for downstream analyses including normalization, visualization and statistical testing.
216 Examples of the report output can be found here_.
217
218 -----
219
220 **Inputs**
221
222 * Reference genome in FASTA format
223 * Exon annotation in GFF3 format
224 * Paired-end reads in FASTQ or FASTQ.GZ format
225 * Cell barcodes TAB-separated file (Optional)
226
227 *Read Structure*
228
229 The default read structure represents CEL-seq
230 paired-end reads, with one cell barcode in Read 2 starting at
231 position 6 and the UMI sequence starting at the first base of Read 2. So the
232 read structure will be: `bs1=-1, bl1=0, bs2=6, bl2=8, us=0,
233 ul=6`. `bs1=-1, bl1=0` means there is no barcode in Read 1, so the
234 start position is set to a negative value and the length to zero.
235 `bs2=6, bl2=8` means the barcode is in Read 2, starting at position 6
236 with a length of 8bp. `us=0, ul=6` means the UMI is at the
237 start of Read 2 and its length is 6bp. NOTE: positions are
238 zero-based, so the first base of a sequence is position 0. For a
239 typical Drop-seq experiment the setting will be `bs1=-1,
240 bl1=0, bs2=0, bl2=12, us=12, ul=8`, which means Read 1 only
241 contains transcript and the first 12bp of Read 2 are the barcode,
242 followed by an 8bp UMI.
243
244 -----
245
246 **Outputs**
247
248 * Count matrix of genes in Tabular format
249
250 Optionally you can choose to output
251
252 * HTML report (default is Yes)
253 * Rscript
254 * RData
255
256 .. _scPipe: http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006361
257 .. _R/Bioconductor package: https://bioconductor.org/packages/release/bioc/html/scPipe.html
258 .. _here: http://bioinf.wehi.edu.au/scPipe/
259
260 ]]></help>
261 <citations>
262 <citation type="doi">10.1371/journal.pcbi.1006361</citation>
263 </citations>
264 </tool>