comparison tetranscript.xml @ 0:2dfbcb88d16a draft

"planemo upload for repository https://github.com/mhammell-laboratory/TEtranscripts commit 0afd89b76ba658b8bc9faa1463a1aa160ddb2339"
author iuc
date Wed, 13 May 2020 12:57:29 -0400
parents
children bf4ee2810759
comparison
equal deleted inserted replaced
-1:000000000000 0:2dfbcb88d16a
1 <?xml version="1.0"?>
2 <tool id="tetoolkit_tetranscripts" name="TEtranscripts" version="@TOOL_VERSION@+@WRAPPER_VERSION@">
3 <description>annotates reads to genes and transposable elements</description>
4 <macros>
5 <token name="@TOOL_VERSION@">2.1.4</token>
6 <token name="@WRAPPER_VERSION@">galaxy0</token>
7 </macros>
8 <requirements>
9 <requirement type="package" version="@TOOL_VERSION@">tetranscripts</requirement>
10 </requirements>
11 <version_command>TEtranscripts --version</version_command>
12 <command detect_errors="exit_code"><![CDATA[
13 ## initialize: pipefail makes the job exit non-zero when TEtranscripts fails, even though its output is piped through tee for the optional log (otherwise tee's exit status would hide the failure from detect_errors="exit_code")
14 ## file extension is required, so the annotation datasets are symlinked to *.gtf names
15 set -o pipefail && ln -s '$GTF' 'gene_annotation.gtf' &&
16 ln -s '$TE' 'transposable_annotation.gtf' &&
17
18 ## run
19 TEtranscripts
20 ## required: one treatment (-t) and one control (-c) BAM per sample_rep entry
21 -t
22 #for $s in $sample_rep
23 '${s.t}'
24 #end for
25 -c
26 #for $s in $sample_rep
27 '${s.c}'
28 #end for
29 --GTF 'gene_annotation.gtf'
30 --TE 'transposable_annotation.gtf'
31 ## optional
32 --stranded '$io.stranded'
33 $io.sortByPos
34 --project 'result'
35 --mode '$ap.mode'
36 --minread $ap.minread
37 #if $ap.fragmentLength
38 --fragmentLength $ap.fragmentLength
39 #end if
40 --iteration $ap.iteration
41 --padj $ap.padj
42 --foldchange $ap.foldchange
43 #if 'log' in $ap.out
44 --verbose 3
45 |& tee log.txt
46 #end if
47 ]]></command>
48 <inputs>
49 <repeat name="sample_rep" min="2" title="Select input data"> <!-- each entry supplies one treatment and one control BAM; at least two entries are required -->
50 <param argument="-t" type="data" format="bam" label="Treatment sample file"/>
51 <param argument="-c" type="data" format="bam" label="Control sample file"/>
52 </repeat>
53 <param argument="--GTF" type="data" format="gtf" label="Select GTF file for gene annotations"/>
54 <param argument="--TE" type="data" format="gtf" label="Select GTF file for transposable element annotations"/>
55 <section name="io" title="Input options">
56 <param argument="--stranded" type="select" label="Select library type">
57 <option value="no">Library is unstranded (no)</option>
58 <option value="forward">Second-strand cDNA library e.g. QIAseq stranded (forward)</option>
59 <option value="reverse">First-strand cDNA library e.g. Illumina TruSeq stranded (reverse)</option>
60 </param>
61 <param argument="--sortByPos" type="boolean" truevalue="--sortByPos" falsevalue="" label="Are input files sorted by chromosome position?"/>
62 </section>
63 <section name="ap" title="Advanced parameters">
64 <param argument="--mode" type="select" label="Set TE counting mode">
65 <option value="multi">Distribute among all alignments (multi)</option>
66 <option value="uniq">Unique mappers only (uniq)</option>
67 </param>
68 <param argument="--minread" type="integer" value="1" min="0" label="Set read count cutoff"/>
69 <param argument="--fragmentLength" type="integer" min="0" optional="true" label="Set average length of fragment used for single-end sequencing" help="For paired-end, estimated from the input alignment file. For single-end, ignored by default."/>
70 <param argument="--iteration" type="integer" value="100" min="0" label="Set maximum number of iterations used to optimize multi-reads assignment"/>
71 <param argument="--padj" type="float" value="0.05" min="0.0" max="1.0" label="Set FDR cutoff for significance"/>
72 <param argument="--foldchange" type="float" value="1.0" min="0.0" label="Set fold-change ratio (absolute) cutoff for differential expression"/>
73 <param name="out" type="select" multiple="true" label="Select output file(s)" help="Result files for gene TE analysis and sigDiff gene TE will be created if more than one dataset is applied."> <!-- the selected values are checked by the output filters and by the log branch of the command template -->
74 <option value="cnttable" selected="true">cntTable</option>
75 <option value="deseq2" selected="true">DESeq2.R</option>
76 <option value="gta" selected="true">Gene TE Analysis</option>
77 <option value="sgt" selected="true">SigDiff Gene TE</option>
78 <option value="log">Log</option>
79 </param>
80 </section>
81 </inputs>
82 <outputs> <!-- each dataset below is kept only when its key is selected in the "out" parameter of the "ap" section -->
83 <data name="out_cnt" format="tabular" from_work_dir="result.cntTable" label="${tool.name} on ${on_string}: cntTable">
84 <filter>'cnttable' in ap['out']</filter>
85 </data>
86 <data name="out_deseq2" format="txt" from_work_dir="result_DESeq2.R" label="${tool.name} on ${on_string}: DESeq2.R">
87 <filter>'deseq2' in ap['out']</filter>
88 </data>
89 <data name="out_log" format="txt" from_work_dir="log.txt" label="${tool.name} on ${on_string}: log"> <!-- log.txt is written by the tee pipe in the command template -->
90 <filter>'log' in ap['out']</filter>
91 </data>
92 <data name="out_gta" format="txt" from_work_dir="result_gene_TE_analysis.txt" label="${tool.name} on ${on_string}: Gene TE analysis">
93 <filter>'gta' in ap['out']</filter>
94 </data>
95 <data name="out_sgt" format="txt" from_work_dir="result_sigdiff_gene_TE.txt" label="${tool.name} on ${on_string}: SigDiff Gene TE">
96 <filter>'sgt' in ap['out']</filter>
97 </data>
98 </outputs>
99 <tests>
100 <!--
101 sources for test data:
102 https://github.com/mhammell-laboratory/tetoolkit-test-data
103 https://github.com/mhammell-laboratory/TEtranscripts/issues/66
104 -->
105
106 <!-- #1: default -->
107 <test expect_num_outputs="5">
108 <repeat name="sample_rep">
109 <param name="t" value="treatment1.bam"/>
110 <param name="c" value="control1.bam"/>
111 </repeat>
112 <repeat name="sample_rep">
113 <param name="t" value="treatment2.bam"/>
114 <param name="c" value="control2.bam"/>
115 </repeat>
116 <param name="GTF" value="gtf.gtf"/>
117 <param name="TE" value="te.gtf"/>
118 <section name="ap">
119 <param name="out" value="cnttable,deseq2,gta,sgt,log"/>
120 </section>
121 <output name="out_cnt">
122 <assert_contents>
123 <has_n_lines n="295"/>
124 <!-- depends on sample names -->
125 <has_text_matching expression="gene.+"/>
126 <!-- order changes -->
127 <has_text_matching expression="TIRANT.+"/>
128 </assert_contents>
129 </output>
130 <output name="out_deseq2">
131 <assert_contents>
132 <has_n_lines n="14"/>
133 <has_text_matching expression="data.+"/>
134 </assert_contents>
135 </output>
136 <output name="out_log">
137 <assert_contents>
138 <has_text_matching expression="INFO"/>
139 <has_text_matching expression=".+Done"/>
140 </assert_contents>
141 </output>
142 <output name="out_gta">
143 <assert_contents>
144 <has_n_lines n="71"/>
145 <has_line line="baseMean&#009;log2FoldChange&#009;lfcSE&#009;stat&#009;pvalue&#009;padj"/>
146 <!-- order changes -->
147 <has_text_matching expression="TIRANT.+"/>
148 </assert_contents>
149 </output>
150 <!-- no content, test dataset too small -->
151 <output name="out_sgt">
152 <assert_contents>
153 <has_n_lines n="1"/>
154 <has_line line="baseMean&#009;log2FoldChange&#009;lfcSE&#009;stat&#009;pvalue&#009;padj"/>
155 </assert_contents>
156 </output>
157 </test>
158 <!-- #2 -->
159 <test expect_num_outputs="5">
160 <repeat name="sample_rep">
161 <param name="t" value="treatment1.bam"/>
162 <param name="c" value="control1.bam"/>
163 </repeat>
164 <repeat name="sample_rep">
165 <param name="t" value="treatment2.bam"/>
166 <param name="c" value="control2.bam"/>
167 </repeat>
168 <param name="GTF" value="gtf.gtf"/>
169 <param name="TE" value="te.gtf"/>
170 <section name="io">
171 <param name="stranded" value="forward"/>
172 <param name="sortByPos" value="true"/>
173 </section>
174 <section name="ap">
175 <param name="mode" value="uniq"/>
176 <param name="minread" value="2"/>
177 <param name="fragmentLength" value="10"/>
178 <param name="iteration" value="90"/>
179 <param name="padj" value="0.06"/>
180 <param name="foldchange" value="2.0"/>
181 <param name="out" value="cnttable,deseq2,gta,sgt,log"/>
182 </section>
183 <output name="out_cnt">
184 <assert_contents>
185 <has_n_lines n="295"/>
186 <!-- depends on sample names -->
187 <has_text_matching expression="gene.+"/>
188 <!-- order changes -->
189 <has_text_matching expression="TIRANT.+"/>
190 </assert_contents>
191 </output>
192 <output name="out_deseq2">
193 <assert_contents>
194 <has_n_lines n="14"/>
195 <has_text_matching expression="data.+"/>
196 </assert_contents>
197 </output>
198 <output name="out_log">
199 <assert_contents>
200 <has_text_matching expression="INFO.+"/>
201 </assert_contents>
202 </output>
203 <output name="out_gta">
204 <assert_contents>
205 <has_n_lines n="3"/>
206 <has_line line="baseMean&#009;log2FoldChange&#009;lfcSE&#009;stat&#009;pvalue&#009;padj"/>
207 <!-- order changes -->
208 <has_text_matching expression="DNAREP1.+"/>
209 </assert_contents>
210 </output>
211 <!-- no content, test dataset too small -->
212 <output name="out_sgt">
213 <assert_contents>
214 <has_n_lines n="1"/>
215 <has_line line="baseMean&#009;log2FoldChange&#009;lfcSE&#009;stat&#009;pvalue&#009;padj"/>
216 </assert_contents>
217 </output>
218 </test>
219 <!-- #3 -->
220 <test expect_num_outputs="5">
221 <repeat name="sample_rep">
222 <param name="t" value="treatment1.bam"/>
223 <param name="c" value="control1.bam"/>
224 </repeat>
225 <repeat name="sample_rep">
226 <param name="t" value="treatment2.bam"/>
227 <param name="c" value="control2.bam"/>
228 </repeat>
229 <param name="GTF" value="gtf.gtf"/>
230 <param name="TE" value="te.gtf"/>
231 <section name="io">
232 <param name="stranded" value="reverse"/>
233 </section>
234 <section name="ap">
235 <param name="out" value="cnttable,deseq2,gta,sgt,log"/>
236 </section>
237 <output name="out_cnt">
238 <assert_contents>
239 <has_n_lines n="295"/>
240 <!-- depends on sample names -->
241 <has_text_matching expression="gene.+"/>
242 <!-- order changes -->
243 <has_text_matching expression="TIRANT.+"/>
244 </assert_contents>
245 </output>
246 <output name="out_deseq2">
247 <assert_contents>
248 <has_n_lines n="14"/>
249 <has_text_matching expression="data.+"/>
250 </assert_contents>
251 </output>
252 <output name="out_log">
253 <assert_contents>
254 <has_text_matching expression="INFO.+"/>
255 </assert_contents>
256 </output>
257 <output name="out_gta">
258 <assert_contents>
259 <has_n_lines n="23"/>
260 <has_line line="baseMean&#009;log2FoldChange&#009;lfcSE&#009;stat&#009;pvalue&#009;padj"/>
261 <!-- order changes -->
262 <has_text_matching expression="TART.+"/>
263 </assert_contents>
264 </output>
265 <output name="out_sgt">
266 <assert_contents>
267 <has_n_lines n="2"/>
268 <has_line line="baseMean&#009;log2FoldChange&#009;lfcSE&#009;stat&#009;pvalue&#009;padj"/>
269 <has_text_matching expression="Gypsy12.+"/>
270 </assert_contents>
271 </output>
272 </test>
273 </tests>
274 <help><![CDATA[
275 .. class:: infomark
276
277 **What it does**
278
279 TEtranscripts is a software package that utilizes both unambiguously (uniquely) and ambiguously (multi-) mapped reads to perform differential enrichment analyses from high throughput sequencing experiments. Currently, most expression analysis software packages are not optimized for handling the complexities involved in quantifying highly repetitive regions of the genome, especially transposable elements (TE), from short sequencing reads. Although transposable elements make up between 20 and 80% of many eukaryotic genomes and contribute significantly to the cellular transcriptome output, the difficulty in quantifying their abundances from high throughput sequencing experiments has led them to be largely ignored in most studies. TEtranscripts provides a noticeable improvement in the recovery of TE transcripts from RNA-Seq experiments and identification of peaks associated with repetitive regions of the genome.
280
281 **Input**
282
283 GTF files for gene annotation can be obtained from `UCSC RefSeq <http://genome.ucsc.edu/cgi-bin/hgTables>`_, Ensembl, `iGenomes <http://support.illumina.com/sequencing/sequencing_software/igenome.html>`_ or other annotation databases. GTF files for TE annotations are custom-generated from `UCSC RepeatMasker <http://genome.ucsc.edu/cgi-bin/hgTables>`_ or other annotation databases. They contain two custom attributes, class_id and family_id, corresponding to the class (e.g. LINE) and family (e.g. L1) of the corresponding transposable element. A unique ID (e.g. L1Md_Gf_dup1) is also assigned for each TE annotation in the transcript_id attribute.
284
285 **Output**
286
287 TEtranscripts quantifies both gene and transposable element (TE) transcript abundances from RNA-Seq experiments, utilizing both uniquely and ambiguously mapped short read sequences. It processes the short read alignments (BAM files) and proportionally assigns read counts to the corresponding gene or TE based on the user-provided annotation files (GTF files). In addition, TEtranscripts combines multiple libraries and performs differential analysis using DESeq2.
288
289 .. class:: infomark
290
291 **References**
292
293 More information is available on the `project website <http://hammelllab.labsites.cshl.edu/software/#TEtranscripts>`_ and `GitHub <https://github.com/mhammell-laboratory/TEtranscripts>`_.
294 ]]></help>
295 <citations>
296 <citation type="doi">10.1093/bioinformatics/btv422</citation>
297 <citation type="doi">10.1007/978-1-4939-7710-9_11</citation>
298 </citations>
299 </tool>