Mercurial > repos > iuc > tetoolkit_tetranscripts
comparison tetranscript.xml @ 0:2dfbcb88d16a draft
"planemo upload for repository https://github.com/mhammell-laboratory/TEtranscripts commit 0afd89b76ba658b8bc9faa1463a1aa160ddb2339"
author | iuc |
---|---|
date | Wed, 13 May 2020 12:57:29 -0400 |
parents | |
children | bf4ee2810759 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2dfbcb88d16a |
---|---|
1 <?xml version="1.0"?> | |
2 <tool id="tetoolkit_tetranscripts" name="TEtranscripts" version="@TOOL_VERSION@+@WRAPPER_VERSION@"> | |
3 <description>annotates reads to genes and transposable elements</description> | |
4 <macros> <!-- both tokens are combined into the tool version attribute; @TOOL_VERSION@ also pins the package requirement below --> | |
5 <token name="@TOOL_VERSION@">2.1.4</token> | |
6 <token name="@WRAPPER_VERSION@">galaxy0</token> | |
7 </macros> | |
8 <requirements> | |
9 <requirement type="package" version="@TOOL_VERSION@">tetranscripts</requirement> | |
10 </requirements> | |
11 <version_command>TEtranscripts --version</version_command> | |
12 <command detect_errors="exit_code"><![CDATA[ | |
13 ## initialize: link both annotation files under fixed names in the job working directory | |
14 ## file extension is required | |
15 ln -s '$GTF' 'gene_annotation.gtf' && | |
16 ln -s '$TE' 'transposable_annotation.gtf' && | |
17 set -o pipefail && | |
18 ## run; with pipefail set above, a TEtranscripts failure still fails the job when its output is piped through tee | |
19 TEtranscripts | |
20 ## required | |
21 -t | |
22 #for $s in $sample_rep | |
23 '${s.t}' | |
24 #end for | |
25 -c | |
26 #for $s in $sample_rep | |
27 '${s.c}' | |
28 #end for | |
29 --GTF 'gene_annotation.gtf' | |
30 --TE 'transposable_annotation.gtf' | |
31 ## optional | |
32 --stranded '$io.stranded' | |
33 $io.sortByPos | |
34 --project 'result' | |
35 --mode '$ap.mode' | |
36 --minread $ap.minread | |
37 #if $ap.fragmentLength | |
38 --fragmentLength $ap.fragmentLength | |
39 #end if | |
40 --iteration $ap.iteration | |
41 --padj $ap.padj | |
42 --foldchange $ap.foldchange | |
43 #if 'log' in $ap.out | |
44 --verbose 3 | |
45 2>&1 | tee log.txt | |
46 #end if | |
47 ]]></command> | |
48 <inputs> | |
49 <repeat name="sample_rep" min="2" title="Select input data"> <!-- each entry pairs one treatment BAM with one control BAM; the command template loops over the entries once for the treatment list and once for the control list --> | |
50 <param argument="-t" type="data" format="bam" label="Treatment sample file"/> | |
51 <param argument="-c" type="data" format="bam" label="Control sample file"/> | |
52 </repeat> | |
53 <param argument="--GTF" type="data" format="gtf" label="Select GTF file for gene annotations"/> | |
54 <param argument="--TE" type="data" format="gtf" label="Select GTF file for transposable element annotations"/> | |
55 <section name="io" title="Input options"> | |
56 <param argument="--stranded" type="select" label="Select library type"> | |
57 <option value="no">Library is unstranded (no)</option> | |
58 <option value="forward">Second-strand cDNA library e.g. QIAseq stranded (forward)</option> | |
59 <option value="reverse">First-strand cDNA library e.g. Illumina TruSeq stranded (reverse)</option> | |
60 </param> | |
61 <param argument="--sortByPos" type="boolean" truevalue="--sortByPos" falsevalue="" label="Are input files sorted by chromosome position?"/> | |
62 </section> | |
63 <section name="ap" title="Advanced parameters"> | |
64 <param argument="--mode" type="select" label="Set TE counting mode"> | |
65 <option value="multi">Distribute among all alignments (multi)</option> | |
66 <option value="uniq">Unique mappers only (uniq)</option> | |
67 </param> | |
68 <param argument="--minread" type="integer" value="1" min="0" label="Set read count cutoff"/> | |
69 <param argument="--fragmentLength" type="integer" min="0" optional="true" label="Set average length of fragment used for single-end sequencing" help="For paired-end, estimated from the input alignment file. For single-end, ignored by default."/> <!-- optional: the command template only emits this flag when a value is set --> | |
70 <param argument="--iteration" type="integer" value="100" min="0" label="Set maximum number of iterations used to optimize multi-reads assignment"/> | |
71 <param argument="--padj" type="float" value="0.05" min="0.0" max="1.0" label="Set FDR cutoff for significance"/> | |
72 <param argument="--foldchange" type="float" value="1.0" min="0.0" label="Set fold-change ratio (absolute) cutoff for differential expression"/> | |
73 <param name="out" type="select" multiple="true" label="Select output file(s)" help="Result files for gene TE analysis and sigDiff gene TE will be created if more than one dataset is applied."> <!-- the selected keys drive the output filters; choosing "log" additionally raises verbosity and captures the console output in log.txt --> | |
74 <option value="cnttable" selected="true">cntTable</option> | |
75 <option value="deseq2" selected="true">DESeq2.R</option> | |
76 <option value="gta" selected="true">Gene TE Analysis</option> | |
77 <option value="sgt" selected="true">SigDiff Gene TE</option> | |
78 <option value="log">Log</option> | |
79 </param> | |
80 </section> | |
81 </inputs> | |
82 <outputs> <!-- every dataset is picked up from the job working directory and is only created when its key is selected in ap.out; the "result" file prefix matches the project name passed on the command line --> | |
83 <data name="out_cnt" format="tabular" from_work_dir="result.cntTable" label="${tool.name} on ${on_string}: cntTable"> | |
84 <filter>'cnttable' in ap['out']</filter> | |
85 </data> | |
86 <data name="out_deseq2" format="txt" from_work_dir="result_DESeq2.R" label="${tool.name} on ${on_string}: DESeq2.R"> | |
87 <filter>'deseq2' in ap['out']</filter> | |
88 </data> | |
89 <data name="out_log" format="txt" from_work_dir="log.txt" label="${tool.name} on ${on_string}: log"> <!-- log.txt is written by the tee at the end of the command template --> | |
90 <filter>'log' in ap['out']</filter> | |
91 </data> | |
92 <data name="out_gta" format="txt" from_work_dir="result_gene_TE_analysis.txt" label="${tool.name} on ${on_string}: Gene TE analysis"> | |
93 <filter>'gta' in ap['out']</filter> | |
94 </data> | |
95 <data name="out_sgt" format="txt" from_work_dir="result_sigdiff_gene_TE.txt" label="${tool.name} on ${on_string}: SigDiff Gene TE"> | |
96 <filter>'sgt' in ap['out']</filter> | |
97 </data> | |
98 </outputs> | |
99 <tests> | |
100 <!-- | |
101 sources for test data: | |
102 https://github.com/mhammell-laboratory/tetoolkit-test-data | |
103 https://github.com/mhammell-laboratory/TEtranscripts/issues/66 | |
104 --> | |
105 | |
106 <!-- #1: default parameters, all five outputs requested --> | |
107 <test expect_num_outputs="5"> | |
108 <repeat name="sample_rep"> | |
109 <param name="t" value="treatment1.bam"/> | |
110 <param name="c" value="control1.bam"/> | |
111 </repeat> | |
112 <repeat name="sample_rep"> | |
113 <param name="t" value="treatment2.bam"/> | |
114 <param name="c" value="control2.bam"/> | |
115 </repeat> | |
116 <param name="GTF" value="gtf.gtf"/> | |
117 <param name="TE" value="te.gtf"/> | |
118 <section name="ap"> | |
119 <param name="out" value="cnttable,deseq2,gta,sgt,log"/> | |
120 </section> | |
121 <output name="out_cnt"> | |
122 <assert_contents> | |
123 <has_n_lines n="295"/> | |
124 <!-- depends on sample names --> | |
125 <has_text_matching expression="gene.+"/> | |
126 <!-- order changes, so only check that a TIRANT entry is present --> | |
127 <has_text_matching expression="TIRANT.+"/> | |
128 </assert_contents> | |
129 </output> | |
130 <output name="out_deseq2"> | |
131 <assert_contents> | |
132 <has_n_lines n="14"/> | |
133 <has_text_matching expression="data.+"/> | |
134 </assert_contents> | |
135 </output> | |
136 <output name="out_log"> | |
137 <assert_contents> | |
138 <has_text_matching expression="INFO"/> | |
139 <has_text_matching expression=".+Done"/> | |
140 </assert_contents> | |
141 </output> | |
142 <output name="out_gta"> | |
143 <assert_contents> | |
144 <has_n_lines n="71"/> | |
145 <has_line line="baseMean	log2FoldChange	lfcSE	stat	pvalue	padj"/> | |
146 <!-- order changes, so only check that a TIRANT entry is present --> | |
147 <has_text_matching expression="TIRANT.+"/> | |
148 </assert_contents> | |
149 </output> | |
150 <!-- no content beyond the header line; test dataset too small --> | |
151 <output name="out_sgt"> | |
152 <assert_contents> | |
153 <has_n_lines n="1"/> | |
154 <has_line line="baseMean	log2FoldChange	lfcSE	stat	pvalue	padj"/> | |
155 </assert_contents> | |
156 </output> | |
157 </test> | |
158 <!-- #2: forward-stranded, position-sorted input, uniq mode and non-default thresholds --> | |
159 <test expect_num_outputs="5"> | |
160 <repeat name="sample_rep"> | |
161 <param name="t" value="treatment1.bam"/> | |
162 <param name="c" value="control1.bam"/> | |
163 </repeat> | |
164 <repeat name="sample_rep"> | |
165 <param name="t" value="treatment2.bam"/> | |
166 <param name="c" value="control2.bam"/> | |
167 </repeat> | |
168 <param name="GTF" value="gtf.gtf"/> | |
169 <param name="TE" value="te.gtf"/> | |
170 <section name="io"> | |
171 <param name="stranded" value="forward"/> | |
172 <param name="sortByPos" value="true"/> | |
173 </section> | |
174 <section name="ap"> | |
175 <param name="mode" value="uniq"/> | |
176 <param name="minread" value="2"/> | |
177 <param name="fragmentLength" value="10"/> | |
178 <param name="iteration" value="90"/> | |
179 <param name="padj" value="0.06"/> | |
180 <param name="foldchange" value="2.0"/> | |
181 <param name="out" value="cnttable,deseq2,gta,sgt,log"/> | |
182 </section> | |
183 <output name="out_cnt"> | |
184 <assert_contents> | |
185 <has_n_lines n="295"/> | |
186 <!-- depends on sample names --> | |
187 <has_text_matching expression="gene.+"/> | |
188 <!-- order changes, so only check that a TIRANT entry is present --> | |
189 <has_text_matching expression="TIRANT.+"/> | |
190 </assert_contents> | |
191 </output> | |
192 <output name="out_deseq2"> | |
193 <assert_contents> | |
194 <has_n_lines n="14"/> | |
195 <has_text_matching expression="data.+"/> | |
196 </assert_contents> | |
197 </output> | |
198 <output name="out_log"> | |
199 <assert_contents> | |
200 <has_text_matching expression="INFO.+"/> | |
201 </assert_contents> | |
202 </output> | |
203 <output name="out_gta"> | |
204 <assert_contents> | |
205 <has_n_lines n="3"/> | |
206 <has_line line="baseMean	log2FoldChange	lfcSE	stat	pvalue	padj"/> | |
207 <!-- order changes, so only check that a DNAREP1 entry is present --> | |
208 <has_text_matching expression="DNAREP1.+"/> | |
209 </assert_contents> | |
210 </output> | |
211 <!-- no content beyond the header line; test dataset too small --> | |
212 <output name="out_sgt"> | |
213 <assert_contents> | |
214 <has_n_lines n="1"/> | |
215 <has_line line="baseMean	log2FoldChange	lfcSE	stat	pvalue	padj"/> | |
216 </assert_contents> | |
217 </output> | |
218 </test> | |
219 <!-- #3: reverse-stranded library, remaining options left unset --> | |
220 <test expect_num_outputs="5"> | |
221 <repeat name="sample_rep"> | |
222 <param name="t" value="treatment1.bam"/> | |
223 <param name="c" value="control1.bam"/> | |
224 </repeat> | |
225 <repeat name="sample_rep"> | |
226 <param name="t" value="treatment2.bam"/> | |
227 <param name="c" value="control2.bam"/> | |
228 </repeat> | |
229 <param name="GTF" value="gtf.gtf"/> | |
230 <param name="TE" value="te.gtf"/> | |
231 <section name="io"> | |
232 <param name="stranded" value="reverse"/> | |
233 </section> | |
234 <section name="ap"> | |
235 <param name="out" value="cnttable,deseq2,gta,sgt,log"/> | |
236 </section> | |
237 <output name="out_cnt"> | |
238 <assert_contents> | |
239 <has_n_lines n="295"/> | |
240 <!-- depends on sample names --> | |
241 <has_text_matching expression="gene.+"/> | |
242 <!-- order changes, so only check that a TIRANT entry is present --> | |
243 <has_text_matching expression="TIRANT.+"/> | |
244 </assert_contents> | |
245 </output> | |
246 <output name="out_deseq2"> | |
247 <assert_contents> | |
248 <has_n_lines n="14"/> | |
249 <has_text_matching expression="data.+"/> | |
250 </assert_contents> | |
251 </output> | |
252 <output name="out_log"> | |
253 <assert_contents> | |
254 <has_text_matching expression="INFO.+"/> | |
255 </assert_contents> | |
256 </output> | |
257 <output name="out_gta"> | |
258 <assert_contents> | |
259 <has_n_lines n="23"/> | |
260 <has_line line="baseMean	log2FoldChange	lfcSE	stat	pvalue	padj"/> | |
261 <!-- order changes, so only check that a TART entry is present --> | |
262 <has_text_matching expression="TART.+"/> | |
263 </assert_contents> | |
264 </output> | |
265 <output name="out_sgt"> | |
266 <assert_contents> | |
267 <has_n_lines n="2"/> | |
268 <has_line line="baseMean	log2FoldChange	lfcSE	stat	pvalue	padj"/> | |
269 <has_text_matching expression="Gypsy12.+"/> | |
270 </assert_contents> | |
271 </output> | |
272 </test> | |
273 </tests> | |
274 <help><![CDATA[ | |
275 .. class:: infomark | |
276 | |
277 **What it does** | |
278 | |
279 TEtranscripts is a software package that utilizes both unambiguously (uniquely) and ambiguously (multi-) mapped reads to perform differential enrichment analyses from high-throughput sequencing experiments. Currently, most expression analysis software packages are not optimized for handling the complexities involved in quantifying highly repetitive regions of the genome, especially transposable elements (TE), from short sequencing reads. Although transposable elements make up between 20 and 80% of many eukaryotic genomes and contribute significantly to the cellular transcriptome output, the difficulty in quantifying their abundances from high-throughput sequencing experiments has led them to be largely ignored in most studies. TEtranscripts provides a noticeable improvement in the recovery of TE transcripts from RNA-Seq experiments and identification of peaks associated with repetitive regions of the genome. | |
280 | |
281 **Input** | |
282 | |
283 GTF files for gene annotation can be obtained from `UCSC RefSeq <http://genome.ucsc.edu/cgi-bin/hgTables>`_, Ensembl, `iGenomes <http://support.illumina.com/sequencing/sequencing_software/igenome.html>`_ or other annotation databases. GTF files for TE annotations are custom-generated from `UCSC RepeatMasker <http://genome.ucsc.edu/cgi-bin/hgTables>`_ or other annotation databases. They contain two custom attributes, class_id and family_id, corresponding to the class (e.g. LINE) and family (e.g. L1) of the corresponding transposable element. A unique ID (e.g. L1Md_Gf_dup1) is also assigned for each TE annotation in the transcript_id attribute. | |
284 | |
285 **Output** | |
286 | |
287 TEtranscripts quantifies both gene and transposable element (TE) transcript abundances from RNA-Seq experiments, utilizing both uniquely and ambiguously mapped short read sequences. It processes the short-read alignments (BAM files) and proportionally assigns read counts to the corresponding gene or TE based on the user-provided annotation files (GTF files). In addition, TEtranscripts combines multiple libraries and performs differential analysis using DESeq2. | |
288 | |
289 .. class:: infomark | |
290 | |
291 **References** | |
292 | |
293 More information is available on the `project website <http://hammelllab.labsites.cshl.edu/software/#TEtranscripts>`_ and `GitHub <https://github.com/mhammell-laboratory/TEtranscripts>`_. | |
294 ]]></help> | |
295 <citations> | |
296 <citation type="doi">10.1093/bioinformatics/btv422</citation> | |
297 <citation type="doi">10.1007/978-1-4939-7710-9_11</citation> | |
298 </citations> | |
299 </tool> |