Mercurial > repos > iuc > stringtie
comparison stringtie.xml @ 13:a305d75e13f2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stringtie commit e811a7887db870f4f94f620f52bce656c8d5ba23
author | iuc |
---|---|
date | Thu, 12 Apr 2018 17:30:07 -0400 |
parents | 76d290331481 |
children | eafd5dc95228 |
comparison legend:
- equal
- deleted
- inserted
- replaced
12:76d290331481 | 13:a305d75e13f2 |
---|---|
1 <tool id="stringtie" name="StringTie" version="1.3.3.1"> | 1 <tool id="stringtie" name="StringTie" version="1.3.3.2"> |
2 <description>transcript assembly and quantification</description> | 2 <description>transcript assembly and quantification</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements" /> | 6 <expand macro="requirements" /> |
7 <expand macro="stdio" /> | 7 <expand macro="stdio" /> |
8 <expand macro="version_command" /> | 8 <expand macro="version_command" /> |
9 <command><![CDATA[ | 9 <command><![CDATA[ |
10 #import re | |
10 mkdir -p ./special_de_output/sample1/ && | 11 mkdir -p ./special_de_output/sample1/ && |
11 | 12 |
12 ## Get Guide GTF/GFF if selected | 13 ## Get Guide GTF/GFF if selected |
13 | 14 |
14 #if str($guide.use_guide) == 'yes': | 15 #if str($guide.use_guide) == 'yes': |
60 -x '$adv.omit_sequences' | 61 -x '$adv.omit_sequences' |
61 #end if | 62 #end if |
62 | 63 |
63 #if str($guide.use_guide) == 'yes': | 64 #if str($guide.use_guide) == 'yes': |
64 #if $guide.special_outputs.special_outputs_select == 'deseq2': | 65 #if $guide.special_outputs.special_outputs_select == 'deseq2': |
66 #set escaped_element_identifier = re.sub('[^\w\-]', '_', str($input_bam.element_identifier)) | |
65 && | 67 && |
66 ln -s '$output_gtf' ./special_de_output/sample1/output.gtf | 68 ln -s '$output_gtf' ./special_de_output/sample1/output.gtf |
67 && | 69 && |
70 TAB=\$(printf '\t') | |
71 && | |
72 CR=\$(printf '\r') | |
73 && | |
68 prepDE.py | 74 prepDE.py |
69 -i ./special_de_output/ | 75 -i ./special_de_output/ |
70 -g '$gene_counts' | 76 -g gene_counts.csv |
71 -t '$transcript_counts' | 77 -t transcript_counts.csv |
72 -l $guide.special_outputs.read_length | 78 -l $guide.special_outputs.read_length |
73 #if $guide.special_outputs.string: | 79 #if $guide.special_outputs.string: |
74 -s '$guide.special_outputs.string' | 80 -s '$guide.special_outputs.string' |
81 #end if | |
82 #if $guide.special_outputs.clustering: | |
83 -c | |
84 #if $guide.special_outputs.key: | |
85 -k '$guide.special_outputs.key' | |
75 #end if | 86 #end if |
76 #if $guide.special_outputs.clustering: | 87 --legend '$legend' |
77 -c | 88 > /dev/null |
78 #if $guide.special_outputs.key: | 89 && |
79 -k '$guide.special_outputs.key' | 90 sed -i.bak -e "s/,/\${TAB}/g" -e "s/\${CR}//g" '$legend' |
80 #end if | 91 #else |
81 --legend '$legend' | 92 > /dev/null |
82 > /dev/null | 93 #end if |
83 && | 94 |
84 sed -i.bak 's/,/\t/g' '$legend' | 95 ## Replace commas with tabs |
85 && | 96 && |
86 sed -i.bak 's/\r//g' '$legend' | 97 sed -i.bak -e "s/,/\${TAB}/g" -e "s/\${CR}//g" gene_counts.csv transcript_counts.csv |
87 #end if | 98 #if $guide.special_outputs.keep_header: |
88 | 99 && |
89 > /dev/null | 100 head -n 1 gene_counts.csv | sed -e 's/sample1/$escaped_element_identifier/' > '$gene_counts' |
90 | 101 && |
91 && | 102 head -n 1 transcript_counts.csv | sed -e 's/sample1/$escaped_element_identifier/' > '$transcript_counts' |
92 sed -i.bak 's/,/\t/g' '$transcript_counts' | 103 #end if |
93 && | 104 ## Sort count files on the first column |
94 sed -i.bak 's/\r//g' '$transcript_counts' | 105 && |
95 && | 106 tail -n +2 gene_counts.csv | sort -t"\${TAB}" -k1 >> '$gene_counts' |
96 sed -i.bak 's/,/\t/g' '$gene_counts' | 107 && |
97 && | 108 tail -n +2 transcript_counts.csv | sort -t"\${TAB}" -k1 >> '$transcript_counts' |
98 sed -i.bak 's/\r//g' '$gene_counts' | |
99 #end if | 109 #end if |
100 #end if | 110 #end if |
101 ]]></command> | 111 ]]></command> |
102 <inputs> | 112 <inputs> |
103 <param name="input_bam" type="data" format="sam,bam" label="Input mapped reads" help="Input BAM/SAM file containing reads you want to assemble into transcripts"/> | 113 <param name="input_bam" type="data" format="sam,bam" label="Input mapped reads" help="Input BAM/SAM file containing reads you want to assemble into transcripts"/> |
139 <option value="no" selected="True">No additional output</option> | 149 <option value="no" selected="True">No additional output</option> |
140 </param> | 150 </param> |
141 <when value="ballgown" /> | 151 <when value="ballgown" /> |
142 <when value="deseq2"> | 152 <when value="deseq2"> |
143 <param name="read_length" argument="--length" type="integer" min="0" value="75" label="Specify the average read length" help="Default: 75" /> | 153 <param name="read_length" argument="--length" type="integer" min="0" value="75" label="Specify the average read length" help="Default: 75" /> |
144 <param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="False" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that overlap. Transcripts containing the geneID prefix will be ignored. Default: No" /> | 154 <param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="false" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that overlap. Transcripts containing the geneID prefix will be ignored. Default: No" /> |
145 <param argument="--string" type="text" label="Prefix used for transcripts" help="If a different prefix was used for geneIDs assigned by StringTie than the default, specify it here. Only letters and numbers will be retained in this field. Default: MSTRG" > | 155 <param argument="--string" type="text" label="Prefix used for transcripts" help="If a different prefix was used for geneIDs assigned by StringTie than the default, specify it here. Only letters and numbers will be retained in this field. Default: MSTRG" > |
146 <sanitizer> | 156 <sanitizer> |
147 <valid initial="string.letters,string.digits"></valid> | 157 <valid initial="string.letters,string.digits"></valid> |
148 </sanitizer> | 158 </sanitizer> |
149 </param> | 159 </param> |
150 <param argument="--key" type="text" label="Prefix for clustering" help="If clustering, what prefix to use for geneIDs assigned by this script. Only letters and numbers will be retained in this field. Default: prepG"> | 160 <param argument="--key" type="text" label="Prefix for clustering" help="If clustering, what prefix to use for geneIDs assigned by this script. Only letters and numbers will be retained in this field. Default: prepG"> |
151 <sanitizer> | 161 <sanitizer> |
152 <valid initial="string.letters,string.digits"></valid> | 162 <valid initial="string.letters,string.digits"></valid> |
153 </sanitizer> | 163 </sanitizer> |
154 </param> | 164 </param> |
165 <param name="keep_header" type="boolean" checked="true" label="Output header line?" help="Keep the header line for edgeR, remove it for DESeq2" /> | |
155 </when> | 166 </when> |
156 <when value="no" /> | 167 <when value="no" /> |
157 </conditional> | 168 </conditional> |
158 <param name="coverage_file" argument="-C" type="boolean" truevalue="-C" falsevalue="" checked="False" label="Output coverage file?" help="If StringTie is run with this option (requires -G), it returns a file with all the transcripts in the reference annotation that are fully covered, end to end, by reads. The output format is a GTF file as described below. Each line of the GTF is corresponds to a gene or transcript in the reference annotation. Default: No"/> | 169 <param name="coverage_file" argument="-C" type="boolean" truevalue="-C" falsevalue="" checked="False" label="Output coverage file?" help="If StringTie is run with this option (requires -G), it returns a file with all the transcripts in the reference annotation that are fully covered, end to end, by reads. The output format is a GTF file as described below. Each line of the GTF is corresponds to a gene or transcript in the reference annotation. Default: No"/> |
159 </when> | 170 </when> |
160 </conditional> | 171 </conditional> |
161 <section name="adv" title="Advanced Options"> | 172 <section name="adv" title="Advanced Options"> |
162 <param name="abundance_estimation" argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Output gene abundance estimation file?" help="If selected, gene abundances will be reported in a tab-delimited file, see below for more information. Default: No"/> | 173 <param name="abundance_estimation" argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Output gene abundance estimation file?" help="If selected, gene abundances will be reported in a tab-delimited file, see below for more information. Default: No"/> |
163 <param name="omit_sequences" argument="-x" type="text" value="" label="Do not assemble any transcripts on these reference sequence(s)" help="Ignore all read alignments (and thus do not attempt to perform transcript assembly) on the specified reference sequences. This parameter can be a single reference sequence name (e.g. chrM) or a comma-delimited list of sequence names (e.g. chrM,chrX,chrY). This can speed up StringTie especially in the case of excluding the mitochondrial genome, whose genes may have very high coverage in some cases, even though they may be of no interest for a particular RNA-Seq analysis. The reference sequence names are case sensitive, they must match identically the names of chromosomes/contigs of the target genome against which the RNA-Seq reads were aligned in the first place." /> | 174 <param name="omit_sequences" argument="-x" type="text" value="" label="Do not assemble any transcripts on these reference sequence(s)" help="Ignore all read alignments (and thus do not attempt to perform transcript assembly) on the specified reference sequences. This parameter can be a single reference sequence name (e.g. chrM) or a comma-delimited list of sequence names (e.g. chrM,chrX,chrY). This can speed up StringTie especially in the case of excluding the mitochondrial genome, whose genes may have very high coverage in some cases, even though they may be of no interest for a particular RNA-Seq analysis. The reference sequence names are case sensitive, they must match identically the names of chromosomes/contigs of the target genome against which the RNA-Seq reads were aligned in the first place." /> |
258 <output name="exon_transcript_mapping" file="./ballgown/e2t.ctab" ftype="tabular" /> | 269 <output name="exon_transcript_mapping" file="./ballgown/e2t.ctab" ftype="tabular" /> |
259 <output name="intron_transcript_mapping" file="./ballgown/i2t.ctab" ftype="tabular" /> | 270 <output name="intron_transcript_mapping" file="./ballgown/i2t.ctab" ftype="tabular" /> |
260 <output name="output_gtf" file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" /> | 271 <output name="output_gtf" file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" /> |
261 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> | 272 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> |
262 </test> | 273 </test> |
263 <!--Ensure output for DESeq2/edgeR works --> | 274 <!--Ensure output for edgeR works --> |
264 <test expect_num_outputs="5"> | 275 <test expect_num_outputs="5"> |
265 <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> | 276 <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> |
266 <param name="use_guide" value="yes" /> | 277 <param name="use_guide" value="yes" /> |
267 <param name="special_outputs_select" value="deseq2" /> | 278 <param name="special_outputs_select" value="deseq2" /> |
268 <param name="input_estimation" value="True" /> | 279 <param name="input_estimation" value="True" /> |
269 <param name="guide_gff_select" value="history" /> | 280 <param name="guide_gff_select" value="history" /> |
270 <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" /> | 281 <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" /> |
271 <param name="coverage_file" value="True" /> | 282 <param name="coverage_file" value="True" /> |
272 <param name="clustering" value="True" /> | 283 <param name="clustering" value="True" /> |
273 <output name="gene_counts" file="./deseq2/gene_counts.tsv" ftype="tabular" /> | 284 <output name="gene_counts" file="gene_counts_edger.tsv" ftype="tabular" /> |
274 <output name="transcript_counts" file="./deseq2/transcript_counts.tsv" ftype="tabular" /> | 285 <output name="transcript_counts" file="transcript_counts_edger.tsv" ftype="tabular" /> |
275 <output name="legend" file="./deseq2/legend.tsv" ftype="tabular" /> | 286 <output name="legend" file="legend.tsv" ftype="tabular" /> |
287 <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" /> | |
288 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> | |
289 </test> | |
290 <!--Ensure output for DESeq2 works --> | |
291 <test expect_num_outputs="5"> | |
292 <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> | |
293 <param name="use_guide" value="yes" /> | |
294 <param name="special_outputs_select" value="deseq2" /> | |
295 <param name="keep_header" value="False" /> | |
296 <param name="input_estimation" value="True" /> | |
297 <param name="guide_gff_select" value="history" /> | |
298 <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" /> | |
299 <param name="coverage_file" value="True" /> | |
300 <param name="clustering" value="True" /> | |
301 <output name="gene_counts" file="gene_counts_deseq2.tsv" ftype="tabular" /> | |
302 <output name="transcript_counts" file="transcript_counts_deseq2.tsv" ftype="tabular" /> | |
303 <output name="legend" file="legend.tsv" ftype="tabular" /> | |
276 <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" /> | 304 <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" /> |
277 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> | 305 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> |
278 </test> | 306 </test> |
279 <!--Ensure gene abundances output works --> | 307 <!--Ensure gene abundances output works --> |
280 <test expect_num_outputs="2"> | 308 <test expect_num_outputs="2"> |