comparison stringtie.xml @ 13:a305d75e13f2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stringtie commit e811a7887db870f4f94f620f52bce656c8d5ba23
author iuc
date Thu, 12 Apr 2018 17:30:07 -0400
parents 76d290331481
children eafd5dc95228
comparison
equal deleted inserted replaced
12:76d290331481 13:a305d75e13f2
1 <tool id="stringtie" name="StringTie" version="1.3.3.1"> 1 <tool id="stringtie" name="StringTie" version="1.3.3.2">
2 <description>transcript assembly and quantification</description> 2 <description>transcript assembly and quantification</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <expand macro="stdio" /> 7 <expand macro="stdio" />
8 <expand macro="version_command" /> 8 <expand macro="version_command" />
9 <command><![CDATA[ 9 <command><![CDATA[
10 #import re
10 mkdir -p ./special_de_output/sample1/ && 11 mkdir -p ./special_de_output/sample1/ &&
11 12
12 ## Get Guide GTF/GFF if selected 13 ## Get Guide GTF/GFF if selected
13 14
14 #if str($guide.use_guide) == 'yes': 15 #if str($guide.use_guide) == 'yes':
60 -x '$adv.omit_sequences' 61 -x '$adv.omit_sequences'
61 #end if 62 #end if
62 63
63 #if str($guide.use_guide) == 'yes': 64 #if str($guide.use_guide) == 'yes':
64 #if $guide.special_outputs.special_outputs_select == 'deseq2': 65 #if $guide.special_outputs.special_outputs_select == 'deseq2':
66 #set escaped_element_identifier = re.sub('[^\w\-]', '_', str($input_bam.element_identifier))
65 && 67 &&
66 ln -s '$output_gtf' ./special_de_output/sample1/output.gtf 68 ln -s '$output_gtf' ./special_de_output/sample1/output.gtf
67 && 69 &&
70 TAB=\$(printf '\t')
71 &&
72 CR=\$(printf '\r')
73 &&
68 prepDE.py 74 prepDE.py
69 -i ./special_de_output/ 75 -i ./special_de_output/
70 -g '$gene_counts' 76 -g gene_counts.csv
71 -t '$transcript_counts' 77 -t transcript_counts.csv
72 -l $guide.special_outputs.read_length 78 -l $guide.special_outputs.read_length
73 #if $guide.special_outputs.string: 79 #if $guide.special_outputs.string:
74 -s '$guide.special_outputs.string' 80 -s '$guide.special_outputs.string'
81 #end if
82 #if $guide.special_outputs.clustering:
83 -c
84 #if $guide.special_outputs.key:
85 -k '$guide.special_outputs.key'
75 #end if 86 #end if
76 #if $guide.special_outputs.clustering: 87 --legend '$legend'
77 -c 88 > /dev/null
78 #if $guide.special_outputs.key: 89 &&
79 -k '$guide.special_outputs.key' 90 sed -i.bak -e "s/,/\${TAB}/g" -e "s/\${CR}//g" '$legend'
80 #end if 91 #else
81 --legend '$legend' 92 > /dev/null
82 > /dev/null 93 #end if
83 && 94
84 sed -i.bak 's/,/\t/g' '$legend' 95 ## Replace commas with tabs
85 && 96 &&
86 sed -i.bak 's/\r//g' '$legend' 97 sed -i.bak -e "s/,/\${TAB}/g" -e "s/\${CR}//g" gene_counts.csv transcript_counts.csv
87 #end if 98 #if $guide.special_outputs.keep_header:
88 99 &&
89 > /dev/null 100 head -n 1 gene_counts.csv | sed -e 's/sample1/$escaped_element_identifier/' > '$gene_counts'
90 101 &&
91 && 102 head -n 1 transcript_counts.csv | sed -e 's/sample1/$escaped_element_identifier/' > '$transcript_counts'
92 sed -i.bak 's/,/\t/g' '$transcript_counts' 103 #end if
93 && 104 ## Sort count files on the first column
94 sed -i.bak 's/\r//g' '$transcript_counts' 105 &&
95 && 106 tail -n +2 gene_counts.csv | sort -t"\${TAB}" -k1 >> '$gene_counts'
96 sed -i.bak 's/,/\t/g' '$gene_counts' 107 &&
97 && 108 tail -n +2 transcript_counts.csv | sort -t"\${TAB}" -k1 >> '$transcript_counts'
98 sed -i.bak 's/\r//g' '$gene_counts'
99 #end if 109 #end if
100 #end if 110 #end if
101 ]]></command> 111 ]]></command>
102 <inputs> 112 <inputs>
103 <param name="input_bam" type="data" format="sam,bam" label="Input mapped reads" help="Input BAM/SAM file containing reads you want to assemble into transcripts"/> 113 <param name="input_bam" type="data" format="sam,bam" label="Input mapped reads" help="Input BAM/SAM file containing reads you want to assemble into transcripts"/>
139 <option value="no" selected="True">No additional output</option> 149 <option value="no" selected="True">No additional output</option>
140 </param> 150 </param>
141 <when value="ballgown" /> 151 <when value="ballgown" />
142 <when value="deseq2"> 152 <when value="deseq2">
143 <param name="read_length" argument="--length" type="integer" min="0" value="75" label="Specify the average read length" help="Default: 75" /> 153 <param name="read_length" argument="--length" type="integer" min="0" value="75" label="Specify the average read length" help="Default: 75" />
144 <param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="False" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that overlap. Transcripts containing the geneID prefix will be ignored. Default: No" /> 154 <param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="false" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that overlap. Transcripts containing the geneID prefix will be ignored. Default: No" />
145 <param argument="--string" type="text" label="Prefix used for transcripts" help="If a different prefix was used for geneIDs assigned by StringTie than the default, specify it here. Only letters and numbers will be retained in this field. Default: MSTRG" > 155 <param argument="--string" type="text" label="Prefix used for transcripts" help="If a different prefix was used for geneIDs assigned by StringTie than the default, specify it here. Only letters and numbers will be retained in this field. Default: MSTRG" >
146 <sanitizer> 156 <sanitizer>
147 <valid initial="string.letters,string.digits"></valid> 157 <valid initial="string.letters,string.digits"></valid>
148 </sanitizer> 158 </sanitizer>
149 </param> 159 </param>
150 <param argument="--key" type="text" label="Prefix for clustering" help="If clustering, what prefix to use for geneIDs assigned by this script. Only letters and numbers will be retained in this field. Default: prepG"> 160 <param argument="--key" type="text" label="Prefix for clustering" help="If clustering, what prefix to use for geneIDs assigned by this script. Only letters and numbers will be retained in this field. Default: prepG">
151 <sanitizer> 161 <sanitizer>
152 <valid initial="string.letters,string.digits"></valid> 162 <valid initial="string.letters,string.digits"></valid>
153 </sanitizer> 163 </sanitizer>
154 </param> 164 </param>
165 <param name="keep_header" type="boolean" checked="true" label="Output header line?" help="Keep the header line for edgeR, remove it for DESeq2" />
155 </when> 166 </when>
156 <when value="no" /> 167 <when value="no" />
157 </conditional> 168 </conditional>
158 <param name="coverage_file" argument="-C" type="boolean" truevalue="-C" falsevalue="" checked="False" label="Output coverage file?" help="If StringTie is run with this option (requires -G), it returns a file with all the transcripts in the reference annotation that are fully covered, end to end, by reads. The output format is a GTF file as described below. Each line of the GTF is corresponds to a gene or transcript in the reference annotation. Default: No"/> 169 <param name="coverage_file" argument="-C" type="boolean" truevalue="-C" falsevalue="" checked="False" label="Output coverage file?" help="If StringTie is run with this option (requires -G), it returns a file with all the transcripts in the reference annotation that are fully covered, end to end, by reads. The output format is a GTF file as described below. Each line of the GTF is corresponds to a gene or transcript in the reference annotation. Default: No"/>
159 </when> 170 </when>
160 </conditional> 171 </conditional>
161 <section name="adv" title="Advanced Options"> 172 <section name="adv" title="Advanced Options">
162 <param name="abundance_estimation" argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Output gene abundance estimation file?" help="If selected, gene abundances will be reported in a tab-delimited file, see below for more information. Default: No"/> 173 <param name="abundance_estimation" argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Output gene abundance estimation file?" help="If selected, gene abundances will be reported in a tab-delimited file, see below for more information. Default: No"/>
163 <param name="omit_sequences" argument="-x" type="text" value="" label="Do not assemble any transcripts on these reference sequence(s)" help="Ignore all read alignments (and thus do not attempt to perform transcript assembly) on the specified reference sequences. This parameter can be a single reference sequence name (e.g. chrM) or a comma-delimited list of sequence names (e.g. chrM,chrX,chrY). This can speed up StringTie especially in the case of excluding the mitochondrial genome, whose genes may have very high coverage in some cases, even though they may be of no interest for a particular RNA-Seq analysis. The reference sequence names are case sensitive, they must match identically the names of chromosomes/contigs of the target genome against which the RNA-Seq reads were aligned in the first place." /> 174 <param name="omit_sequences" argument="-x" type="text" value="" label="Do not assemble any transcripts on these reference sequence(s)" help="Ignore all read alignments (and thus do not attempt to perform transcript assembly) on the specified reference sequences. This parameter can be a single reference sequence name (e.g. chrM) or a comma-delimited list of sequence names (e.g. chrM,chrX,chrY). This can speed up StringTie especially in the case of excluding the mitochondrial genome, whose genes may have very high coverage in some cases, even though they may be of no interest for a particular RNA-Seq analysis. The reference sequence names are case sensitive, they must match identically the names of chromosomes/contigs of the target genome against which the RNA-Seq reads were aligned in the first place." />
258 <output name="exon_transcript_mapping" file="./ballgown/e2t.ctab" ftype="tabular" /> 269 <output name="exon_transcript_mapping" file="./ballgown/e2t.ctab" ftype="tabular" />
259 <output name="intron_transcript_mapping" file="./ballgown/i2t.ctab" ftype="tabular" /> 270 <output name="intron_transcript_mapping" file="./ballgown/i2t.ctab" ftype="tabular" />
260 <output name="output_gtf" file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" /> 271 <output name="output_gtf" file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" />
261 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> 272 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" />
262 </test> 273 </test>
263 <!--Ensure output for DESeq2/edgeR works --> 274 <!--Ensure output for edgeR works -->
264 <test expect_num_outputs="5"> 275 <test expect_num_outputs="5">
265 <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> 276 <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />
266 <param name="use_guide" value="yes" /> 277 <param name="use_guide" value="yes" />
267 <param name="special_outputs_select" value="deseq2" /> 278 <param name="special_outputs_select" value="deseq2" />
268 <param name="input_estimation" value="True" /> 279 <param name="input_estimation" value="True" />
269 <param name="guide_gff_select" value="history" /> 280 <param name="guide_gff_select" value="history" />
270 <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" /> 281 <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" />
271 <param name="coverage_file" value="True" /> 282 <param name="coverage_file" value="True" />
272 <param name="clustering" value="True" /> 283 <param name="clustering" value="True" />
273 <output name="gene_counts" file="./deseq2/gene_counts.tsv" ftype="tabular" /> 284 <output name="gene_counts" file="gene_counts_edger.tsv" ftype="tabular" />
274 <output name="transcript_counts" file="./deseq2/transcript_counts.tsv" ftype="tabular" /> 285 <output name="transcript_counts" file="transcript_counts_edger.tsv" ftype="tabular" />
275 <output name="legend" file="./deseq2/legend.tsv" ftype="tabular" /> 286 <output name="legend" file="legend.tsv" ftype="tabular" />
287 <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" />
288 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" />
289 </test>
290 <!--Ensure output for DESeq2 works -->
291 <test expect_num_outputs="5">
292 <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />
293 <param name="use_guide" value="yes" />
294 <param name="special_outputs_select" value="deseq2" />
295 <param name="keep_header" value="False" />
296 <param name="input_estimation" value="True" />
297 <param name="guide_gff_select" value="history" />
298 <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" />
299 <param name="coverage_file" value="True" />
300 <param name="clustering" value="True" />
301 <output name="gene_counts" file="gene_counts_deseq2.tsv" ftype="tabular" />
302 <output name="transcript_counts" file="transcript_counts_deseq2.tsv" ftype="tabular" />
303 <output name="legend" file="legend.tsv" ftype="tabular" />
276 <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" /> 304 <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" />
277 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> 305 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" />
278 </test> 306 </test>
279 <!--Ensure gene abundances output works --> 307 <!--Ensure gene abundances output works -->
280 <test expect_num_outputs="2"> 308 <test expect_num_outputs="2">