stringtie: stringtie.xml comparison

comparison stringtie.xml @ 13:a305d75e13f2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stringtie commit e811a7887db870f4f94f620f52bce656c8d5ba23

author	iuc
date	Thu, 12 Apr 2018 17:30:07 -0400
parents	76d290331481
children	eafd5dc95228

comparison

equal deleted inserted replaced

-:76d290331481
+:a305d75e13f2
-<tool id="stringtie" name="StringTie" version="1.3.3.1">
+<tool id="stringtie" name="StringTie" version="1.3.3.2">
 <description>transcript assembly and quantification</description>
 <macros>
 <import>macros.xml</import>
 </macros>
 <expand macro="requirements" />
 <expand macro="stdio" />
 <expand macro="version_command" />
 <command><![CDATA[
+#import re
 mkdir -p ./special_de_output/sample1/ &&
 ## Get Guide GTF/GFF if selected
 #if str($guide.use_guide) == 'yes':
 -x '$adv.omit_sequences'
 #end if
 #if str($guide.use_guide) == 'yes':
 #if $guide.special_outputs.special_outputs_select == 'deseq2':
+#set escaped_element_identifier = re.sub('[^\w\-]', '_', str($input_bam.element_identifier))
 &&
 ln -s '$output_gtf' ./special_de_output/sample1/output.gtf
 &&
+TAB=\$(printf '\t')
+&&
+CR=\$(printf '\r')
+&&
 prepDE.py
 -i ./special_de_output/
--g '$gene_counts'
+-g gene_counts.csv
--t '$transcript_counts'
+-t transcript_counts.csv
 -l $guide.special_outputs.read_length
 #if $guide.special_outputs.string:
 -s '$guide.special_outputs.string'
+#end if
+#if $guide.special_outputs.clustering:
+-c
+#if $guide.special_outputs.key:
+-k '$guide.special_outputs.key'
 #end if
-#if $guide.special_outputs.clustering:
+--legend '$legend'
--c
+> /dev/null
-#if $guide.special_outputs.key:
+&&
--k '$guide.special_outputs.key'
+sed -i.bak -e "s/,/\${TAB}/g" -e "s/\${CR}//g" '$legend'
-#end if
+#else
---legend '$legend'
+> /dev/null
-> /dev/null
+#end if
-&&
-sed -i.bak 's/,/\t/g' '$legend'
+## Replace commas with tabs
 &&
-sed -i.bak 's/\r//g' '$legend'
+sed -i.bak -e "s/,/\${TAB}/g" -e "s/\${CR}//g" gene_counts.csv transcript_counts.csv
-#end if
+#if $guide.special_outputs.keep_header:
+&&
-> /dev/null
+head -n 1 gene_counts.csv | sed -e 's/sample1/$escaped_element_identifier/' > '$gene_counts'
+&&
-&&
+head -n 1 transcript_counts.csv | sed -e 's/sample1/$escaped_element_identifier/' > '$transcript_counts'
-sed -i.bak 's/,/\t/g' '$transcript_counts'
+#end if
-&&
+## Sort count files on the first column
-sed -i.bak 's/\r//g' '$transcript_counts'
+&&
-&&
+tail -n +2 gene_counts.csv | sort -t"\${TAB}" -k1 >> '$gene_counts'
-sed -i.bak 's/,/\t/g' '$gene_counts'
+&&
-&&
+tail -n +2 transcript_counts.csv | sort -t"\${TAB}" -k1 >> '$transcript_counts'
-sed -i.bak 's/\r//g' '$gene_counts'
 #end if
 #end if
 ]]></command>
 <inputs>
 <param name="input_bam" type="data" format="sam,bam" label="Input mapped reads" help="Input BAM/SAM file containing reads you want to assemble into transcripts"/>
 <option value="no" selected="True">No additional output</option>
 </param>
 <when value="ballgown" />
 <when value="deseq2">
 <param name="read_length" argument="--length" type="integer" min="0" value="75" label="Specify the average read length" help="Default: 75" />
-<param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="False" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that overlap. Transcripts containing the geneID prefix will be ignored. Default: No" />
+<param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="false" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that overlap. Transcripts containing the geneID prefix will be ignored. Default: No" />
 <param argument="--string" type="text" label="Prefix used for transcripts" help="If a different prefix was used for geneIDs assigned by StringTie than the default, specify it here. Only letters and numbers will be retained in this field. Default: MSTRG" >
 <sanitizer>
 <valid initial="string.letters,string.digits"></valid>
 </sanitizer>
 </param>
 <param argument="--key" type="text" label="Prefix for clustering" help="If clustering, what prefix to use for geneIDs assigned by this script. Only letters and numbers will be retained in this field. Default: prepG">
 <sanitizer>
 <valid initial="string.letters,string.digits"></valid>
 </sanitizer>
 </param>
+<param name="keep_header" type="boolean" checked="true" label="Output header line?" help="Keep the header line for edgeR, remove it for DESeq2" />
 </when>
 <when value="no" />
 </conditional>
 <param name="coverage_file" argument="-C" type="boolean" truevalue="-C" falsevalue="" checked="False" label="Output coverage file?" help="If StringTie is run with this option (requires -G), it returns a file with all the transcripts in the reference annotation that are fully covered, end to end, by reads. The output format is a GTF file as described below. Each line of the GTF corresponds to a gene or transcript in the reference annotation. Default: No"/>
 </when>
 </conditional>
 <section name="adv" title="Advanced Options">
 <param name="abundance_estimation" argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Output gene abundance estimation file?" help="If selected, gene abundances will be reported in a tab-delimited file, see below for more information. Default: No"/>
 <param name="omit_sequences" argument="-x" type="text" value="" label="Do not assemble any transcripts on these reference sequence(s)" help="Ignore all read alignments (and thus do not attempt to perform transcript assembly) on the specified reference sequences. This parameter can be a single reference sequence name (e.g. chrM) or a comma-delimited list of sequence names (e.g. chrM,chrX,chrY). This can speed up StringTie especially in the case of excluding the mitochondrial genome, whose genes may have very high coverage in some cases, even though they may be of no interest for a particular RNA-Seq analysis. The reference sequence names are case sensitive; they must exactly match the names of chromosomes/contigs of the target genome against which the RNA-Seq reads were aligned in the first place." />
 <output name="exon_transcript_mapping" file="./ballgown/e2t.ctab" ftype="tabular" />
 <output name="intron_transcript_mapping" file="./ballgown/i2t.ctab" ftype="tabular" />
 <output name="output_gtf" file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" />
 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" />
 </test>
-<!--Ensure output for DESeq2/edgeR works -->
+<!--Ensure output for edgeR works -->
 <test expect_num_outputs="5">
 <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />
 <param name="use_guide" value="yes" />
 <param name="special_outputs_select" value="deseq2" />
 <param name="input_estimation" value="True" />
 <param name="guide_gff_select" value="history" />
 <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" />
 <param name="coverage_file" value="True" />
 <param name="clustering" value="True" />
-<output name="gene_counts" file="./deseq2/gene_counts.tsv" ftype="tabular" />
+<output name="gene_counts" file="gene_counts_edger.tsv" ftype="tabular" />
-<output name="transcript_counts" file="./deseq2/transcript_counts.tsv" ftype="tabular" />
+<output name="transcript_counts" file="transcript_counts_edger.tsv" ftype="tabular" />
-<output name="legend" file="./deseq2/legend.tsv" ftype="tabular" />
+<output name="legend" file="legend.tsv" ftype="tabular" />
+<output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" />
+<output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" />
+</test>
+<!--Ensure output for DESeq2 works -->
+<test expect_num_outputs="5">
+<param name="input_bam" ftype="bam" value="stringtie_in1.bam" />
+<param name="use_guide" value="yes" />
+<param name="special_outputs_select" value="deseq2" />
+<param name="keep_header" value="False" />
+<param name="input_estimation" value="True" />
+<param name="guide_gff_select" value="history" />
+<param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" />
+<param name="coverage_file" value="True" />
+<param name="clustering" value="True" />
+<output name="gene_counts" file="gene_counts_deseq2.tsv" ftype="tabular" />
+<output name="transcript_counts" file="transcript_counts_deseq2.tsv" ftype="tabular" />
+<output name="legend" file="legend.tsv" ftype="tabular" />
 <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" />
 <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" />
 </test>
 <!--Ensure gene abundances output works -->
 <test expect_num_outputs="2">

Mercurial > repos > iuc > stringtie

comparison stringtie.xml @ 13:a305d75e13f2 draft