Mercurial > repos > iuc > stringtie
diff stringtie.xml @ 11:6e45b443ef1f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stringtie commit d46e597732cda927a633e133c14dd4ece39f5edf
author | iuc |
---|---|
date | Thu, 01 Jun 2017 12:16:04 -0400 |
parents | c84d44519b2e |
children | 76d290331481 |
line wrap: on
line diff
--- a/stringtie.xml Tue Apr 04 12:58:27 2017 -0400 +++ b/stringtie.xml Thu Jun 01 12:16:04 2017 -0400 @@ -6,8 +6,7 @@ <expand macro="requirements" /> <expand macro="stdio" /> <expand macro="version_command" /> - <command> - <![CDATA[ + <command><![CDATA[ mkdir -p ./special_de_output/sample1/ && #if str($guide.use_guide) == 'yes': ln -s '$guide.guide_gff' ./special_de_output/sample1/guide.gtf && @@ -42,7 +41,7 @@ -A "$gene_abundance_estimation" #end if #if str($option_set.omit_sequences).strip() != "": - -x "$option_set.omit_sequences" + -x '$option_set.omit_sequences' #end if #end if @@ -71,122 +70,117 @@ sed -i.bak 's/,/\t/g' gene_cout_matrix.tsv #end if #end if - - ]]> - </command> + ]]></command> <inputs> - <param format="sam,bam" label="Mapped reads to assemble transcripts from" name="input_bam" type="data" /> + <param name="input_bam" type="data" format="sam,bam" label="Mapped reads to assemble transcripts from" /> <conditional name="guide"> - <param label="Use GFF file to guide assembly" name="use_guide" type="select"> + <param name="use_guide" type="select" label="Use GFF file to guide assembly"> <option value="yes">Use GFF/GTF</option> <option selected="True" value="no">Do not use GFF/GTF</option> </param> <when value="no" /> <when value="yes"> - <param argument="-G" format="gtf,gff3" name="guide_gff" type="data" - help="" label="Reference annotation to use for guiding the assembly process" /> - <param argument="-e" name="input_estimation" truevalue="-e" type="boolean" falsevalue="" - help="" label="Perform abundance estimation only of input transcripts" /> + <param name="guide_gff" argument="-G" type="data" format="gtf,gff3" + label="Reference annotation to use for guiding the assembly process" /> + <param name="input_estimation" argument="-e" type="boolean" truevalue="-e" falsevalue="" + label="Perform abundance estimation only of input transcripts" /> <conditional name="special_outputs"> - <param label="Output additional files for use in..." 
name="special_outputs_select" type="select"> + <param name="special_outputs_select" type="select" label="Output additional files for use in..."> <option value="ballgown">Ballgown</option> <option selected="True" value="deseq2">DESeq2/EdgeR</option> <option value="no">No addional output</option> </param> <when value="ballgown" /> <when value="deseq2"> - <param label="Average read length" name="read_length" type="integer" value="75" help="" /> - <param label="Whether to cluster genes that overlap with different gene IDs" - name="clustering" - truevalue="--cluster" - type="boolean" help="ignoring ones with geneID pattern" falsevalue="" /> + <param name="read_length" type="integer" value="75" label="Average read length" /> + <param name="clustering" type="boolean" truevalue="--cluster" falsevalue="" label="Whether to cluster genes that overlap with different gene IDs" help="ignoring ones with geneID pattern" /> </when> + <when value="no" /> </conditional> </when> </conditional> <conditional name="option_set"> - <param help="" label="Options" name="options" type="select"> + <param name="options" type="select" label="Options"> <option selected="True" value="default">Use defaults</option> <option value="advanced">Specify advanced options</option> </param> <when value="default" /> <when value="advanced"> - <param argument="-t" falsevalue="" name="disable_trimming" truevalue="-t" type="boolean" + <param name="disable_trimming" argument="-t" type="boolean" truevalue="-t" falsevalue="" label="Disable trimming of predicted transcripts based on coverage" /> - <param argument="-S" falsevalue="" - label="Increase sensitivity" name="sensitive" truevalue="-S" type="boolean" /> - <param argument="-l" label="Name prefix for output transcripts" name="name_prefix" type="text" value="STRG" /> - <param argument="-f" label="Minimum isoform fraction" max="1.0" min="0.0" name="fraction" type="float" value="0.15" /> - <param argument="-m" label="Minimum assembled transcript length" 
name="min_tlen" type="integer" value="200" /> - <param argument="-a" label="Minimum anchor length for junctions" name="min_anchor_len" type="integer" value="10" /> - <param argument="-j" label="Minimum junction coverage" name="min_anchor_cov" type="integer" value="1" /> - <param argument="-c" label="Minimum bundle reads per bp coverage to consider for assembly" name="min_bundle_cov" type="integer" value="2" /> - <param argument="-g" label="Gap between read mappings triggering a new bundle" name="bdist" type="integer" value="50" /> - <param argument="-M" label="Fraction of bundle allowed to be covered by multi-hit reads" name="bundle_fraction" type="float" value="0.95" /> - <param argument="-x" name="omit_sequences" type="text" value="" - help="e.g. chrM,chrX" label="Do not assemble any transcripts on these reference sequence(s)" /> - <param argument="-A" falsevalue="" name="abundance_estimation" truevalue="-A" type="boolean" + <param name="sensitive" argument="-S" type="boolean" truevalue="-S" falsevalue="" + label="Increase sensitivity" /> + <param name="name_prefix" argument="-l" type="text" value="STRG" label="Name prefix for output transcripts" /> + <param name="fraction" argument="-f" type="float" value="0.15" min="0.0" max="1.0" label="Minimum isoform fraction" /> + <param name="min_tlen" argument="-m" type="integer" value="200" label="Minimum assembled transcript length" /> + <param name="min_anchor_len" argument="-a" type="integer" value="10" label="Minimum anchor length for junctions" /> + <param name="min_anchor_cov" argument="-j" type="integer" value="1" label="Minimum junction coverage" /> + <param name="min_bundle_cov" argument="-c" type="integer" value="2" label="Minimum bundle reads per bp coverage to consider for assembly" /> + <param name="bdist" argument="-g" type="integer" value="50" label="Gap between read mappings triggering a new bundle" /> + <param name="bundle_fraction" argument="-M" type="float" value="0.95" label="Fraction of bundle 
allowed to be covered by multi-hit reads" /> + <param name="omit_sequences" argument="-x" type="text" value="" + label="Do not assemble any transcripts on these reference sequence(s)" help="e.g. chrM,chrX" /> + <param name="abundance_estimation" argument="-A" type="boolean" truevalue="-A" falsevalue="" label="Additional gene abundance estimation output file" /> - <param argument="-u" falsevalue="" truevalue="-u" type="boolean" - label="Disable multi-mapping correction" name="multi_mapping" /> + <param name="multi_mapping" argument="-u" type="boolean" truevalue="-u" falsevalue="" + label="Disable multi-mapping correction" /> </when> </conditional> </inputs> <outputs> - <data format="gtf" label="${tool.name} on ${on_string}: Assembled transcripts" name="output_gtf" /> - <data format="gtf" label="${tool.name} on ${on_string}: Gene abundance estimates" name="gene_abundance_estimation"> + <data name="output_gtf" format="gtf" label="${tool.name} on ${on_string}: Assembled transcripts" /> + <data name="gene_abundance_estimation" format="gtf" label="${tool.name} on ${on_string}: Gene abundance estimates"> <filter>option_set['options'] == 'advanced' and option_set['abundance_estimation']</filter> </data> - <data format="gff3" label="${tool.name} on ${on_string}: Coverage" name="coverage"> + <data name="coverage" format="gff3" label="${tool.name} on ${on_string}: Coverage"> <filter>guide['use_guide'] == 'yes'</filter> </data> - <data format="tabular" from_work_dir="special_de_output/sample1/e_data.ctab" - label="${tool.name} on ${on_string}: exon-level expression measurements" name="exon_expression"> + <data name="exon_expression" format="tabular" from_work_dir="special_de_output/sample1/e_data.ctab" + label="${tool.name} on ${on_string}: exon-level expression measurements"> <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter> </data> - <data format="tabular" from_work_dir="special_de_output/sample1/i_data.ctab" - 
label="${tool.name} on ${on_string}: intron-level expression measurements" name="intron_expression"> + <data name="intron_expression" format="tabular" from_work_dir="special_de_output/sample1/i_data.ctab" + label="${tool.name} on ${on_string}: intron-level expression measurements"> <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter> </data> - <data format="tabular" from_work_dir="special_de_output/sample1/t_data.ctab" - label="${tool.name} on ${on_string}: transcript-level expression measurements" name="transcript_expression"> + <data name="transcript_expression" format="tabular" from_work_dir="special_de_output/sample1/t_data.ctab" + label="${tool.name} on ${on_string}: transcript-level expression measurements"> <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter> </data> - <data format="tabular" from_work_dir="special_de_output/sample1/e2t.ctab" - label="${tool.name} on ${on_string}: exon to transcript mapping" name="exon_transcript_mapping"> + <data name="exon_transcript_mapping" format="tabular" from_work_dir="special_de_output/sample1/e2t.ctab" + label="${tool.name} on ${on_string}: exon to transcript mapping"> <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter> </data> - <data format="tabular" from_work_dir="special_de_output/sample1/i2t.ctab" - label="${tool.name} on ${on_string}: intron to transcript mapping" name="intron_transcript_mapping"> + <data name="intron_transcript_mapping" format="tabular" from_work_dir="special_de_output/sample1/i2t.ctab" + label="${tool.name} on ${on_string}: intron to transcript mapping"> <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter> </data> - - <data format="tabular" from_work_dir="gene_cout_matrix.tsv" - label="${tool.name} on ${on_string}: Gene counts" name="gene_counts"> + 
<data name="gene_counts" format="tabular" from_work_dir="gene_cout_matrix.tsv" + label="${tool.name} on ${on_string}: Gene counts"> <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'deseq2'</filter> </data> - <data format="tabular" from_work_dir="transcripts_count_matrix.tsv" - label="${tool.name} on ${on_string}: Transcript counts" name="transcript_counts"> + <data name="transcript_counts" format="tabular" from_work_dir="transcripts_count_matrix.tsv" + label="${tool.name} on ${on_string}: Transcript counts"> <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'deseq2'</filter> </data> - <data format="tabular" from_work_dir="legend.tsv" - label="${tool.name} on ${on_string}: legend" name="legend"> + <data name="legend" format="tabular" from_work_dir="legend.tsv" + label="${tool.name} on ${on_string}: legend"> <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'deseq2' and guide['special_outputs']['clustering'] is True</filter> </data> </outputs> <tests> <test> - <param ftype="bam" name="input_bam" value="stringtie_in1.bam" /> + <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> <param name="use_guide" value="no" /> <param name="options" value="default" /> - <output file="stringtie_out1.gtf" ftype="gtf" lines_diff="2" name="output_gtf" /> + <output name="output_gtf" file="stringtie_out1.gtf" ftype="gtf" lines_diff="2" /> </test> <test> - <param ftype="bam" name="input_bam" value="stringtie_in1.bam" /> + <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> <param name="use_guide" value="no" /> <param name="options" value="advanced" /> <param name="fraction" value="0.17" /> - <output file="stringtie_out2.gtf" ftype="gtf" lines_diff="2" name="output_gtf" /> + <output name="output_gtf" file="stringtie_out2.gtf" ftype="gtf" lines_diff="2" /> </test> <test> <param ftype="bam" name="input_bam" value="stringtie_in1.bam" 
/> @@ -197,101 +191,69 @@ <output file="stringtie_out3.gtf" ftype="gtf" lines_diff="2" name="output_gtf" /> </test> <test> - <param ftype="bam" name="input_bam" value="stringtie_in1.bam" /> + <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> <param name="use_guide" value="yes" /> <param name="special_outputs_select" value="no" /> <param name="guide_gff" value="stringtie_in.gtf" /> <param name="options" value="advanced" /> <param name="fraction" value="0.17" /> - <output file="stringtie_out4.gtf" ftype="gtf" lines_diff="2" name="output_gtf" /> + <output name="output_gtf" file="stringtie_out4.gtf" ftype="gtf" lines_diff="2" /> </test> <test> - <param ftype="bam" name="input_bam" value="stringtie_in1.bam" /> + <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> <param name="use_guide" value="yes" /> <param name="special_outputs_select" value="ballgown" /> <param name="guide_gff" value="stringtie_in.gtf" /> <param name="options" value="default" /> - <output file="./ballgown/e_data.ctab" ftype="tabular" name="exon_expression" /> - <output file="./ballgown/i_data.ctab" ftype="tabular" name="intron_expression" /> - <output file="./ballgown/t_data.ctab" ftype="tabular" name="transcript_expression" /> - <output file="./ballgown/e2t.ctab" ftype="tabular" name="exon_transcript_mapping" /> - <output file="./ballgown/i2t.ctab" ftype="tabular" name="intron_transcript_mapping" /> - <output file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" name="output_gtf" /> - <output file="stringtie_out_coverage.gtf" ftype="gff3" name="coverage" /> + <output name="exon_expression" file="./ballgown/e_data.ctab" ftype="tabular" /> + <output name="intron_expression" file="./ballgown/i_data.ctab" ftype="tabular" /> + <output name="transcript_expression" file="./ballgown/t_data.ctab" ftype="tabular" /> + <output name="exon_transcript_mapping" file="./ballgown/e2t.ctab" ftype="tabular" /> + <output name="intron_transcript_mapping" file="./ballgown/i2t.ctab" 
ftype="tabular" /> + <output name="output_gtf" file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" /> + <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gff3" /> </test> <test> - <param ftype="bam" name="input_bam" value="stringtie_in1.bam" /> + <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> <param name="use_guide" value="yes" /> <param name="special_outputs_select" value="deseq2" /> <param name="input_estimation" value="True" /> <param name="guide_gff" value="stringtie_in.gtf" /> <param name="options" value="default" /> <param name="clustering" value="True" /> - <output file="./deseq2/gene_counts.tsv" ftype="tabular" lines_diff="2" name="gene_counts" /> - <output file="./deseq2/transcript_counts.tsv" ftype="tabular" name="transcript_counts" /> - <output file="./deseq2/legend.tsv" ftype="tabular" name="legend" /> - <output file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" name="output_gtf" /> - <output file="stringtie_out_coverage.gtf" ftype="gff3" name="coverage" /> + <output name="gene_counts" file="./deseq2/gene_counts.tsv" ftype="tabular" lines_diff="2" /> + <output name="transcript_counts" file="./deseq2/transcript_counts.tsv" ftype="tabular" /> + <output name="legend" file="./deseq2/legend.tsv" ftype="tabular" /> + <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" /> + <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gff3" /> </test> <test> - <param ftype="bam" name="input_bam" value="stringtie_in1.bam" /> + <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> <param name="use_guide" value="yes" /> <param name="guide_gff" value="stringtie_in.gtf" /> <param name="options" value="advanced" /> <param name="fraction" value="0.17" /> <param name="abundance_estimation" value="True" /> - <output file="stringtie_out4.gtf" ftype="gtf" lines_diff="2" name="output_gtf" /> - <output file="stringtie_out7.gtf" ftype="gtf" lines_diff="2" name="gene_abundance_estimation" /> + 
<output name="output_gtf" file="stringtie_out4.gtf" ftype="gtf" lines_diff="2" /> + <output name="gene_abundance_estimation" file="stringtie_out7.gtf" ftype="gtf" lines_diff="2" /> </test> <test> - <param ftype="bam" name="input_bam" value="stringtie_in1.bam" /> + <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> <param name="use_guide" value="yes" /> <param name="special_outputs_select" value="no" /> <param name="guide_gff" value="stringtie_in.gtf" /> <param name="options" value="advanced" /> <param name="fraction" value="0.15" /> <param name="c" value="test_chromosome" /> - <output file="stringtie_out8.gtf" ftype="gtf" lines_diff="2" name="output_gtf" /> + <output name="output_gtf" file="stringtie_out8.gtf" ftype="gtf" lines_diff="2" /> </test> </tests> - <help> -<![CDATA[ - + <help><![CDATA[ **What it does?** StringTie_ is a fast and highly efficient assembler of RNA-Seq alignments into potential transcripts. It uses a novel network flow algorithm as well as an optional *de novo* assembly step to assemble and quantitate full-length transcripts representing multiple splice variants for each gene locus. Its input can include not only the alignments of raw reads used by other transcript assemblers, but also alignments of longer sequences that have been assembled from those reads. To identify differentially expressed genes between experiments, StringTie's output can be processed either by the Cuffdiff or Ballgown programs. ..
_StringTie: http://ccb.jhu.edu/software/stringtie/ - ------- - -StringTie has the following options:: - - -G reference annotation to use for guiding the assembly process (GTF/GFF3) - -l name prefix for output transcripts (default: STRG) - -f minimum isoform fraction (default: 0.1) - -m minimum assembled transcript length (default: 200) - -o output path/file name for the assembled transcripts GTF (default: stdout) - -a minimum anchor length for junctions (default: 10) - -j minimum junction coverage (default: 1) - -t disable trimming of predicted transcripts based on coverage - (default: coverage trimming is enabled) - -c minimum reads per bp coverage to consider for transcript assembly (default: 2.5) - -v verbose (log bundle processing details) - -g gap between read mappings triggering a new bundle (default: 50) - -C output file with reference transcripts that are covered by reads - -M fraction of bundle allowed to be covered by multi-hit reads (default:0.95) - -p number of threads (CPUs) to use (default: 1) - -A gene abundance estimation output file - -B enable output of Ballgown table files which will be created in the - same directory as the output GTF (requires -G, -o recommended) - -b enable output of Ballgown table files but these files will be - created under the directory path given as <dir_path> - -e only estimates the abundance of given reference transcripts (requires -G) - -x do not assemble any transcripts on these reference sequence(s) - -u no multi-mapping correction default: false) - - ]]> - </help> + ]]></help> <expand macro="citations" /> </tool>