Mercurial > repos > iuc > breseq
diff breseq.xml @ 0:f848a7f97332 draft
"planemo upload commit fadaff2d55736bf8c580541d6089c83cd4106a1f"
author | iuc |
---|---|
date | Thu, 31 Oct 2019 19:40:40 -0400 |
parents | |
children | 85c57cc9b558 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/breseq.xml Thu Oct 31 19:40:40 2019 -0400 @@ -0,0 +1,354 @@ +<tool id="breseq" name="breseq" version="@PACKAGE_VERSION@+@GALAXY_VERSION@"> + + <description>find mutations in haploid microbial genomes</description> + + <macros> + <import>macros.xml</import> + </macros> + + <requirements> + <requirement type="package" version="0.34.0">breseq</requirement> + </requirements> + + <version_command>breseq --version</version_command> + + <command detect_errors="aggressive"> + <![CDATA[ + #set $ref_opts = "" + #for $i, $r in enumerate( $references ): + #if str($references[$i].reference.source) == "history": + #for $ref in $references[$i].reference.own_genome: + #if $ref + #set $ref_opts = $ref_opts + " --reference '" + str($ref) + "'" + #end if + #end for + #else: + #set $ref_opts = $ref_opts + " --reference '" + $references[$i].reference.fixed_genome.fields.path + "'" + #end if + #end for + + #if str($run.mode) == 'detect' + breseq + + --num-processors \${GALAXY_SLOTS:-4} + + -o results + + $ref_opts + + #for $s in $run.fastqs: + ${s} + #end for + + #if $run.name + --name '$run.name' + #end if + + $run.polymorphism_prediction + $run.predict_junctions + + #if 'gd' in str($run.output_options.formats).split(','): + && cp results/output/output.gd '$output' + #end if + + #if 'html' in str($run.output_options.formats).split(','): + && cp results/output/index.html '$report' + && mkdir $report.extra_files_path + && cp -R results/output/* $report.extra_files_path + #end if + + #if 'zip' in str($run.output_options.formats).split(','): + && tar -zcf '$zip_output' results + #end if + + #if 'log' in str($run.output_options.formats).split(','): + && cp results/output/log.txt '$log' + #end if + #else + #set $first = 1 + #for $o in str($run.output_options.formats).split(','): + + #if $first == 0 + && + #end if + #set $first = 0 + + gdtools ANNOTATE + + --format '$o' + + -o + #if $o == 'html': + '$annreport' + #else if $o == 'gd': + '$genomediff' + #else if $o == 'tsv': + '$tabdelim' + #else if $o == 'phylip': + '$phylipout' + #else if $o == 'json': + '$jsonout' + #end if + + $ref_opts + + #for $s in $run.gds: + ${s} + #end for + #end for + #end if + ]]> + </command> + + <inputs> + <repeat name="references" title="Reference Genome" min="1"> + <conditional name="reference"> + <param name="source" type="select" label="Reference source" > + <option value="builtin">built-in</option> + <option value="history" selected="true">history</option> + </param> + <when value="builtin"> + <param name="fixed_genome" argument="--reference" type="select" optional="false" label="Galaxy Built-in Reference(s)"> + <options from_data_table="genbank_files"> + <filter type="sort_by" column="3"/> + <validator type="no_options" message="No built-in genbank records have been configured"/> + </options> + </param> + </when> + <when value="history"> + <param name="own_genome" argument="--reference" type="data" format="fasta,genbank" multiple="true" optional="false" label="Fasta or Genbank Reference(s)" /> + </when> + </conditional> + </repeat> + + <conditional name="run"> + <param name="mode" type="select" label="Run Mode" help="Detect, annotate, or compare variants."> + <option value="detect" selected="true">Detect</option> + <option value="annotate">Annotate</option> + <option value="compare">Compare</option> + </param> + <when value="detect"> + + <param name="fastqs" type="data" format="fastq" multiple="true" label="Fastq Read Files" /> + + <param argument="--polymorphism-prediction" name="polymorphism_prediction" type="select" label="Detection Mode" help="**Polymorphism mode**: Detect variants with frequencies between 0% and 100% if a mixture model is well-supported by the read alignment evidence. Use to analyze a mixed population of genomes evolved from a common ancestor. **Consensus mode**: Detect variants present in 100% of the sample. Use when re-sequencing a clonal haploid genome. This mode is the default."> + <option value="" selected="true">Consensus</option> + <option value="--polymorphism-prediction">Polymorphism</option> + </param> + + <param name="name" argument="--name" type="text" value="" label="Analysis Name" help="Human-readable name of the analysis run for output (DEFAULT=none)." /> + + <param name="predict_junctions" type="boolean" truevalue="" falsevalue="--no-junction-prediction" checked="true" label="Predict Junctions" help="Predict new sequence junctions (default). --no-junction-prediction is supplied if 'No' is selected. Otherwise, there is no flag." /> + + <section name="output_options" title="Output Options" expanded="false"> + <param name="formats" type="select" multiple="true" optional="false" display="checkboxes" label="Output Formats"> + <option value="gd" selected="true">Variants (GenomeDiff)</option> + <option value="html">Variant Report (Webpage)</option> + <option value="zip">All Variant Results (Gzip)</option> + <option value="log">Log (Text)</option> + </param> + </section> + + </when> + <when value="annotate"> + + <param name="gds" type="data" format="tabular" multiple="true" optional="false" label="GenomeDiff (gd) Files" help="Files as produced by breseq" /> + + <expand macro="annotate_format_opts"> + <option value="gd" selected="true">Annotated Variants (GenomeDiff)</option> + </expand> + + </when> + <when value="compare"> + + <param name="gds" type="data" format="tabular" multiple="true" optional="false" label="GenomeDiff (gd) Files" help="Files as produced by breseq" min="2" /> + + <expand macro="annotate_format_opts"> + <option value="phylip" selected="true">Variant Comparison (Phylip)</option> + <option value="gd">Annotated Variants (GenomeDiff)</option> + </expand> + + </when> + </conditional> + + </inputs> + + <outputs> + <data format="html" name="report" label="${tool.name} on ${on_string}: Variants (Webpage)"> + <filter>run['mode'] == 'detect' and 'html' in run['output_options']['formats']</filter> + </data> + <data format="html" name="annreport" label="${tool.name} on ${on_string}: Annotated Variants Report (Webpage)"> + <filter>run['mode'] != 'detect' and 'html' in run['output_options']['formats']</filter> + </data> + + <data format="tabular" name="output" label="${tool.name} on ${on_string}: Variants (GenomeDiff)"> + <filter>run['mode'] == 'detect' and 'gd' in run['output_options']['formats']</filter> + </data> + <data format="tabular" name="genomediff" label="${tool.name} on ${on_string}: Annotated Variants (GenomeDiff)"> + <filter>run['mode'] != 'detect' and 'gd' in run['output_options']['formats']</filter> + </data> + + <data format="zip" name="zip_output" label="${tool.name} on ${on_string}: All Variant Results (Gzip)"> + <filter>'zip' in run['output_options']['formats']</filter> + </data> + <data format="txt" name="log" label="${tool.name} on ${on_string}: Breseq Log"> + <filter>'log' in run['output_options']['formats']</filter> + </data> + <data format="tabular" name="tabdelim" label="${tool.name} on ${on_string}: Annotated Variants (Tabular)"> + <filter>'tsv' in run['output_options']['formats']</filter> + </data> + <data format="phylip" name="phylipout" label="${tool.name} on ${on_string}: Variant Comparison (Phylip)"> + <filter>'phylip' in run['output_options']['formats']</filter> + </data> + <data format="txt" name="jsonout" label="${tool.name} on ${on_string}: Annotated Variants (JSON)"> + <filter>'json' in run['output_options']['formats']</filter> + </data> + </outputs> + + <tests> + <test> + <repeat name="references"> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="own_genome" value="lambda.gbk" /> + </conditional> + </repeat> + <conditional name="run"> + <param name="mode" value="detect" /> + <param name="fastqs" value="lambda.short_sequence_repeats.fastq" /> + <param name="polymorphism_prediction" value="" /> + <param name="name" value="smallest" /> + <param name="predict_junctions" value="" /> + <section name="output_options"> + <param name="formats" value="html,log,gd,zip" /> + </section> + </conditional> + + <output name="report" file="report.html" compare="sim_size" delta="100" /> + <output name="log" file="log.txt" lines_diff="4"> + <assert_contents> + <has_text text="breseq --num-processors" /> + </assert_contents> + </output> + <output name="output" file="gdout.txt" lines_diff="8" /> + <output name="zip_output"> + <assert_contents> + <has_archive_member path="results/output/output.gd" /> + </assert_contents> + </output> + </test> + <test> + <repeat name="references"> + <conditional name="reference"> + <param name="source" value="builtin" /> + <param name="fixed_genome" value="lambda1" /> + </conditional> + </repeat> + <conditional name="run"> + <param name="mode" value="detect" /> + <param name="fastqs" value="lambda.short_sequence_repeats.fastq" /> + <param name="polymorphism_prediction" value="" /> + <param name="name" value="smallest" /> + <param name="predict_junctions" value="" /> + <section name="output_options"> + <param name="formats" value="gd" /> + </section> + </conditional> + + <output name="output" file="gdout.txt" lines_diff="8" /> + </test> + <test> + <repeat name="references"> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="own_genome" value="lambda.gbk" /> + </conditional> + </repeat> + <conditional name="run"> + <param name="mode" value="annotate" /> + <param name="gds" value="gdout.txt" /> + <section name="output_options"> + <param name="formats" value="html" /> + </section> + </conditional> + + <output name="annreport" file="gdtoolsout.html" compare="sim_size" delta="100" /> + </test> + </tests> + + <help> + <![CDATA[ +**Detect Variants** + +breseq (pronounced: \\brēz-ˈsēk\\ or breeze-seq) is a computational pipeline for +the analysis of short-read re-sequencing data (e.g. Illumina, 454, IonTorrent, +etc.). It uses reference-based alignment approaches to predict mutations in a +sample relative to an already sequenced genome. breseq is intended for microbial +genomes (<10 Mb) and re-sequenced samples that are only slightly diverged from +the reference sequence (<1 mutation per 1000 bp). + +breseq's primary advantages over other software programs are that it can: + +- Accurately predict new sequence junctions, such as those associated with mobile element insertions. +- Integrate multiple sources of evidence for genetic changes into mutation predictions. +- Produce annotated output describing biologically relevant mutational events. + +breseq was initially developed to analyze data from the Lenski long-term +evolution experiment with `E. coli`_. References: barrick2009a_ barrick2009b_. + +.. _`E. coli`: http://myxo.css.msu.edu/ecoli/ +.. _barrick2009a: http://barricklab.org/twiki/pub/Lab/ToolsBacterialGenomeResequencing/documentation/references.html#barrick2009a +.. _barrick2009b: http://barricklab.org/twiki/pub/Lab/ToolsBacterialGenomeResequencing/documentation/references.html#barrick2009b + +However, breseq may be generally useful to researchers who are: + +- Tracking mutations over time in microbial evolution experiments. +- Checking strains for unwanted second-site mutations after genetic manipulations. +- Identifying mutations that occur during strain improvement or after long-term culture of engineered strains. +- Discovering what mutations arise in pathogens during infection or cause antibiotic resistance. + + +*Inputs* + +Breseq accepts files in FASTQ format. It does not take pair-end information into +account. + +You can either run in clonal (consensus) mode or search for polymorphisms in a +population. + +You can also select an external sequence (eg. a transposon) to detect for +insertions or horizontal transfer. + + +*Outputs* + +Breseq outputs a number of files. These are all condensed in a single zipped +file. + +It contains output files with the final results, accessible through +``output/index.html`` + +It also contains data files with accessory data, including: + +- ``data/reference.fasta`` (file with reference genome: can be used in eg. IGV browser) +- ``data/reference.gff`` (file with genomic annotations: can be used in eg. IGV browser) +- ``data/areference.bam`` (file with read alignments: can be used in eg. IGV browser) +- ``data/unmatched.*`` (files with read that failed to align: can be used to build an assembly or to eg. blast against NCBI) + + +---- + +**Annotate Variants** + +Annotate a GenomeDiff file (generated by breseq) with information about +mutations (what genes they affect, amino acid substitutions, etc.) If multiple +input files are provided, then also COMPARE the frequencies for identical +mutations across samples. + ]]> + </help> + + <citations> + <citation type="doi">10.1007/978-1-4939-0554-6_12</citation> + </citations> + +</tool> \ No newline at end of file