comparison bacteria_tradis.xml @ 0:738e58ed9cc2 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/biotradis commit 1c0a0f88149bf8863a89c58bace81e070b3adb5a"
author iuc
date Wed, 29 Jan 2020 10:41:06 -0500
parents
children 58234d95978d
comparison
equal deleted inserted replaced
-1:000000000000 0:738e58ed9cc2
<tool id="bacteria_tradis" name="Bio-TraDis reads to counts" version="@VERSION@">
    <!--
        Galaxy wrapper for the Bio-TraDis `bacteria_tradis` pipeline.
        Inputs: one fastq dataset of TraDIS reads and one fasta reference.
        Outputs: the mapping statistics file, the insert site plot and the BAM
        file that the pipeline leaves in the job working directory.
    -->
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command>
<![CDATA[
        ## bacteria_tradis takes a text file listing the fastq paths (-f), so write
        ## the single input path into file.txt; the statistics file is then file.stats.
        ## NOTE(review): recent Bio-TraDis releases map with bwa unless the smalt
        ## switch is given - confirm the --smalt_* options below take effect with
        ## the version pinned in macros.xml.
        ls '${input_fastq}' > file.txt &&
        bacteria_tradis -v -f file.txt -r '${input_ref}'
        #if str($map_parameters.map_options) == "modify":
            #if str($map_parameters.set_kmers_options.set) == "yes":
                --smalt_k '$map_parameters.set_kmers_options.kmer_length'
                --smalt_s '$map_parameters.set_kmers_options.step_size'
            #end if
            --smalt_y '$map_parameters.min_percentage'
            --smalt_r '$map_parameters.duplicate_reads'
            -m '$map_parameters.min_quality'
        #end if

        #if str($tranposon_tag.use) == "yes":
            ## -mm is the number of tag mismatches; -m (above) is the mapping quality cutoff.
            -mm '$tranposon_tag.nb_mismatches'
            -t '$tranposon_tag.sequence'
            -td '$tranposon_tag.tagdir'
        #end if
        2>&1
]]>
    </command>

    <inputs>
        <param name="input_fastq" type="data" format="fastq" label="Fastq file containing TraDis reads"/>
        <param name="input_ref" type="data" format="fasta" label="Fasta File of the reference Genome"/>

        <conditional name="map_parameters">
            <param name="map_options" type="select" label="Mapping Parameters" help="By default, the bacteria_tradis pipeline determines appropriate read mapping parameters automatically from the length of the first read in the fastq file. These parameters have been tested for data issue from TraDIS protocol of Barquist et al.">
                <option value="default" selected="true">Use Default Parameters</option>
                <option value="modify">Set Mapping parameters</option>
            </param>
            <when value="modify">
                <conditional name="set_kmers_options">
                    <param name="set" type="boolean" label="Modify kmers parameters" truevalue="yes" falsevalue="no" />
                    <when value="yes">
                        <!-- NOTE(review): both integers are non-optional but have an empty default; confirm the target Galaxy version accepts this or supply defaults. -->
                        <param name="kmer_length" type="integer" value="" min="9" max="20" label=" Length of kmers hashed (--smalt_k)" help=" The minimum length of an exact match between a read and the genome needed to trigger an alignment attempt. Appropriate values are between ~10 and 20 for bacterial genomes depending on read length. Lower values lead to increased sensitivity at the expense of runtime." />
                        <param name="step_size" type="integer" value="" min="1" max="15" label="Step size for smalt kmers (--smalt_s)" help=" Distance between the start of hashed kmers. Appropriate values are between 1 and ~15, but should be less than --smalt_k to ensure kmers overlap. Lower values lead to increased sensitivity at the expense of runtime." />
                    </when>
                    <when value="no">
                    </when>
                </conditional>

                <param name="min_percentage" type="float" value="0.96" min="0" max="1" label="Minimum percentage of identical bases between read and reference (--smalt_y)" help="May be lowered to improve sensitivity in the case of low quality or short reads." />
                <!-- Checked means "randomly assign", which is smalt_r 0; unchecked leaves multi-mapping reads unmapped, which is smalt_r -1. -->
                <param name="duplicate_reads" type="boolean" truevalue="0" falsevalue="-1" label="Randomly assign position to reads that align in multiple location (--smalt_r)" help="If not, reads mapping in multiples positions are ignored" />
                <param name="min_quality" type="integer" value="30" label="Minimum mapping quality score (-m) " help="Multi-mapping reads have a quality score of 0 by definition, so this parameter needs to be set to 0 for these reads to be properly processed. Can be lowered without dramatically affecting results in most cases, particularly if --smalt_y is set reasonably." />
            </when>
            <when value="default">
            </when>
        </conditional>

        <!-- The conditional keeps its historical (misspelled) name because the command template and saved workflows refer to it. -->
        <conditional name="tranposon_tag">
            <param name="use" type="boolean" truevalue="yes" falsevalue="no" label="Search for a transposon tag" help="Use with data containing a transposon tag attached to the reads. Only reads containing the transposon tag will be processed, and the tag will be removed before mapping." />
            <when value="yes">
                <param name="sequence" type="text" value="" label="Transposon tag sequence (-t)" help="Sequence of the transposon tag to search for at the start or end of the reads." />
                <!-- Default of 0 matches the tool default described in the help; no upper bound because longer tags may justify more than 2 mismatches. -->
                <param name="nb_mismatches" type="integer" value="0" min="0" label="Number of mismatches allowed in the transposon tag (-mm)" help="If there is evidence for low-quality bases in the transposon tag (from FastQC, for instance), setting this to 1 or 2 may result in higher recovery of insertion sites. Higher than 2 is not advisable with the typical transposon tag lengths (10 - 12 bases) produced by TraDIS protocols, but may be appropriate with protocols that produce significantly longer transposon tags." />
                <param name="tagdir" type="select" label="Direction of the transposon tag (-td)" help="" >
                    <option value="3" selected="true">3'</option>
                    <option value="5">5'</option>
                </param>
            </when>
            <when value="no">
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- NOTE(review): from_work_dir is documented as a literal relative path; confirm that the glob patterns on Counts and Aligned_reads are resolved by the target Galaxy version. -->
        <data format="txt" name="Statistics" label="${input_fastq.name} Statistics" from_work_dir="file.stats" />
        <data name="Counts" format="tabular" from_work_dir="./*.gz" />
        <data name="Aligned_reads" format="bam" from_work_dir="./*.bam" />
    </outputs>

    <tests>
        <!-- Default mapping parameters, no transposon tag. -->
        <test>
            <param name="input_fastq" ftype="fastq" value="tiny.fastq.gz"/>
            <param name="input_ref" ftype="fasta" value="tiny_ref.fasta"/>
            <conditional name="map_parameters">
                <param name="map_options" value="default"/>
            </conditional>
            <conditional name="tranposon_tag">
                <param name="use" value="no"/>
            </conditional>
            <output name="Statistics" file="file.stats" lines_diff="2" />
            <output name="Counts" file="tiny.out.gz.CP009273.1_60_120.insert_site_plot.gz" compare="diff" decompress="true" lines_diff="0" />
        </test>
        <!-- Custom mapping parameters with explicit kmer settings, no transposon tag. -->
        <test>
            <param name="input_fastq" ftype="fastq" value="tiny.fastq.gz"/>
            <param name="input_ref" ftype="fasta" value="tiny_ref.fasta"/>
            <conditional name="map_parameters">
                <param name="map_options" value="modify"/>
                <conditional name="set_kmers_options">
                    <param name="set" value="yes"/>
                    <param name="kmer_length" value="10"/>
                    <param name="step_size" value="5"/>
                </conditional>
                <param name="min_percentage" value="0.5"/>
                <!-- false selects smalt_r -1 (multi-mapping reads left unmapped). -->
                <param name="duplicate_reads" value="false"/>
                <param name="min_quality" value="20"/>
            </conditional>
            <conditional name="tranposon_tag">
                <param name="use" value="no"/>
            </conditional>
            <output name="Statistics" file="file.stats" lines_diff="2" />
            <output name="Counts" file="tiny_1.out.gz.CP009273.1_60_120.insert_site_plot.gz" compare="diff" decompress="true" lines_diff="0" />
        </test>
    </tests>
    <help>
<![CDATA[

**What it does**

Bio-TraDis provides software utilities for the processing, mapping, and analysis of transposon insertion sequencing data. The pipeline was designed with the data from the TraDIS sequencing protocol in mind, but should work with a variety of transposon insertion sequencing protocols as long as they produce data in the expected format.

-----

**Parameters**

The --smalt_r 0 and -m 0 options specify that we want to map reads with multiple best mappings to a random position and use these in our downstream analyses; by default these reads are left unmapped. Mapping and processing this library will take about 30 minutes to an hour on a typical desktop computer.

By default, the bacteria_tradis pipeline determines appropriate read mapping parameters automatically from the length of the first read in the fastq file. It should be noted that the default parameters have been tested using the optimized TraDIS protocol of Barquist et al., 2016 in the hands of an experienced sequencing specialist; these will need to be tuned for other protocols, or for pilot runs, etc. There are various other scenarios in which it would be appropriate to reduce the stringency of these parameters: in the case that read trimming has been applied, if there are quality issues in the library, for certain types of studies (particularly gene essentiality studies as above), or if the quality of the reference genome is low (or of a different strain).


The *-mm* option specifies the number of mismatches allowed when matching the transposon tag; by default none are allowed. We sometimes observe one or two positions within the transposon tag that seem to have generally low quality. If there is evidence for low-quality bases in the transposon tag (from FastQC, for instance), setting this to 1 or 2 may result in higher recovery of insertion sites. Higher than 2 is not advisable with the typical transposon tag lengths (10 - 12 bases) produced by TraDIS protocols, but may be appropriate with protocols that produce significantly longer transposon tags.


The *-td* option sets the direction of the transposon tag (3 or 5); defaults to 3.


The *-m* option sets the minimum mapping quality score to use an alignment in downstream analysis (e.g. plot files); defaults to 30. Multi-mapping reads have a quality score of 0 by definition, so this parameter needs to be set to 0 for these reads to be properly processed. Can be lowered without dramatically affecting results in most cases, particularly if *--smalt_y* is set reasonably.


The other options specify parameters for the smalt mapper, which are discussed in more detail in the smalt manual (ftp.sanger.ac.uk/pub/resources/software/smalt/smalt-manual-0.7.4.pdf). We will discuss their effects on TraDIS mapping briefly here:

*--smalt_k*: length of kmers hashed; roughly, the minimum length of an exact match between a read and the genome needed to trigger an alignment attempt. Appropriate values are between ~10 and 20 for bacterial genomes depending on read length. Lower values lead to increased sensitivity at the expense of runtime.

*--smalt_s*: skipstep. Sampling step size, i.e. the distance between successive words that are hashed along the genomic reference sequence. With the option -s 1 every word is hashed, with -s 2 every second word, with -s 3 every third etc. Appropriate values are between 1 and ~15, but should be less than --smalt_k to ensure kmers overlap. Lower values lead to increased sensitivity at the expense of runtime.


*--smalt_y*: minimum percentage of identical bases between read and reference, defaults to .96 - 96% identity, or 4 mismatches allowed in a 100 base read. May be lowered to improve sensitivity in the case of low quality or short reads.


*--smalt_r*: specifies what to do with reads that map equally well in multiple locations. By default this is set to -1, meaning that multi-mapping reads are left unmapped. This is appropriate in studies comparing insertion frequency in the same library passaged through multiple conditions, as in this case a change in frequency of one repetitive gene could lead to many genes appearing to be selected artifactually. For studies of gene essentiality in a newly created library, this should be set to 0 (randomly assign a position) to avoid repetitive elements (particularly insertion sequences and the like) artificially appearing to be essential.

-----

**Output files**

On completion, bacteria_tradis produces a number of files. These include:

- **(input list name).stats**: Mapping statistics file. This is comma delimited, and includes one line for each library mapped along with a header. It can be easily opened in e.g. Excel or R.
- **(library name.replicon_name).insert_site_plot.gz**: Plot files, one for each replicon and library. These contain insertion counts on each strand for every nucleotide position in the replicon. They can be opened as “user plots” in the Artemis genome browser, and will be used for further analysis.
- **(library name).mapped.bam**: BAM file containing mapped reads.

-----

**More information**

.. class:: infomark

Additional information about Bio-TraDis can be found at https://github.com/sanger-pathogens/Bio-Tradis
]]>
    </help>

    <expand macro="citations" />

</tool>
174
175
176
177