Mercurial > repos > iuc > biotradis

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bacteria_tradis.xml	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,177 @@
+<tool id="bacteria_tradis" name="Bio-TraDis reads to counts" version="@VERSION@">
+    <description></description>
+    <macros>
+          <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+        <![CDATA[
+            ## bacteria_tradis takes a text file listing the fastq paths (-f); the statistics output is collected from file.stats
+            ls '${input_fastq}' > file.txt &&
+            bacteria_tradis -v -f file.txt -r  '${input_ref}'
+                #if str($map_parameters.map_options) == "modify":
+                    #if str($map_parameters.set_kmers_options.set) == "yes":
+                       --smalt_k  '$map_parameters.set_kmers_options.kmer_length'
+                       --smalt_s  '$map_parameters.set_kmers_options.step_size'
+                    #end if
+                    --smalt_y '$map_parameters.min_percentage'
+                    --smalt_r '$map_parameters.duplicate_reads'
+                    -m '$map_parameters.min_quality'
+                #end if
+
+                #if str($tranposon_tag.use) == "yes":
+                    ## -m above is the mapping quality cutoff; the tag mismatch count is -mm and the tag direction is -td
+                    -t '$tranposon_tag.sequence'
+                    -td '$tranposon_tag.tagdir'
+                    -mm '$tranposon_tag.nb_mismatches'
+                #end if
+            2>&1
+        ]]>
+    </command>
+
+    <inputs>
+        <param name="input_fastq" type="data" format="fastq" label="Fastq file containing TraDis reads"/>
+        <param name="input_ref" type="data" format="fasta" label="Fasta File of the reference Genome"/>
+
+        <!-- Mapping options: the command template only emits these flags when "modify" is selected. -->
+        <conditional name="map_parameters">
+            <param name="map_options" type="select" label="Mapping Parameters" help="By default, the bacteria_tradis pipeline determines appropriate read mapping parameters automatically from the length of the first read in the fastq file. These parameters have been tested for data issue from TraDIS protocol of Barquist et al.">
+                <option value="default" selected="true">Use Default Parameters</option>
+                <option value="modify">Set Mapping parameters</option>
+            </param>
+            <when value="modify">
+                <conditional name="set_kmers_options">
+                    <param name="set" type="boolean" label="Modify kmers parameters" truevalue='yes' falsevalue='no' />
+                    <when value="yes">
+                        <param name="kmer_length" type="integer" value="" min="9" max="20" label=" Length of kmers hashed (--smalt_k)" help=" The minimum length of an exact match between a read and the genome needed to trigger an alignment attempt. Appropriate values are between ~10 and 20 for bacterial genomes depending on read length. Lower values lead to increased sensitivity at the expense of runtime." />
+                        <param name="step_size" type="integer" value="" min="1" max="15" label="Step size for smalt kmers (--smalt_s)" help=" Distance between the start of hashed kmers. Appropriate values are between 1 and ~15, but should be less than --smalt_k to ensure kmers overlap. Lower values lead to increased sensitivity at the expense of runtime." />
+                    </when>
+                    <when value="no">
+                    </when>
+                </conditional>
+
+                <param name="min_percentage" type="float" value="0.96" min="0" max="1" label="Minimum percentage of identical bases between read and reference (--smalt_y)" help="May be lowered to improve sensitivity in the case of low quality or short reads." />
+                <!-- smalt_r semantics (see the help section): 0 assigns multi-mapping reads to a random position,
+                     -1 leaves them unmapped. The checked state means "randomly assign", so it must emit 0. -->
+                <param name="duplicate_reads" type="boolean" truevalue="0" falsevalue="-1" label="Randomly assign position to reads that align in multiple location (--smalt_r)" help="If not, reads mapping in multiples positions are ignored" />
+                <param name="min_quality" type="integer" value="30" label="Minimum mapping quality score (-m) " help="Multi-mapping reads have a quality score of 0 by definition, so this parameter needs to be set to 0 for these reads to be properly processed. Can be lowered without dramatically affecting results in most cases, particularly if --smalt_y is set reasonably." />
+            </when>
+            <when value="default">
+            </when>
+        </conditional>
+
+        <!-- Transposon tag filtering. The conditional name keeps its original spelling because the command template references it. -->
+        <conditional name="tranposon_tag">
+            <param name="use" type="boolean" truevalue="yes" falsevalue="no" label="Search for a transposon tag" help="Use with data containing a transposon tag attached to the reads. Only reads containing the transposon tag will be processed, and the tag will be removed before mapping." />
+
+            <when value="yes">
+                <param name="sequence" type="text" value="" label="Transposon tag sequence (-t)" help="" />
+                <!-- Default 0 matches the tool default (no mismatches allowed); the old default of 2 was above the old maximum of 1. -->
+                <param name="nb_mismatches" type="integer" value="0" min="0" label="Number of mismatches allowed when matching the transposon tag (-mm)" help="If there is evidence for low-quality bases in the transposon tag (from FastQC, for instance), setting this to 1 or 2 may result in higher recovery of insertion sites. Higher than 2 is not advisable with the typical transposon tag lengths (10 - 12 bases) produced by TraDIS protocols, but may be appropriate with protocols that produce significantly longer transposon tags." />
+                <param name="tagdir" type="select" label="Direction of the transposon tag" help="" >
+                    <option value="3" selected="true">3'</option>
+                    <option value="5">5'</option>
+                </param>
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+
+
+    </inputs>
+    <!-- NOTE(review): from_work_dir is documented as the path of one file in the working directory; confirm that the glob patterns used for Counts and Aligned_reads are actually expanded. -->
+    <outputs>
+        <data format="txt" name="Statistics" label="${input_fastq.name} Statistics" from_work_dir="file.stats" />
+        <data name="Counts" format="tabular" from_work_dir="./*.gz" />
+        <data name="Aligned_reads" format="bam" from_work_dir="./*.bam" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_fastq" ftype="fastq" value="tiny.fastq.gz"/>
+            <param name="input_ref" ftype="fasta" value="tiny_ref.fasta"/>
+            <param name="map_options" value="default"/>
+            <param name="min_quality" ftype="float" value="0"/>
+            <param name="use" value="no"/>
+            <param name="set" ftype="select" value="no"/>
+            <output name="Statistics" file="file.stats" lines_diff="2"  />
+            <output name="Counts" file="tiny.out.gz.CP009273.1_60_120.insert_site_plot.gz" compare="diff" decompress="true" lines_diff="0" />
+        </test>
+        <test>
+            <param name="input_fastq" ftype="fastq" value="tiny.fastq.gz"/>
+            <param name="input_ref" ftype="fasta" value="tiny_ref.fasta"/>
+            <!-- NOTE(review): min_quality is supplied twice in this test (0 here, 20 further down); presumably only one value is used. Confirm which one and drop the other. -->
+            <param name="min_quality" ftype="integer" value="0"/>
+            <param name="map_options" value="modify"/>
+            <param name="min_percentage" ftype="float" value="0.5"/>
+            <param name="duplicate_reads" ftype="boolean" value="-1"/>
+            <param name="min_quality" ftype="float" value="20"/>
+            <param name="use" value="no"/>
+            <param name="set" ftype="select" value="yes"/>
+            <param name="kmer_length" ftype="integer" value="10"/>
+            <param name="step_size" ftype="integer" value="5"/>
+            <output name="Statistics" file="file.stats" lines_diff="2"  />
+            <output name="Counts" file="tiny_1.out.gz.CP009273.1_60_120.insert_site_plot.gz" compare="diff" decompress="true" lines_diff="0" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+**What it does**
+
+Bio-TraDis provides software utilities for the processing, mapping, and analysis of transposon insertion sequencing data. The pipeline was designed with the data from the TraDIS sequencing protocol in mind, but should work with a variety of transposon insertion sequencing protocols as long as they produce data in the expected format.
+
+-----
+
+**Parameters**
+
+The --smalt_r 0 and -m 0 options specify that we want to map reads with multiple best mappings to a random position and use these in our downstream analyses; by default these reads are left unmapped. Mapping and processing this library will take about 30 minutes to an hour on a typical desktop computer.
+
+By default, the bacteria_tradis pipeline determines appropriate read mapping parameters automatically from the length of the first read in the fastq file. It should be noted that the default parameters have been tested using the optimized TraDIS protocol of Barquist et al., 2016 in the hands of an experienced sequencing specialist; these will need to be tuned for other protocols, or for pilot runs, etc. There are various other scenarios in which it would be appropriate to reduce the stringency of these parameters: in the case that read trimming has been applied, if there are quality issues in the library, for certain types of studies (particularly gene essentiality studies as above), or if the quality of the reference genome is low (or of a different strain).
+
+
+The *-mm* option specifies the number of mismatches allowed when matching the transposon tag; by default none are allowed. We sometimes observe one or two positions within the transposon tag that seem to have generally low quality. If there is evidence for low-quality bases in the transposon tag (from FastQC, for instance), setting this to 1 or 2 may result in higher recovery of insertion sites. Higher than 2 is not advisable with the typical transposon tag lengths (10 - 12 bases) produced by TraDIS protocols, but may be appropriate with protocols that produce significantly longer transposon tags.
+
+
+The *-m* option sets the minimum mapping quality score to use an alignment in downstream analysis (e.g. plot files); defaults to 30. Multi-mapping reads have a quality score of 0 by definition, so this parameter needs to be set to 0 for these reads to be properly processed. Can be lowered without dramatically affecting results in most cases, particularly if  *smalt_y* is set reasonably.
+
+
+The other options specify parameters for the smalt mapper, which are discussed in more detail in the smalt manual (ftp.sanger.ac.uk/pub/resources/software/smalt/smalt-manual-0.7.4.pdf). We will discuss their effects on TraDIS mapping briefly here:
+
+*-smalt_k*: length of kmers hashed; roughly, the minimum length of an exact match between a read and the genome needed to trigger an alignment attempt. Appropriate values are between ~10 and 20 for bacterial genomes depending on read length. Lower values lead to increased sensitivity at the expense of runtime.
+
+*-smalt_s*: skipstep. Sampling step size, i.e. the distance between successive words that are hashed along the genomic reference sequence. With the option -s 1
+every word is hashed, with -s 2 every second word, with -s 3 every third etc. Appropriate values are between 1 and ~15, but should be less than --smalt_k to ensure kmers overlap. Lower values lead to increased sensitivity at the expense of runtime.
+
+
+*-smalt_y*: minimum percentage of identical bases between read and reference, defaults to .96 - 96% identity, or 4 mismatches allowed in a 100 base read. May be lowered to improve sensitivity in the case of low quality or short reads.
+
+
+*-smalt_r*: specifies what to do with reads that map equally well in multiple locations. By default this is set to -1, meaning that multi-mapping reads are left unmapped. This is appropriate in studies comparing insertion frequency in the same library passaged through multiple conditions, as in this case a change in frequency of one repetitive gene could lead to many genes appearing to be selected artifactually. For studies of gene essentiality in a newly created library, this should be set to 0 (randomly assign a position) to avoid repetitive elements (particularly insertion sequences and the like) artificially appearing to be essential.
+
+-----
+
+**Output files**
+
+On completion, bacteria tradis produces a number of files. These include:
+**(input list name).stats** : Mapping statistics file. This is comma delimited, and includes one line for each library mapped along with a header. It can be easily opened in e.g. Excel or R.
+**(library name.replicon_name).insert_site_plot.gz**: Plot files, one for each replicon and library. These contain insertion counts on each strand for every nucleotide position in the replicon. They can be opened as “user plots” in the Artemis genome browser, and will be used for further analysis.
+**(library name).mapped.bam** : BAM file containing mapped reads.
+
+-----
+
+**More information**
+
+.. class:: infomark
+
+Additional information about Bio-TraDis can be found at https://github.com/sanger-pathogens/Bio-Tradis
+]]>
+    </help>
+
+<expand macro="citations" />
+
+ </tool>
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@VERSION@">1</token> <!-- substituted into the version attribute of each tool that imports this file -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.4.5">biotradis</requirement>
+        </requirements>
+    </xml> <!-- requirements: the biotradis 1.4.5 package dependency, pulled in by the tools through an expand element -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btw022</citation>
+        </citations>
+    </xml> <!-- citations: the DOI shared by all three tools, pulled in through an expand element -->
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/file.stats	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,2 @@
+File,Total Reads,Reads Matched,% Matched,Reads Mapped,% Mapped,Unique Insertion Sites : CP009273.1:60-120,Seq Len/UIS : CP009273.1:60-120,Total Unique Insertion Sites,Total Seq Len/Total UIS
+tiny.fastq,804,804,100,367,45.6467661691542,27,2.25925925925926,27,2.25925925925926
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.csv	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,27 @@
+locus_tag	gene_name	ncrna	start	end	strand	read_count	ins_index	gene_length	ins_count	fcn
+BW25113_0001	thrL	0	190	255	1	44	0.181818181818182	66	12	"thr operon leader peptide"
+BW25113_0002	thrA	0	337	2799	1	969	0.135200974421437	2463	333	"Bifunctional aspartokinase/homoserine dehydrogenase 1"
+BW25113_0003	thrB	0	2801	3733	1	423	0.170418006430868	933	159	"homoserine kinase"
+BW25113_0004	thrC	0	3734	5020	1	518	0.136752136752137	1287	176	"L-threonine synthase"
+BW25113_0005	yaaX	0	5234	5530	1	155	0.151515151515152	297	45	"DUF2502 family putative periplasmic protein"
+BW25113_0006	yaaA	0	5683	6459	-1	293	0.141570141570142	777	110	"peroxide resistance protein, lowers intracellular iron"
+BW25113_0007	yaaJ	0	6529	7959	-1	600	0.140461215932914	1431	201	"putative transporter"
+BW25113_0008	talB	0	8238	9191	1	492	0.161425576519916	954	154	"transaldolase B"
+BW25113_0009	mog	0	9306	9893	1	239	0.158163265306122	588	93	"molybdochelatase incorporating molybdenum into molybdopterin"
+BW25113_0010	satP	0	9928	10494	-1	306	0.194003527336861	567	110	"succinate-acetate transporter"
+BW25113_0011	yaaW	0	10643	11356	-1	245	0.138655462184874	714	99	"UPF0174 family protein"
+BW25113_0013	yaaI	0	11382	11786	-1	270	0.17037037037037	405	69	"UPF0412 family protein"
+BW25113_0014	dnaK	0	12163	14079	1	96	0.0276473656755347	1917	53	"chaperone Hsp70, with co-chaperone DnaJ"
+BW25113_0015	dnaJ	0	14168	15298	1	436	0.138815207780725	1131	157	"chaperone Hsp40, DnaK co-chaperone"
+BW25113_0016	insL1	0	15445	16557	1	333	0.153638814016173	1113	171	"IS186 transposase"
+BW25113_0018	mokC	0	16751	16960	-1	350	0.304761904761905	210	64	"regulatory protein for HokC, overlaps CDS of hokC"
+BW25113_4412	hokC	0	16751	16903	-1	107	0.196078431372549	153	30	"toxic membrane protein, small"
+BW25113_4413	sokC	0	16952	17006	1	124	0.4	55	22
+BW25113_0019	nhaA	0	17489	18655	1	498	0.162810625535561	1167	190	"sodium-proton antiporter"
+BW25113_0020	nhaR	0	18715	19620	1	473	0.183222958057395	906	166	"transcriptional activator of nhaA"
+BW25113_0021	insB1	0	19811	20314	-1	68	0.0615079365079365	504	31	"IS1 transposase B"
+BW25113_0022	insA	0	20233	20508	-1	30	0.0434782608695652	276	12	"IS1 repressor TnpA"
+BW25113_0023	rpsT	0	20815	21078	-1	1	0.00378787878787879	264	1	"30S ribosomal subunit protein S20"
+BW25113_0024	yaaY	0	21181	21399	1	32	0.0547945205479452	219	12	"uncharacterized protein"
+BW25113_0025	ribF	0	21407	22348	1	8	0.00318471337579618	942	3	"bifunctional riboflavin kinase/FAD synthetase"
+BW25113_0026	ileS	0	22391	25207	1	14	0.0035498757543486	2817	10	"isoleucyl-tRNA synthetase"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.csv.all.csv	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,27 @@
+locus_tag,gene_name,ncrna,start,end,strand,read_count,ins_index,gene_length,ins_count,fcn
+BW25113_0001,thrL,0,190,255,1,44,0.181818181818182,66,12,thr operon leader peptide
+BW25113_0002,thrA,0,337,2799,1,969,0.135200974421437,2463,333,Bifunctional aspartokinase/homoserine dehydrogenase 1
+BW25113_0003,thrB,0,2801,3733,1,423,0.170418006430868,933,159,homoserine kinase
+BW25113_0004,thrC,0,3734,5020,1,518,0.136752136752137,1287,176,L-threonine synthase
+BW25113_0005,yaaX,0,5234,5530,1,155,0.151515151515152,297,45,DUF2502 family putative periplasmic protein
+BW25113_0006,yaaA,0,5683,6459,-1,293,0.141570141570142,777,110,peroxide resistance protein, lowers intracellular iron
+BW25113_0007,yaaJ,0,6529,7959,-1,600,0.140461215932914,1431,201,putative transporter
+BW25113_0008,talB,0,8238,9191,1,492,0.161425576519916,954,154,transaldolase B
+BW25113_0009,mog,0,9306,9893,1,239,0.158163265306122,588,93,molybdochelatase incorporating molybdenum into molybdopterin
+BW25113_0010,satP,0,9928,10494,-1,306,0.194003527336861,567,110,succinate-acetate transporter
+BW25113_0011,yaaW,0,10643,11356,-1,245,0.138655462184874,714,99,UPF0174 family protein
+BW25113_0013,yaaI,0,11382,11786,-1,270,0.17037037037037,405,69,UPF0412 family protein
+BW25113_0014,dnaK,0,12163,14079,1,96,0.0276473656755347,1917,53,chaperone Hsp70, with co-chaperone DnaJ
+BW25113_0015,dnaJ,0,14168,15298,1,436,0.138815207780725,1131,157,chaperone Hsp40, DnaK co-chaperone
+BW25113_0016,insL1,0,15445,16557,1,333,0.153638814016173,1113,171,IS186 transposase
+BW25113_0018,mokC,0,16751,16960,-1,350,0.304761904761905,210,64,regulatory protein for HokC, overlaps CDS of hokC
+BW25113_4412,hokC,0,16751,16903,-1,107,0.196078431372549,153,30,toxic membrane protein, small
+BW25113_4413,sokC,0,16952,17006,1,124,0.4,55,22,
+BW25113_0019,nhaA,0,17489,18655,1,498,0.162810625535561,1167,190,sodium-proton antiporter
+BW25113_0020,nhaR,0,18715,19620,1,473,0.183222958057395,906,166,transcriptional activator of nhaA
+BW25113_0021,insB1,0,19811,20314,-1,68,0.0615079365079365,504,31,IS1 transposase B
+BW25113_0022,insA,0,20233,20508,-1,30,0.0434782608695652,276,12,IS1 repressor TnpA
+BW25113_0023,rpsT,0,20815,21078,-1,1,0.00378787878787879,264,1,30S ribosomal subunit protein S20
+BW25113_0024,yaaY,0,21181,21399,1,32,0.0547945205479452,219,12,uncharacterized protein
+BW25113_0025,ribF,0,21407,22348,1,8,0.00318471337579618,942,3,bifunctional riboflavin kinase/FAD synthetase
+BW25113_0026,ileS,0,22391,25207,1,14,0.0035498757543486,2817,10,isoleucyl-tRNA synthetase
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.csv.ambig.csv	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,20 @@
+locus_tag,gene_name,ncrna,start,end,strand,read_count,ins_index,gene_length,ins_count,fcn
+BW25113_0001,thrL,0,190,255,1,44,0.181818181818182,66,12,thr operon leader peptide
+BW25113_0002,thrA,0,337,2799,1,969,0.135200974421437,2463,333,Bifunctional aspartokinase/homoserine dehydrogenase 1
+BW25113_0003,thrB,0,2801,3733,1,423,0.170418006430868,933,159,homoserine kinase
+BW25113_0004,thrC,0,3734,5020,1,518,0.136752136752137,1287,176,L-threonine synthase
+BW25113_0005,yaaX,0,5234,5530,1,155,0.151515151515152,297,45,DUF2502 family putative periplasmic protein
+BW25113_0006,yaaA,0,5683,6459,-1,293,0.141570141570142,777,110,peroxide resistance protein, lowers intracellular iron
+BW25113_0007,yaaJ,0,6529,7959,-1,600,0.140461215932914,1431,201,putative transporter
+BW25113_0008,talB,0,8238,9191,1,492,0.161425576519916,954,154,transaldolase B
+BW25113_0009,mog,0,9306,9893,1,239,0.158163265306122,588,93,molybdochelatase incorporating molybdenum into molybdopterin
+BW25113_0010,satP,0,9928,10494,-1,306,0.194003527336861,567,110,succinate-acetate transporter
+BW25113_0011,yaaW,0,10643,11356,-1,245,0.138655462184874,714,99,UPF0174 family protein
+BW25113_0013,yaaI,0,11382,11786,-1,270,0.17037037037037,405,69,UPF0412 family protein
+BW25113_0015,dnaJ,0,14168,15298,1,436,0.138815207780725,1131,157,chaperone Hsp40, DnaK co-chaperone
+BW25113_0016,insL1,0,15445,16557,1,333,0.153638814016173,1113,171,IS186 transposase
+BW25113_0018,mokC,0,16751,16960,-1,350,0.304761904761905,210,64,regulatory protein for HokC, overlaps CDS of hokC
+BW25113_4412,hokC,0,16751,16903,-1,107,0.196078431372549,153,30,toxic membrane protein, small
+BW25113_4413,sokC,0,16952,17006,1,124,0.4,55,22,
+BW25113_0019,nhaA,0,17489,18655,1,498,0.162810625535561,1167,190,sodium-proton antiporter
+BW25113_0020,nhaR,0,18715,19620,1,473,0.183222958057395,906,166,transcriptional activator of nhaA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.csv.essen.csv	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,8 @@
+locus_tag,gene_name,ncrna,start,end,strand,read_count,ins_index,gene_length,ins_count,fcn
+BW25113_0014,dnaK,0,12163,14079,1,96,0.0276473656755347,1917,53,chaperone Hsp70, with co-chaperone DnaJ
+BW25113_0021,insB1,0,19811,20314,-1,68,0.0615079365079365,504,31,IS1 transposase B
+BW25113_0022,insA,0,20233,20508,-1,30,0.0434782608695652,276,12,IS1 repressor TnpA
+BW25113_0023,rpsT,0,20815,21078,-1,1,0.00378787878787879,264,1,30S ribosomal subunit protein S20
+BW25113_0024,yaaY,0,21181,21399,1,32,0.0547945205479452,219,12,uncharacterized protein
+BW25113_0025,ribF,0,21407,22348,1,8,0.00318471337579618,942,3,bifunctional riboflavin kinase/FAD synthetase
+BW25113_0026,ileS,0,22391,25207,1,14,0.0035498757543486,2817,10,isoleucyl-tRNA synthetase
Binary file test-data/tiny.fastq.gz has changed
Binary file test-data/tiny.out.gz.CP009273.1_60_120.insert_site_plot.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tiny.out.gz.CP009273.1_60_120.tradis_gene_insert_sites.csv	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,2 @@
+locus_tag	gene_name	ncrna	start	end	strand	read_count	ins_index	gene_length	ins_count	fcn
+BW25113_0001	thrL	0	190	255	1	0	0	66	0	"thr operon leader peptide"
Binary file test-data/tiny_1.out.gz.CP009273.1_60_120.insert_site_plot.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tiny_ref.embl	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,68 @@
+ID   CP009273; SV 1; circular; genomic DNA; STD; PRO; 4631469 BP.
+XX
+AC   CP009273;
+XX
+PR   Project:PRJNA257976;
+XX
+DT   10-SEP-2014 (Rel. 122, Created)
+XX
+DE   Escherichia coli BW25113, complete genome.
+XX
+KW   .
+XX
+OS   Escherichia coli BW25113
+OC   Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales;
+OC   Enterobacteriaceae; Escherichia.
+XX
+RN   [1]
+RP   1-4631469
+RA   Grenier F., Matteau D., Baby V., Rodrigue S.;
+RT   "Complete genome sequence of Escherichia coli BW25113";
+RL   Unpublished.
+XX
+CC   ##Genome-Assembly-Data-START##
+CC   Assembly Method           :: Newbler v. 2.6
+CC   Reference-guided Assembly :: U00096.3
+CC   Genome Coverage           :: 100x
+CC   Sequencing Technology     :: Illumina HiSeq; Sanger
+CC   ##Genome-Assembly-Data-END##
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..300
+FT                   /organism="Escherichia coli BW25113"
+FT                   /strain="K-12"
+FT                   /sub_strain="BW25113"
+FT                   /mol_type="genomic DNA"
+FT                   /country="USA:Indiana"
+FT                   /collection_date="2000"
+FT                   /note="from B. L. Wanner laboratory; genotype: rrnB3
+FT                   lacZ4787 hsdR514 (araBAD)567 (rhaBAD)568 rph-1"
+FT                   /db_xref="taxon:679895"
+FT                   /culture_collection="CGSC:7636"
+FT   gene            190..255
+FT                   /gene="thrL"
+FT                   /gene_synonym="ECK0001"
+FT                   /gene_synonym="JW4367"
+FT                   /locus_tag="BW25113_0001"
+FT   CDS             190..255
+FT                   /codon_start=1
+FT                   /transl_table=11
+FT                   /gene="thrL"
+FT                   /gene_synonym="ECK0001"
+FT                   /gene_synonym="JW4367"
+FT                   /locus_tag="BW25113_0001"
+FT                   /product="thr operon leader peptide"
+FT                   /function="leader; Amino acid biosynthesis: Threonine"
+FT                   /db_xref="EnsemblGenomes-Gn:BW25113_0001"
+FT                   /db_xref="EnsemblGenomes-Tr:AIN30539"
+FT                   /protein_id="AIN30539.1"
+FT                   /translation="MKRISTTITTTITITTGNGAG"
+XX
+SQ   Sequence 4631469 BP; 1140509 A; 1177154 C; 1174646 G; 1139160 T; 0 other;
+     agcttttcat tctgactgca acgggcaata tgtctctgtg tggattaaaa aaagagtgtc        60
+     tgatagcagc ttctgaactg gttacctgcc gtgagtaaat taaaatttta ttgacttagg       120
+     tcactaaata ctttaaccaa tataggcata gcgcacagac agataaaaat tacagagtac       180
+     acaacatcca tgaaacgcat tagcaccacc attaccacca ccatcaccat taccacaggt       240
+     aacggtgcgg gctgacgcgt acaggaaaca cagaaaaaag cccgcacctg acagtgcggg       300
+//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tiny_ref.fasta	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,3 @@
+>CP009273.1:60-120
+CTGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAG
+G
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tradis_essentiality.xml	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,81 @@
+<tool id="tradis_essentiality" name="Bio-TraDis Essentiality Predictions" version="@VERSION@">
+    <description></description>
+    <macros>
+          <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- The dataset is linked under the fixed name input.csv and that name is passed to the R script; the outputs of this tool are collected as input.csv.* files. -->
+    <command>
+        <![CDATA[
+            ln -s '$input_insert_file' input.csv &&
+            tradis_essentiality.R input.csv
+        ]]>
+    </command>
+    <!-- Single input: the per-gene insertion table that the command links as input.csv. -->
+    <inputs>
+        <param name="input_insert_file" type="data" format="csv" label="Gene insertion counts" help="Output file of `Bio-TraDis counts to gene insertion data` tool"/>
+    </inputs>
+    <outputs>
+        <!-- Fixed file names: the command links the input as input.csv, and these suffixed files are presumably written next to it by tradis_essentiality.R. -->
+        <data name="ess_genes" format="csv" label="${tool.name} on ${on_string} : Essential genes" from_work_dir="input.csv.essen.csv"/>
+        <data name="ambig_genes" format="csv" label="${tool.name} on ${on_string} : Unclassified genes" from_work_dir="input.csv.ambig.csv"/>
+        <data name="all_genes" format="csv" label="${tool.name} on ${on_string} : All genes" from_work_dir="input.csv.all.csv"/>
+        <data name="qc_results" format="pdf" label="${tool.name} on ${on_string} : QC report" from_work_dir="input.csv.QC_and_changepoint_plots.pdf"/>
+    </outputs>
+    <!-- The three CSV outputs are compared by size only (sim_size, delta 200); the qc_results PDF is not checked. -->
+    <tests>
+        <test>
+            <param name="input_insert_file" ftype="csv" value="test.csv"/>
+            <output name="ess_genes" file="test.csv.essen.csv" compare="sim_size" delta="200" />
+            <output name="ambig_genes" file="test.csv.ambig.csv" compare="sim_size" delta="200" />
+            <output name="all_genes" file="test.csv.all.csv" compare="sim_size" delta="200" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+**What it does**
+
+Bio-TraDis provides software utilities for the processing, mapping, and analysis of transposon insertion sequencing data. The pipeline was designed with the data from the TraDIS sequencing protocol in mind, but should work with a variety of transposon insertion sequencing protocols as long as they produce data in the expected format.
+
+tradis_essentiality tool performs an Essentiality analysis using the annotation counts output of tradis_gene_insert_sites tool
+
+-----
+
+**Output files**
+
+-    Essential genes table : Table containing the essential genes
+-    Unclassified genes : Table containing genes that couldn't be classified as essential or non essential
+-    All genes : Table containing all genes
+-    QC report : PDF file containing the regression plot
+
+All tables contain the following columns:
+-   locus_tag
+-   gene_name
+-   ncrna : Non-coding RNA, 1 if the feature is a non coding DNA, 0 if not.
+-   start
+-   end
+-   strand
+-   read_count : Total number of reads mapping on the feature
+-   ins_index : Insertion index, number of insertion divided by the gene length
+-   gene_length
+-   ins_count : Number of insertion within the feature.
+-   fcn : Function
+
+-----
+
+**More information**
+
+.. class:: infomark
+
+Additional information about Bio-TraDis can be found at https://github.com/sanger-pathogens/Bio-Tradis
+]]>
+    </help>
+
+<expand macro="citations" />
+
+ </tool>
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tradis_gene_insert_sites.xml	Wed Jan 29 10:41:06 2020 -0500
@@ -0,0 +1,76 @@
+<tool id="tradis_gene_insert_sites" name="Bio-TraDis counts to gene insertion data" version="@VERSION@">
+    <description></description>
+    <macros>
+          <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Positional arguments: the annotation file comes first, then the counts file; both trim fractions are always passed. -->
+    <command>
+        <![CDATA[
+            tradis_gene_insert_sites -trim3 '$trim3' -trim5 '$trim5' '$input_annot' '$input_counts'
+        ]]>
+    </command>
+    <inputs>
+        <param name="input_counts" type="data" format="tabular" label="Read counts" help="Read counts at each nucleotide position for both strands. The first column contains forward strand counts, and the second contains reverse strand counts."/>
+        <param name="input_annot" type="data" format="txt" label="EMBL file containing the reference genome annotations"/>
+        <!-- trim3 / trim5 are fractions of the gene (0 to 1) and are passed to the tool as -trim3 / -trim5. -->
+        <param name="trim3" type="float" value="0.1" min="0" max="1" label="Portion of the gene to trim at the 3' end" help="Trim reads at the 3'end of genes. Some transposon insertions can be little disruptive when they occur end of genes. Trim to ignore these reads."/>
+        <param name="trim5" type="float" value="0" min="0" max="1" label="Portion of the gene to trim at the 5' end" help="Trim reads at the 5'end of genes. Some transposon insertions can be little disruptive when they occur end of genes. Trim to ignore these reads."/>
+    </inputs>
+    <!-- NOTE(review): from_work_dir is documented as the path of one file in the working directory; confirm that the glob pattern below is actually expanded. -->
+    <outputs>
+        <data name="gene_insert_sites" format="csv" from_work_dir="./*.csv"/>
+    </outputs>
+    <!-- Single test with both trim fractions set to 0; the output table is compared by size only (sim_size, delta 200). -->
+    <tests>
+        <test>
+            <param name="input_counts" ftype="tabular" value="tiny.out.gz.CP009273.1_60_120.insert_site_plot.gz"/>
+            <param name="input_annot" ftype="txt" value="tiny_ref.embl"/>
+            <param name="trim3" ftype="float" value="0"/>
+            <param name="trim5" ftype="float" value="0"/>
+            <output name="gene_insert_sites" file="tiny.out.gz.CP009273.1_60_120.tradis_gene_insert_sites.csv"  compare="sim_size" delta="200" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+**What it does**
+
+Bio-TraDis provides software utilities for the processing, mapping, and analysis of transposon insertion sequencing data. The pipeline was designed with the data from the TraDIS sequencing protocol in mind, but should work with a variety of transposon insertion sequencing protocols as long as they produce data in the expected format.
+
+tradis_gene_insert_sites combines the counts at each position with an annotation file to provide the number of reads and insertions for each feature.
+
+-----
+
+**Output files**
+
+The tool outputs a table containing the columns :
+-   locus_tag
+-   gene_name
+-   ncrna : Is the feature a non-coding DNA. 1=yes, 0=no.
+-   start
+-   end
+-   strand
+-   read_count : Number of reads mapping on the feature
+-   ins_index : Insertion index, number of insertion divided by the gene length
+-   gene_length
+-   ins_count : Number of insertion within the feature.
+-   fcn : Function
+
+-----
+
+**More information**
+
+.. class:: infomark
+
+Additional information about Bio-TraDis can be found at https://github.com/sanger-pathogens/Bio-Tradis
+]]>
+    </help>
+
+<expand macro="citations" />
+
+ </tool>
+
+
+
+