Mercurial > repos > iuc > biotradis
changeset 0:738e58ed9cc2 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/biotradis commit 1c0a0f88149bf8863a89c58bace81e070b3adb5a"
author | iuc |
---|---|
date | Wed, 29 Jan 2020 10:41:06 -0500 |
parents | |
children | 58234d95978d |
files | bacteria_tradis.xml macros.xml test-data/file.stats test-data/test.csv test-data/test.csv.all.csv test-data/test.csv.ambig.csv test-data/test.csv.essen.csv test-data/tiny.fastq.gz test-data/tiny.out.gz.CP009273.1_60_120.insert_site_plot.gz test-data/tiny.out.gz.CP009273.1_60_120.tradis_gene_insert_sites.csv test-data/tiny_1.out.gz.CP009273.1_60_120.insert_site_plot.gz test-data/tiny_ref.embl test-data/tiny_ref.fasta tradis_essentiality.xml tradis_gene_insert_sites.xml |
diffstat | 15 files changed, 505 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bacteria_tradis.xml Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,177 @@ +<tool id="bacteria_tradis" name="Bio-TraDis reads to counts" version="@VERSION@"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> <!-- bacteria_tradis flags used below: -m = minimum mapping quality, -mm = mismatches allowed in the transposon tag, -t = tag sequence, -td = tag direction (3 or 5). The fastq path is written to file.txt because -f expects a file listing the fastq files. --> + <command> + <![CDATA[ + ls '${input_fastq}' > file.txt && + bacteria_tradis -v -f file.txt -r '${input_ref}' + #if str($map_parameters.map_options) == "modify": + + #if str($map_parameters.set_kmers_options.set) == "yes": + --smalt_k '$map_parameters.set_kmers_options.kmer_length' + --smalt_s '$map_parameters.set_kmers_options.step_size' + #end if + + --smalt_y '$map_parameters.min_percentage' + --smalt_r '$map_parameters.duplicate_reads' + -m '$map_parameters.min_quality' + + #end if + + #if str($tranposon_tag.use) == "yes": + -mm '$tranposon_tag.nb_mismatches' + -t '$tranposon_tag.sequence' -td '$tranposon_tag.tagdir' + #end if + 2>&1 + ]]> + </command> + + <inputs> + <param name="input_fastq" type="data" format="fastq" label="Fastq file containing TraDis reads"/> + <param name="input_ref" type="data" format="fasta" label="Fasta File of the reference Genome"/> + + <conditional name="map_parameters"> + <param name="map_options" type="select" label="Mapping Parameters" help="By default, the bacteria_tradis pipeline determines appropriate read mapping parameters automatically from the length of the first read in the fastq file. 
These parameters have been tested for data produced with the TraDIS protocol of Barquist et al."> + <option value="default" selected="true">Use Default Parameters</option> + <option value="modify">Set Mapping parameters</option> + + </param> + <when value="modify"> + + <conditional name="set_kmers_options"> + <param name="set" type="boolean" label="Modify kmers parameters" truevalue='yes' falsevalue='no' /> + <when value="yes"> + <param name="kmer_length" type="integer" value="" min="9" max="20" label=" Length of kmers hashed (--smalt_k)" help=" The minimum length of an exact match between a read and the genome needed to trigger an alignment attempt. Appropriate values are between ~10 and 20 for bacterial genomes depending on read length. Lower values lead to increased sensitivity at the expense of runtime." /> + <param name="step_size" type="integer" value="" min="1" max="15" label="Step size for smalt kmers (--smalt_s)" help=" Distance between the start of hashed kmers. Appropriate values are between 1 and ~15, but should be less than --smalt_k to ensure kmers overlap. Lower values lead to increased sensitivity at the expense of runtime." /> + </when> + <when value="no"> + </when> + </conditional> + + <param name="min_percentage" type="float" value="0.96" min="0" max="1" label="Minimum percentage of identical bases between read and reference (--smalt_y)" help="May be lowered to improve sensitivity in the case of low quality or short reads." /> <!-- smalt_r semantics: 0 = assign multi-mapping reads to a random position, -1 = leave them unmapped; checking the box must therefore emit 0 --> + <param name="duplicate_reads" type="boolean" truevalue="0" falsevalue="-1" label="Randomly assign position to reads that align in multiple locations (--smalt_r)" help="If not, reads mapping to multiple positions are left unmapped (ignored)" /> + <param name="min_quality" type="integer" value="30" label="Minimum mapping quality score (-m) " help="Multi-mapping reads have a quality score of 0 by definition, so this parameter needs to be set to 0 for these reads to be properly processed. 
Can be lowered without dramatically affecting results in most cases, particularly if --smalt_y is set reasonably." /> + + </when> + <when value="default"> + </when> + </conditional> + + <conditional name="tranposon_tag"> + <param name="use" type="boolean" truevalue="yes" falsevalue="no" label="Search for a transposon tag" help="Use with data containing a transposon tag attached to the reads. Only reads containing the transposon tag will be processed, and the tag will be removed before mapping." /> + + <when value="yes"> + + <param name="sequence" type="text" value="" label="Transposon tag sequence" help="" /> <!-- default 0: the help text states that no mismatches are allowed by default; max 2 follows the recommendation in the help --> + <param name="nb_mismatches" type="integer" value="0" min="0" max="2" label="Number of mismatches allowed when matching the transposon tag" help="If there is evidence for low-quality bases in the transposon tag (from FastQC, for instance), setting this to 1 or 2 may result in higher recovery of insertion sites. Higher than 2 is not advisable with the typical transposon tag lengths (10 - 12 bases) produced by TraDIS protocols, but may be appropriate with protocols that produce significantly longer transposon tags." 
/> + <param name="tagdir" type="select" label="Direction of the transposon tag" help="" > + <option value="3" selected="true">3'</option> + <option value="5">5'</option> + </param> + + </when> + <when value="no"> + </when> + </conditional> + + + </inputs> + + <outputs> + <data format="txt" name="Statistics" label="${input_fastq.name} Statistics" from_work_dir="file.stats" /> + <data name="Counts" format="tabular" from_work_dir="./*.gz" /> + <data name="Aligned_reads" format="bam" from_work_dir="./*.bam" /> + </outputs> + + <tests> + <test> + <param name="input_fastq" ftype="fastq" value="tiny.fastq.gz"/> + <param name="input_ref" ftype="fasta" value="tiny_ref.fasta"/> + <param name="map_options" value="default"/> + <param name="min_quality" ftype="float" value="0"/> + <param name="use" value="no"/> + <param name="set" ftype="select" value="no"/> + <output name="Statistics" file="file.stats" lines_diff="2" /> + <output name="Counts" file="tiny.out.gz.CP009273.1_60_120.insert_site_plot.gz" compare="diff" decompress="true" lines_diff="0" /> + </test> + <test> + <param name="input_fastq" ftype="fastq" value="tiny.fastq.gz"/> + <param name="input_ref" ftype="fasta" value="tiny_ref.fasta"/> + <param name="min_quality" ftype="integer" value="0"/> + <param name="map_options" value="modify"/> + <param name="min_percentage" ftype="float" value="0.5"/> + <param name="duplicate_reads" ftype="boolean" value="false"/> + <param name="min_quality" ftype="float" value="20"/> + <param name="use" value="no"/> + <param name="set" ftype="select" value="yes"/> + <param name="kmer_length" ftype="integer" value="10"/> + <param name="step_size" ftype="integer" value="5"/> + <output name="Statistics" file="file.stats" lines_diff="2" /> + <output name="Counts" file="tiny_1.out.gz.CP009273.1_60_120.insert_site_plot.gz" compare="diff" decompress="true" lines_diff="0" /> + </test> + + </tests> + <help> +<![CDATA[ + +**What it does** + +Bio-TraDis provides software utilities for 
the processing, mapping, and analysis of transposon insertion sequencing data. The pipeline was designed with the data from the TraDIS sequencing protocol in mind, but should work with a variety of transposon insertion sequencing protocols as long as they produce data in the expected format. + +----- + +**Parameters** + +The --smalt_r 0 and -m 0 options specify that we want to map reads with multiple best mappings to a random position and use these in our downstream analyses; by default these reads are left unmapped. Mapping and processing this library will take about 30 minutes to an hour on a typical desktop computer. + +By default, the bacteria_tradis pipeline determines appropriate read mapping parameters automatically from the length of the first read in the fastq file. It should be noted that the default parameters have been tested using the optimized TraDIS protocol of Barquist et al., 20XX in the hands of an experienced sequencing specialist; these will need to be tuned for other protocols, or for pilot runs, etc. There are various other scenarios in which it would be appropriate to reduce the stringency of these parameters: in the case that read trimming has been applied, if there are quality issues in the library, for certain types of studies (particularly gene essentiality studies as above), or if the quality of the reference genome is low (or of a different strain). + + +The *-mm* option specifies the number of mismatches allowed when matching the transposon tag; by default none are allowed. We sometimes observe one or two positions within the transposon tag that seem to have generally low quality. If there is evidence for low-quality bases in the transposon tag (from FastQC, for instance), setting this to 1 or 2 may result in higher recovery of insertion sites. 
Higher than 2 is not advisable with the typical transposon tag lengths (10 - 12 bases) produced by TraDIS protocols, but may be appropriate with protocols that produce significantly longer transposon tags. + + +The *-m* option sets the minimum mapping quality score to use an alignment in downstream analysis (e.g. plot files); defaults to 30. Multi-mapping reads have a quality score of 0 by definition, so this parameter needs to be set to 0 for these reads to be properly processed. Can be lowered without dramatically affecting results in most cases, particularly if *--smalt_y* is set reasonably. + + +The other options specify parameters for the smalt mapper, which are discussed in more detail in the smalt manual (ftp.sanger.ac.uk/pub/resources/software/smalt/smalt-manual-0.7.4.pdf). We will discuss their effects on TraDIS mapping briefly here: + +*--smalt_k*: length of kmers hashed; roughly, the minimum length of an exact match between a read and the genome needed to trigger an alignment attempt. Appropriate values are between ~10 and 20 for bacterial genomes depending on read length. Lower values lead to increased sensitivity at the expense of runtime. + +*--smalt_s*: skipstep. Sampling step size, i.e. the distance between successive words that are hashed along the genomic reference sequence. With the option -s 1 +every word is hashed, with -s 2 every second word, with -s 3 every third etc. Appropriate values are between 1 and ~15, but should be less than --smalt_k to ensure kmers overlap. Lower values lead to increased sensitivity at the expense of runtime. + + +*--smalt_y*: minimum percentage of identical bases between read and reference, defaults to .96 - 96% identity, or 4 mismatches allowed in a 100 base read. May be lowered to improve sensitivity in the case of low quality or short reads. + + +*--smalt_r*: specifies what to do with reads that map equally well in multiple locations. By default this is set to -1, meaning that multi-mapping reads are left unmapped. 
This is appropriate in studies comparing insertion frequency in the same library passaged through multiple conditions, as in this case a change in frequency of one repetitive gene could lead to many genes appearing to be selected artifactually. For studies of gene essentiality in a newly created library, this should be set to 0 (randomly assign a position) to avoid repetitive elements (particularly insertion sequences and the like) artificially appearing to be essential. + +----- + +**Output files** + +On completion, bacteria tradis produces a number of files. These include: +**(input list name).stats** : Mapping statistics file. This is comma delimited, and includes one line for each library mapped along with a header. It can be easily opened in e.g. Excel or R. +**(library name.replicon_name).insert_site_plot.gz**: Plot files, one for each replicon and library. These contain insertion counts on each strand for every nucleotide position in the replicon. They can be opened as “user plots” in the Artemis genome browser, and will be used for further analysis. +**(library name).mapped.bam** : BAM file containing mapped reads. + +----- + +**More information** + +.. class:: infomark + +Additional information about Bio-TraDis can be found at https://github.com/sanger-pathogens/Bio-Tradis +]]> + </help> + +<expand macro="citations" /> + + </tool> + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,14 @@ +<?xml version="1.0"?> +<macros> <!-- Shared macros imported by the three Bio-TraDis tool wrappers in this changeset --> + <token name="@VERSION@">1</token> <!-- substituted into each tool's version attribute --> + <xml name="requirements"> <!-- expanded by each tool via expand macro="requirements" --> + <requirements> + <requirement type="package" version="1.4.5">biotradis</requirement> + </requirements> + </xml> + <xml name="citations"> <!-- expanded by each tool via expand macro="citations" --> + <citations> + <citation type="doi">10.1093/bioinformatics/btw022</citation> + </citations> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/file.stats Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,2 @@ +File,Total Reads,Reads Matched,% Matched,Reads Mapped,% Mapped,Unique Insertion Sites : CP009273.1:60-120,Seq Len/UIS : CP009273.1:60-120,Total Unique Insertion Sites,Total Seq Len/Total UIS +tiny.fastq,804,804,100,367,45.6467661691542,27,2.25925925925926,27,2.25925925925926
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.csv Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,27 @@ +locus_tag gene_name ncrna start end strand read_count ins_index gene_length ins_count fcn +BW25113_0001 thrL 0 190 255 1 44 0.181818181818182 66 12 "thr operon leader peptide" +BW25113_0002 thrA 0 337 2799 1 969 0.135200974421437 2463 333 "Bifunctional aspartokinase/homoserine dehydrogenase 1" +BW25113_0003 thrB 0 2801 3733 1 423 0.170418006430868 933 159 "homoserine kinase" +BW25113_0004 thrC 0 3734 5020 1 518 0.136752136752137 1287 176 "L-threonine synthase" +BW25113_0005 yaaX 0 5234 5530 1 155 0.151515151515152 297 45 "DUF2502 family putative periplasmic protein" +BW25113_0006 yaaA 0 5683 6459 -1 293 0.141570141570142 777 110 "peroxide resistance protein, lowers intracellular iron" +BW25113_0007 yaaJ 0 6529 7959 -1 600 0.140461215932914 1431 201 "putative transporter" +BW25113_0008 talB 0 8238 9191 1 492 0.161425576519916 954 154 "transaldolase B" +BW25113_0009 mog 0 9306 9893 1 239 0.158163265306122 588 93 "molybdochelatase incorporating molybdenum into molybdopterin" +BW25113_0010 satP 0 9928 10494 -1 306 0.194003527336861 567 110 "succinate-acetate transporter" +BW25113_0011 yaaW 0 10643 11356 -1 245 0.138655462184874 714 99 "UPF0174 family protein" +BW25113_0013 yaaI 0 11382 11786 -1 270 0.17037037037037 405 69 "UPF0412 family protein" +BW25113_0014 dnaK 0 12163 14079 1 96 0.0276473656755347 1917 53 "chaperone Hsp70, with co-chaperone DnaJ" +BW25113_0015 dnaJ 0 14168 15298 1 436 0.138815207780725 1131 157 "chaperone Hsp40, DnaK co-chaperone" +BW25113_0016 insL1 0 15445 16557 1 333 0.153638814016173 1113 171 "IS186 transposase" +BW25113_0018 mokC 0 16751 16960 -1 350 0.304761904761905 210 64 "regulatory protein for HokC, overlaps CDS of hokC" +BW25113_4412 hokC 0 16751 16903 -1 107 0.196078431372549 153 30 "toxic membrane protein, small" +BW25113_4413 sokC 0 16952 17006 1 124 0.4 55 22 +BW25113_0019 nhaA 0 17489 18655 1 498 0.162810625535561 
1167 190 "sodium-proton antiporter" +BW25113_0020 nhaR 0 18715 19620 1 473 0.183222958057395 906 166 "transcriptional activator of nhaA" +BW25113_0021 insB1 0 19811 20314 -1 68 0.0615079365079365 504 31 "IS1 transposase B" +BW25113_0022 insA 0 20233 20508 -1 30 0.0434782608695652 276 12 "IS1 repressor TnpA" +BW25113_0023 rpsT 0 20815 21078 -1 1 0.00378787878787879 264 1 "30S ribosomal subunit protein S20" +BW25113_0024 yaaY 0 21181 21399 1 32 0.0547945205479452 219 12 "uncharacterized protein" +BW25113_0025 ribF 0 21407 22348 1 8 0.00318471337579618 942 3 "bifunctional riboflavin kinase/FAD synthetase" +BW25113_0026 ileS 0 22391 25207 1 14 0.0035498757543486 2817 10 "isoleucyl-tRNA synthetase"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.csv.all.csv Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,27 @@ +locus_tag,gene_name,ncrna,start,end,strand,read_count,ins_index,gene_length,ins_count,fcn +BW25113_0001,thrL,0,190,255,1,44,0.181818181818182,66,12,thr operon leader peptide +BW25113_0002,thrA,0,337,2799,1,969,0.135200974421437,2463,333,Bifunctional aspartokinase/homoserine dehydrogenase 1 +BW25113_0003,thrB,0,2801,3733,1,423,0.170418006430868,933,159,homoserine kinase +BW25113_0004,thrC,0,3734,5020,1,518,0.136752136752137,1287,176,L-threonine synthase +BW25113_0005,yaaX,0,5234,5530,1,155,0.151515151515152,297,45,DUF2502 family putative periplasmic protein +BW25113_0006,yaaA,0,5683,6459,-1,293,0.141570141570142,777,110,peroxide resistance protein, lowers intracellular iron +BW25113_0007,yaaJ,0,6529,7959,-1,600,0.140461215932914,1431,201,putative transporter +BW25113_0008,talB,0,8238,9191,1,492,0.161425576519916,954,154,transaldolase B +BW25113_0009,mog,0,9306,9893,1,239,0.158163265306122,588,93,molybdochelatase incorporating molybdenum into molybdopterin +BW25113_0010,satP,0,9928,10494,-1,306,0.194003527336861,567,110,succinate-acetate transporter +BW25113_0011,yaaW,0,10643,11356,-1,245,0.138655462184874,714,99,UPF0174 family protein +BW25113_0013,yaaI,0,11382,11786,-1,270,0.17037037037037,405,69,UPF0412 family protein +BW25113_0014,dnaK,0,12163,14079,1,96,0.0276473656755347,1917,53,chaperone Hsp70, with co-chaperone DnaJ +BW25113_0015,dnaJ,0,14168,15298,1,436,0.138815207780725,1131,157,chaperone Hsp40, DnaK co-chaperone +BW25113_0016,insL1,0,15445,16557,1,333,0.153638814016173,1113,171,IS186 transposase +BW25113_0018,mokC,0,16751,16960,-1,350,0.304761904761905,210,64,regulatory protein for HokC, overlaps CDS of hokC +BW25113_4412,hokC,0,16751,16903,-1,107,0.196078431372549,153,30,toxic membrane protein, small +BW25113_4413,sokC,0,16952,17006,1,124,0.4,55,22, +BW25113_0019,nhaA,0,17489,18655,1,498,0.162810625535561,1167,190,sodium-proton 
antiporter +BW25113_0020,nhaR,0,18715,19620,1,473,0.183222958057395,906,166,transcriptional activator of nhaA +BW25113_0021,insB1,0,19811,20314,-1,68,0.0615079365079365,504,31,IS1 transposase B +BW25113_0022,insA,0,20233,20508,-1,30,0.0434782608695652,276,12,IS1 repressor TnpA +BW25113_0023,rpsT,0,20815,21078,-1,1,0.00378787878787879,264,1,30S ribosomal subunit protein S20 +BW25113_0024,yaaY,0,21181,21399,1,32,0.0547945205479452,219,12,uncharacterized protein +BW25113_0025,ribF,0,21407,22348,1,8,0.00318471337579618,942,3,bifunctional riboflavin kinase/FAD synthetase +BW25113_0026,ileS,0,22391,25207,1,14,0.0035498757543486,2817,10,isoleucyl-tRNA synthetase
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.csv.ambig.csv Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,20 @@ +locus_tag,gene_name,ncrna,start,end,strand,read_count,ins_index,gene_length,ins_count,fcn +BW25113_0001,thrL,0,190,255,1,44,0.181818181818182,66,12,thr operon leader peptide +BW25113_0002,thrA,0,337,2799,1,969,0.135200974421437,2463,333,Bifunctional aspartokinase/homoserine dehydrogenase 1 +BW25113_0003,thrB,0,2801,3733,1,423,0.170418006430868,933,159,homoserine kinase +BW25113_0004,thrC,0,3734,5020,1,518,0.136752136752137,1287,176,L-threonine synthase +BW25113_0005,yaaX,0,5234,5530,1,155,0.151515151515152,297,45,DUF2502 family putative periplasmic protein +BW25113_0006,yaaA,0,5683,6459,-1,293,0.141570141570142,777,110,peroxide resistance protein, lowers intracellular iron +BW25113_0007,yaaJ,0,6529,7959,-1,600,0.140461215932914,1431,201,putative transporter +BW25113_0008,talB,0,8238,9191,1,492,0.161425576519916,954,154,transaldolase B +BW25113_0009,mog,0,9306,9893,1,239,0.158163265306122,588,93,molybdochelatase incorporating molybdenum into molybdopterin +BW25113_0010,satP,0,9928,10494,-1,306,0.194003527336861,567,110,succinate-acetate transporter +BW25113_0011,yaaW,0,10643,11356,-1,245,0.138655462184874,714,99,UPF0174 family protein +BW25113_0013,yaaI,0,11382,11786,-1,270,0.17037037037037,405,69,UPF0412 family protein +BW25113_0015,dnaJ,0,14168,15298,1,436,0.138815207780725,1131,157,chaperone Hsp40, DnaK co-chaperone +BW25113_0016,insL1,0,15445,16557,1,333,0.153638814016173,1113,171,IS186 transposase +BW25113_0018,mokC,0,16751,16960,-1,350,0.304761904761905,210,64,regulatory protein for HokC, overlaps CDS of hokC +BW25113_4412,hokC,0,16751,16903,-1,107,0.196078431372549,153,30,toxic membrane protein, small +BW25113_4413,sokC,0,16952,17006,1,124,0.4,55,22, +BW25113_0019,nhaA,0,17489,18655,1,498,0.162810625535561,1167,190,sodium-proton antiporter +BW25113_0020,nhaR,0,18715,19620,1,473,0.183222958057395,906,166,transcriptional activator of 
nhaA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.csv.essen.csv Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,8 @@ +locus_tag,gene_name,ncrna,start,end,strand,read_count,ins_index,gene_length,ins_count,fcn +BW25113_0014,dnaK,0,12163,14079,1,96,0.0276473656755347,1917,53,chaperone Hsp70, with co-chaperone DnaJ +BW25113_0021,insB1,0,19811,20314,-1,68,0.0615079365079365,504,31,IS1 transposase B +BW25113_0022,insA,0,20233,20508,-1,30,0.0434782608695652,276,12,IS1 repressor TnpA +BW25113_0023,rpsT,0,20815,21078,-1,1,0.00378787878787879,264,1,30S ribosomal subunit protein S20 +BW25113_0024,yaaY,0,21181,21399,1,32,0.0547945205479452,219,12,uncharacterized protein +BW25113_0025,ribF,0,21407,22348,1,8,0.00318471337579618,942,3,bifunctional riboflavin kinase/FAD synthetase +BW25113_0026,ileS,0,22391,25207,1,14,0.0035498757543486,2817,10,isoleucyl-tRNA synthetase
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tiny.out.gz.CP009273.1_60_120.tradis_gene_insert_sites.csv Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,2 @@ +locus_tag gene_name ncrna start end strand read_count ins_index gene_length ins_count fcn +BW25113_0001 thrL 0 190 255 1 0 0 66 0 "thr operon leader peptide"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tiny_ref.embl Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,68 @@ +ID CP009273; SV 1; circular; genomic DNA; STD; PRO; 4631469 BP. +XX +AC CP009273; +XX +PR Project:PRJNA257976; +XX +DT 10-SEP-2014 (Rel. 122, Created) +XX +DE Escherichia coli BW25113, complete genome. +XX +KW . +XX +OS Escherichia coli BW25113 +OC Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; +OC Enterobacteriaceae; Escherichia. +XX +RN [1] +RP 1-4631469 +RA Grenier F., Matteau D., Baby V., Rodrigue S.; +RT "Complete genome sequence of Escherichia coli BW25113"; +RL Unpublished. +XX +CC ##Genome-Assembly-Data-START## +CC Assembly Method :: Newbler v. 2.6 +CC Reference-guided Assembly :: U00096.3 +CC Genome Coverage :: 100x +CC Sequencing Technology :: Illumina HiSeq; Sanger +CC ##Genome-Assembly-Data-END## +XX +FH Key Location/Qualifiers +FH +FT source 1..300 +FT /organism="Escherichia coli BW25113" +FT /strain="K-12" +FT /sub_strain="BW25113" +FT /mol_type="genomic DNA" +FT /country="USA:Indiana" +FT /collection_date="2000" +FT /note="from B. L. 
Wanner laboratory; genotype: rrnB3 +FT lacZ4787 hsdR514 (araBAD)567 (rhaBAD)568 rph-1" +FT /db_xref="taxon:679895" +FT /culture_collection="CGSC:7636" +FT gene 190..255 +FT /gene="thrL" +FT /gene_synonym="ECK0001" +FT /gene_synonym="JW4367" +FT /locus_tag="BW25113_0001" +FT CDS 190..255 +FT /codon_start=1 +FT /transl_table=11 +FT /gene="thrL" +FT /gene_synonym="ECK0001" +FT /gene_synonym="JW4367" +FT /locus_tag="BW25113_0001" +FT /product="thr operon leader peptide" +FT /function="leader; Amino acid biosynthesis: Threonine" +FT /db_xref="EnsemblGenomes-Gn:BW25113_0001" +FT /db_xref="EnsemblGenomes-Tr:AIN30539" +FT /protein_id="AIN30539.1" +FT /translation="MKRISTTITTTITITTGNGAG" +XX +SQ Sequence 4631469 BP; 1140509 A; 1177154 C; 1174646 G; 1139160 T; 0 other; + agcttttcat tctgactgca acgggcaata tgtctctgtg tggattaaaa aaagagtgtc 60 + tgatagcagc ttctgaactg gttacctgcc gtgagtaaat taaaatttta ttgacttagg 120 + tcactaaata ctttaaccaa tataggcata gcgcacagac agataaaaat tacagagtac 180 + acaacatcca tgaaacgcat tagcaccacc attaccacca ccatcaccat taccacaggt 240 + aacggtgcgg gctgacgcgt acaggaaaca cagaaaaaag cccgcacctg acagtgcggg 300 +//
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tiny_ref.fasta Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,3 @@ +>CP009273.1:60-120 +CTGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAG +G
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tradis_essentiality.xml Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,81 @@ +<tool id="tradis_essentiality" name="Bio-TraDis Essentiality Predictions" version="@VERSION@"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> <!-- The input is symlinked to the fixed name input.csv so that the result files get the predictable input.csv.* names collected by from_work_dir in the outputs section --> + <command> + <![CDATA[ + ln -s '$input_insert_file' input.csv && + tradis_essentiality.R input.csv + ]]> + </command> + + <inputs> + <param name="input_insert_file" type="data" format="csv" label="Gene insertion counts" help="Output file of `Bio-TraDis counts to gene insertion data` tool"/> + </inputs> + + <outputs> + + <data name="ess_genes" format="csv" label="${tool.name} on ${on_string} : Essential genes" from_work_dir="input.csv.essen.csv"/> + <data name="ambig_genes" format="csv" label="${tool.name} on ${on_string} : Unclassified genes" from_work_dir="input.csv.ambig.csv"/> + <data name="all_genes" format="csv" label="${tool.name} on ${on_string} : All genes" from_work_dir="input.csv.all.csv"/> + <data name="qc_results" format="pdf" label="${tool.name} on ${on_string} : QC report" from_work_dir="input.csv.QC_and_changepoint_plots.pdf"/> + </outputs> + + <tests> + <test> + <param name="input_insert_file" ftype="csv" value="test.csv"/> + <output name="ess_genes" file="test.csv.essen.csv" compare="sim_size" delta="200" /> + <output name="ambig_genes" file="test.csv.ambig.csv" compare="sim_size" delta="200" /> + <output name="all_genes" file="test.csv.all.csv" compare="sim_size" delta="200" /> + </test> + </tests> + <help> +<![CDATA[ + +**What it does** + +Bio-TraDis provides software utilities for the processing, mapping, and analysis of transposon insertion sequencing data. The pipeline was designed with the data from the TraDIS sequencing protocol in mind, but should work with a variety of transposon insertion sequencing protocols as long as they produce data in the expected format. 
+ +tradis_essentiality tool performs an Essentiality analysis using the annotation counts output of tradis_gene_insert_sites tool + +----- + +**Output files** + +- Essential genes table : Table containing the essential genes +- Unclassified genes : Table containing genes that couldn't be classified as essential or non essential +- All genes : Table containing all genes +- QC report : PDF file containing the regression plot + +All tables contain the following columns: +- locus_tag +- gene_name +- ncrna : Non-coding RNA, 1 if the feature is a non coding DNA, 0 if not. +- start +- end +- strand +- read_count : Total number of reads mapping on the feature +- ins_index : Insertion index, number of insertion divided by the gene length +- gene_length +- ins_count : Number of insertion within the feature. +- fcn : Function + +----- + +**More information** + +.. class:: infomark + +Additional information about Bio-TraDis can be found at https://github.com/sanger-pathogens/Bio-Tradis +]]> + </help> + +<expand macro="citations" /> + + </tool> + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tradis_gene_insert_sites.xml Wed Jan 29 10:41:06 2020 -0500 @@ -0,0 +1,76 @@ +<tool id="tradis_gene_insert_sites" name="Bio-TraDis counts to gene insertion data" version="@VERSION@"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> <!-- Positional arguments: the EMBL annotation comes first, then the insert-site plot (counts) file --> + <command> + <![CDATA[ + tradis_gene_insert_sites -trim3 '$trim3' -trim5 '$trim5' '$input_annot' '$input_counts' + ]]> + </command> + + <inputs> + <param name="input_counts" type="data" format="tabular" label="Read counts" help="Read counts at each nucleotide position for both strands. The first column contains forward strand counts, and the second contains reverse strand counts."/> + <param name="input_annot" type="data" format="txt" label="EMBL file containing the reference genome annotations"/> + <param name="trim3" type="float" value="0.1" min="0" max="1" label="Portion of the gene to trim at the 3' end" help="Trim reads at the 3' end of genes. Some transposon insertions are only mildly disruptive when they occur near the ends of genes. Trim to ignore these reads."/> + <param name="trim5" type="float" value="0" min="0" max="1" label="Portion of the gene to trim at the 5' end" help="Trim reads at the 5' end of genes. Some transposon insertions are only mildly disruptive when they occur near the ends of genes. 
Trim to ignore these reads."/> + + </inputs> + + <outputs> + <data name="gene_insert_sites" format="csv" from_work_dir="./*.csv"/> + </outputs> + + <tests> + <test> + <param name="input_counts" ftype="tabular" value="tiny.out.gz.CP009273.1_60_120.insert_site_plot.gz"/> + <param name="input_annot" ftype="txt" value="tiny_ref.embl"/> + <param name="trim3" ftype="float" value="0"/> + <param name="trim5" ftype="float" value="0"/> + <output name="gene_insert_sites" file="tiny.out.gz.CP009273.1_60_120.tradis_gene_insert_sites.csv" compare="sim_size" delta="200" /> + </test> + </tests> + <help> +<![CDATA[ + +**What it does** + +Bio-TraDis provides software utilities for the processing, mapping, and analysis of transposon insertion sequencing data. The pipeline was designed with the data from the TraDIS sequencing protocol in mind, but should work with a variety of transposon insertion sequencing protocols as long as they produce data in the expected format. + +tradis_gene_insert_sites combines the counts at each position with an annotation file to provide the number of reads and insertions for each feature. + +----- + +**Output files** + +The tool outputs a table containing the columns : +- locus_tag +- gene_name +- ncrna : Is the feature a non-coding DNA. 1=yes, 0=no. +- start +- end +- strand +- read_count : Number of reads mapping on the feature +- ins_index : Insertion index, number of insertion divided by the gene length +- gene_length +- ins_count : Number of insertion within the feature. +- fcn : Function + +----- + +**More information** + +.. class:: infomark + +Additional information about Bio-TraDis can be found at https://github.com/sanger-pathogens/Bio-Tradis +]]> + </help> + +<expand macro="citations" /> + + </tool> + + + +