Mercurial > repos > nml > smalt_index

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smalt_index.xml	Tue Sep 19 16:38:57 2017 -0400
@@ -0,0 +1,62 @@
+<tool id="smalt_index" name="smalt index" version="1.2.0">
+  <description>Index a reference </description>
+  <requirements>
+    <requirement type="package" version="0.7.6">smalt</requirement>
+  </requirements>
+   <stdio>
+    <exit_code range="1:"   level="fatal"   description="Unknown error" />
+  </stdio>
+ <command>
+    smalt index
+    #if $k:
+    -k "$k"
+    #end if
+
+    #if $s:
+    -s "$s"
+    #end if
+    'temp' "$reference"
+  </command>
+  <inputs>
+    <param name="reference" type="data" format="fasta" label="Fasta reference file"/>
+    <param name="k" type="integer" value="13" label="K-mer size" help="Specifies the word length. [wordlen] is an integer within the limits. between 3 and 20. The default word length is 13" max="20" min="3"/>
+    <param name="s" type="integer" optional="true" label="Step size" help="Specifies how many bases are skipped between indexed words."/>
+  </inputs>
+  <outputs>
+    <data name="output" label="SMI" from_work_dir="temp.smi" format="binary"/>
+    <data name="output2" label="SMA" from_work_dir="temp.sma" format="binary"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="reference" value="ref.fasta"/>
+      <output name="output" file="output.smi"/>
+      <output name="output2" file="output.sma"/>
+    </test>
+  </tests>
+  <help>
+
+**What it does**
+
+Generates an index of k-mer words for the genomic reference sequences. The words are of fixed length &#060;wordlen&#062; and are sampled at equidistant steps &#060;stepsiz&#062; bases apart. The reference sequences are provided in a single file &#060;reference_file&#062; in FASTA or FASTQ format. Two binary files are output. The file &#060;index_name&#062;.sma contains the  reference sequences in compressed form. The file &#060;index_name&#062;.smi contains the k-mer word index.
+
+
+------
+
+Please cite the website "http://www.sanger.ac.uk/resources/software/smalt/".
+
+------
+
+  -k &#060;wordlen&#062;
+     Specifies the word length. &#060;wordlen&#062; is an integer within the limits
+     3 &#060; wordlen &#060;= 20. The default word length is 13.
+
+  -s &#060;stepsiz&#062;
+     Specifies how many bases are skipped between indexed words. With '-s 1'
+     every k-mer word along the reference sequences is indexed. With '-s 2'
+     every other word is indexed etc. By default the step size is set equal
+     to the word length (tiling words).
+
+
+  </help>
+</tool>
+
Binary file test-data/output.sma has changed
Binary file test-data/output.smi has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref.fasta	Tue Sep 19 16:38:57 2017 -0400
@@ -0,0 +1,16 @@
+>gi|49175990|ref|NC_000913.2|_Escherichia_coli_str._K-12_substr._MG1655,_complete_genome,_cropped_to_first_1000_nucleotides
+AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
+TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA
+TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC
+ATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAG
+CCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAA
+GTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC
+AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTG
+AAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTT
+GACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTT
+GCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGC
+TGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGT
+TACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCT
+GAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCAGGTTTCACCG
+CCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGC
+TGCCTGTTTACGCGCCGATT