Mercurial > repos > nml > smalt_index
changeset 0:4b79af35baf9 draft default tip
planemo upload for repository https://sourceforge.net/projects/smalt/ commit 008f4667b70be22e9ddf496738b3f74bb942ed28
author | nml |
---|---|
date | Tue, 19 Sep 2017 16:38:57 -0400 |
parents | |
children | |
files | smalt_index.xml test-data/output.sma test-data/output.smi test-data/ref.fasta |
diffstat | 4 files changed, 78 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/smalt_index.xml Tue Sep 19 16:38:57 2017 -0400
@@ -0,0 +1,68 @@
+<tool id="smalt_index" name="smalt index" version="1.2.0">
+    <description>Index a reference</description>
+    <!-- The smalt package provides the "smalt" executable invoked in the command section. -->
+    <requirements>
+        <requirement type="package" version="0.7.6">smalt</requirement>
+    </requirements>
+    <!-- Any non-zero exit code is reported as a fatal error. -->
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Unknown error" />
+    </stdio>
+    <!--
+        Builds the index under the fixed base name "temp"; the outputs section
+        collects temp.smi and temp.sma from the working directory. CDATA keeps
+        the template text literal, and single quotes keep each substituted value
+        a single shell word.
+    -->
+    <command><![CDATA[
+        smalt index
+            -k '$k'
+            #if $s:
+                -s '$s'
+            #end if
+            'temp' '$reference'
+    ]]></command>
+    <inputs>
+        <param name="reference" type="data" format="fasta" label="Fasta reference file"/>
+        <!-- Required integer, so -k is always passed; min and max enforce the range stated in the help. -->
+        <param name="k" type="integer" value="13" min="3" max="20" label="K-mer size" help="Specifies the word length: an integer between 3 and 20 (inclusive). The default word length is 13."/>
+        <!-- Optional; when left empty no -s option is passed. NOTE(review): min="1" assumes smalt rejects smaller step sizes; confirm against the smalt manual. -->
+        <param name="s" type="integer" optional="true" min="1" label="Step size" help="Specifies how many bases are skipped between indexed words."/>
+    </inputs>
+    <outputs>
+        <data name="output" label="SMI" from_work_dir="temp.smi" format="binary"/>
+        <data name="output2" label="SMA" from_work_dir="temp.sma" format="binary"/>
+    </outputs>
+    <tests>
+        <!-- Indexes ref.fasta with the default parameters and compares both index files with the stored copies. -->
+        <test>
+            <param name="reference" value="ref.fasta"/>
+            <output name="output" file="output.smi"/>
+            <output name="output2" file="output.sma"/>
+        </test>
+    </tests>
+    <!-- CDATA allows the angle-bracket placeholders such as <wordlen> to appear unescaped in the help text. -->
+    <help><![CDATA[
+
+**What it does**
+
+Generates an index of k-mer words for the genomic reference sequences. The words are of fixed length <wordlen> and are sampled at equidistant steps <stepsiz> bases apart. The reference sequences are provided in a single file <reference_file> in FASTA or FASTQ format. Two binary files are output. The file <index_name>.sma contains the reference sequences in compressed form. The file <index_name>.smi contains the k-mer word index.
+
+------
+
+Please cite the website "http://www.sanger.ac.uk/resources/software/smalt/".
+
+------
+
+    -k <wordlen>
+        Specifies the word length. <wordlen> is an integer within the limits
+        3 <= wordlen <= 20. The default word length is 13.
+
+    -s <stepsiz>
+        Specifies how many bases are skipped between indexed words. With '-s 1'
+        every k-mer word along the reference sequences is indexed. With '-s 2'
+        every other word is indexed etc. By default the step size is set equal
+        to the word length (tiling words).
+
+    ]]></help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ref.fasta Tue Sep 19 16:38:57 2017 -0400 @@ -0,0 +1,16 @@ +>gi|49175990|ref|NC_000913.2|_Escherichia_coli_str._K-12_substr._MG1655,_complete_genome,_cropped_to_first_1000_nucleotides +AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC +TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA +TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC +ATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAG +CCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAA +GTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC +AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTG +AAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTT +GACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTT +GCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGC +TGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGT +TACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCT +GAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCAGGTTTCACCG +CCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGC +TGCCTGTTTACGCGCCGATT