sharplabtool: tools/fasta_tools/fasta_to

annotate tools/fasta_tools/fasta_to_tabular.xml @ 1:cdcb0ce84a1b

Uploaded

author	xuebing
date	Fri, 09 Mar 2012 19:45:15 -0500
parents	9071e359b9a3
children

rev	line source
0 9071e359b9a3 Uploaded xuebing parents: diff changeset	1 <tool id="fasta2tab" name="FASTA-to-Tabular" version="1.1.0">
9071e359b9a3 Uploaded xuebing parents: diff changeset	2 <description>converter</description>
9071e359b9a3 Uploaded xuebing parents: diff changeset	3 <command interpreter="python">fasta_to_tabular.py $input $output $keep_first $descr_columns</command>
9071e359b9a3 Uploaded xuebing parents: diff changeset	4 <inputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	5 <param name="input" type="data" format="fasta" label="Convert these sequences"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	6 <param name="descr_columns" type="integer" size="2" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and description (rest) as two columns, or 1 to give a single column">
9071e359b9a3 Uploaded xuebing parents: diff changeset	7 <validator type="in_range" min="1" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	8 </param>
9071e359b9a3 Uploaded xuebing parents: diff changeset	9 <param name="keep_first" type="integer" size="5" value="0" label="How many title characters to keep?" help="Applies only to the first column taken from the title string ('0' = keep the whole thing), useful when your sequence identifiers are all the same length.">
9071e359b9a3 Uploaded xuebing parents: diff changeset	10 <validator type="in_range" min="0" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	11 </param>
9071e359b9a3 Uploaded xuebing parents: diff changeset	12 </inputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	13 <outputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	14 <data name="output" format="tabular"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	15 </outputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	16 <tests>
9071e359b9a3 Uploaded xuebing parents: diff changeset	17 <test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	18 <param name="input" value="454.fasta" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	19 <param name="descr_columns" value="1"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	20 <param name="keep_first" value="0"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	21 <output name="output" file="fasta_to_tabular_out1.tabular" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	22 </test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	23
9071e359b9a3 Uploaded xuebing parents: diff changeset	24 <test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	25 <param name="input" value="4.fasta" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	26 <param name="descr_columns" value="1"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	27 <param name="keep_first" value="0"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	28 <output name="output" file="fasta_to_tabular_out2.tabular" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	29 </test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	30
9071e359b9a3 Uploaded xuebing parents: diff changeset	31 <test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	32 <param name="input" value="454.fasta" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	33 <param name="descr_columns" value="1"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	34 <param name="keep_first" value="14"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	35 <output name="output" file="fasta_to_tabular_out3.tabular" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	36 </test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	37
9071e359b9a3 Uploaded xuebing parents: diff changeset	38 <test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	39 <param name="input" value="454.fasta" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	40 <param name="descr_columns" value="2"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	41 <param name="keep_first" value="0"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	42 <output name="output" file="fasta_to_tabular_out4.tabular" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	43 </test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	44
9071e359b9a3 Uploaded xuebing parents: diff changeset	45 <test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	46 <param name="input" value="454.fasta" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	47 <param name="descr_columns" value="5"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	48 <param name="keep_first" value="0"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	49 <output name="output" file="fasta_to_tabular_out5.tabular" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	50 </test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	51
9071e359b9a3 Uploaded xuebing parents: diff changeset	52 <test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	53 <param name="input" value="454.fasta" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	54 <param name="descr_columns" value="5"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	55 <param name="keep_first" value="10"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	56 <output name="output" file="fasta_to_tabular_out6.tabular" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	57 </test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	58
9071e359b9a3 Uploaded xuebing parents: diff changeset	59 </tests>
9071e359b9a3 Uploaded xuebing parents: diff changeset	60 <help>
9071e359b9a3 Uploaded xuebing parents: diff changeset	61
9071e359b9a3 Uploaded xuebing parents: diff changeset	62 What it does
9071e359b9a3 Uploaded xuebing parents: diff changeset	63
9071e359b9a3 Uploaded xuebing parents: diff changeset	64 This tool converts FASTA formatted sequences to TAB-delimited format.
9071e359b9a3 Uploaded xuebing parents: diff changeset	65
9071e359b9a3 Uploaded xuebing parents: diff changeset	66 Many tools consider the first word of the FASTA ">" title line to be an identifier, and any remaining text to be a free form description.
9071e359b9a3 Uploaded xuebing parents: diff changeset	67 It is therefore useful to split this text into two columns in Galaxy (identifier and any description) by setting How many columns to divide title string into? to 2.
9071e359b9a3 Uploaded xuebing parents: diff changeset	68 In some cases the description can be usefully broken up into more columns -- see the examples below.
9071e359b9a3 Uploaded xuebing parents: diff changeset	69
9071e359b9a3 Uploaded xuebing parents: diff changeset	70 The option How many title characters to keep? allows you to select a specified number of letters from the beginning of each FASTA entry.
9071e359b9a3 Uploaded xuebing parents: diff changeset	71 With the introduction of the How many columns to divide title string into? option this setting is of limited use, but does still allow you to truncate the identifier.
9071e359b9a3 Uploaded xuebing parents: diff changeset	72
9071e359b9a3 Uploaded xuebing parents: diff changeset	73 -----
9071e359b9a3 Uploaded xuebing parents: diff changeset	74
9071e359b9a3 Uploaded xuebing parents: diff changeset	75 Example
9071e359b9a3 Uploaded xuebing parents: diff changeset	76
9071e359b9a3 Uploaded xuebing parents: diff changeset	77 Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run::
9071e359b9a3 Uploaded xuebing parents: diff changeset	78
9071e359b9a3 Uploaded xuebing parents: diff changeset	79 >EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
9071e359b9a3 Uploaded xuebing parents: diff changeset	80 TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
9071e359b9a3 Uploaded xuebing parents: diff changeset	81 TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
9071e359b9a3 Uploaded xuebing parents: diff changeset	82 >EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
9071e359b9a3 Uploaded xuebing parents: diff changeset	83 AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAA
9071e359b9a3 Uploaded xuebing parents: diff changeset	84
9071e359b9a3 Uploaded xuebing parents: diff changeset	85 Running this tool with the default settings will produce this (2 column output):
9071e359b9a3 Uploaded xuebing parents: diff changeset	86
9071e359b9a3 Uploaded xuebing parents: diff changeset	87 ========================================================================== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	88 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9071e359b9a3 Uploaded xuebing parents: diff changeset	89 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9071e359b9a3 Uploaded xuebing parents: diff changeset	90 ========================================================================== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	91
9071e359b9a3 Uploaded xuebing parents: diff changeset	92 Having the full title line (the FASTA ">" line text) as a column is not always ideal.
9071e359b9a3 Uploaded xuebing parents: diff changeset	93
9071e359b9a3 Uploaded xuebing parents: diff changeset	94 The How many title characters to keep? option is useful if your identifiers are all the same length.
9071e359b9a3 Uploaded xuebing parents: diff changeset	95 In this example the identifier is 14 characters, so setting How many title characters to keep? to 14 (and leaving How many columns to divide title string into? as the default, 1) will produce this (2 column output):
9071e359b9a3 Uploaded xuebing parents: diff changeset	96
9071e359b9a3 Uploaded xuebing parents: diff changeset	97 ============== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	98 EYKX4VC02EQLO5 TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9071e359b9a3 Uploaded xuebing parents: diff changeset	99 EYKX4VC02D4GS2 AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9071e359b9a3 Uploaded xuebing parents: diff changeset	100 ============== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	101
9071e359b9a3 Uploaded xuebing parents: diff changeset	102 If however your FASTA file has identifiers of variable length, it is better to split the text into at least two columns.
9071e359b9a3 Uploaded xuebing parents: diff changeset	103 Running this tool with How many columns to divide title string into? set to 2 will produce this (3 column output):
9071e359b9a3 Uploaded xuebing parents: diff changeset	104
9071e359b9a3 Uploaded xuebing parents: diff changeset	105 ============== =========================================================== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	106 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9071e359b9a3 Uploaded xuebing parents: diff changeset	107 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9071e359b9a3 Uploaded xuebing parents: diff changeset	108 ============== =========================================================== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	109
9071e359b9a3 Uploaded xuebing parents: diff changeset	110 Running this tool with How many columns to divide title string into? set to 5 will produce this (6 column output: five title columns plus the sequence):
9071e359b9a3 Uploaded xuebing parents: diff changeset	111
9071e359b9a3 Uploaded xuebing parents: diff changeset	112 ============== ========== ============ ======== ========================== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	113 EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9071e359b9a3 Uploaded xuebing parents: diff changeset	114 EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9071e359b9a3 Uploaded xuebing parents: diff changeset	115 ============== ========== ============ ======== ========================== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	116
9071e359b9a3 Uploaded xuebing parents: diff changeset	117 Running this tool with How many columns to divide title string into? set to 5 and How many title characters to keep? set to 10 will produce this (6 column output).
9071e359b9a3 Uploaded xuebing parents: diff changeset	118 Notice that only the first column is truncated to 10 characters -- and be careful not to trim your sequence names too much (generally they should be unique):
9071e359b9a3 Uploaded xuebing parents: diff changeset	119
9071e359b9a3 Uploaded xuebing parents: diff changeset	120 ========== ========== ============ ======== ========================== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	121 EYKX4VC02E length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_ TCCGCGCCGAGCATGCCCATCTTGGATTCCGGC...ACG
9071e359b9a3 Uploaded xuebing parents: diff changeset	122 EYKX4VC02D length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_ AATAAAACTAAATCAGCAAAGACTGGCAAATAC...TAA
9071e359b9a3 Uploaded xuebing parents: diff changeset	123 ========== ========== ============ ======== ========================== =======================================
9071e359b9a3 Uploaded xuebing parents: diff changeset	124
9071e359b9a3 Uploaded xuebing parents: diff changeset	125 Note the sequences have been truncated for display purposes in the above tables.
9071e359b9a3 Uploaded xuebing parents: diff changeset	126
9071e359b9a3 Uploaded xuebing parents: diff changeset	127 </help>
9071e359b9a3 Uploaded xuebing parents: diff changeset	128 </tool>

Mercurial > repos > xuebing > sharplabtool

annotate tools/fasta_tools/fasta_to_tabular.xml @ 1:cdcb0ce84a1b