Mercurial > repos > galaxyp > nbic_fasta
diff FastaStats.xml @ 0:163892325845 draft default tip
Initial commit.
author | galaxyp |
---|---|
date | Fri, 10 May 2013 17:15:08 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/FastaStats.xml Fri May 10 17:15:08 2013 -0400
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+# =====================================================
+# $Id: FastaStats.xml 90 2011-01-19 13:20:31Z pieter.neerincx@gmail.com $
+# $URL: https://trac.nbic.nl/svn/galaxytools/trunk/tools/general/FastaTools/FastaStats.xml $
+# $LastChangedDate: 2011-01-19 07:20:31 -0600 (Wed, 19 Jan 2011) $
+# $LastChangedRevision: 90 $
+# $LastChangedBy: pieter.neerincx@gmail.com $
+# =====================================================
+-->
+<!-- Galaxy tool definition: runs FastaStats.pl on one FASTA dataset and writes a plain-text statistics report. -->
+<tool id="FastaStats1" name="FastaStats">
+  <description>List statistics for sequences in a FASTA file</description>
+  <!--
+    Command line passed to FastaStats.pl:
+    * $get_positional_composition_stats expands to "-p" when the checkbox is ticked and to an empty
+      string otherwise, so it must stay unquoted (quoting it would pass an empty argument).
+    * -i and -o receive the input and output dataset paths; both are single-quoted so the shell
+      keeps each path as exactly one argument.
+    * -l WARN is passed through verbatim (presumably the script's log level; confirm in FastaStats.pl).
+  -->
+  <command interpreter="perl">FastaStats.pl $get_positional_composition_stats -i '$input' -o '$output' -l WARN</command>
+  <inputs>
+    <param format="fasta" name="input" type="data" label="FASTA sequences"/>
+    <!-- Contributes "-p" to the command line when ticked and nothing when left unticked. -->
+    <param name="get_positional_composition_stats" type="boolean" truevalue="-p" falsevalue="" optional="true" label="Calculate positional acid frequencies"/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="output" label="FASTA Statistics for ${input.name}"/>
+  </outputs>
+  <tests>
+    <!-- Supplies only the input dataset (the checkbox keeps its default) and compares the report with the stored file. -->
+    <test>
+      <param name="input" value="fasta_2_proteins.fasta" ftype="fasta"/>
+      <output name="output" file="FastaStats_example_output.txt"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+This tool analyzes a collection of sequences in FASTA format and reports:
+
+ - The total number of sequences.
+ - The total number of nucleotides or amino acids.
+ - The total frequency of nucleotides or amino acids.
+ - The positional frequency of nucleotides or amino acids (optional).
+
+-----
+
+**Example**
+
+If the FASTA sequence collection contains these two sequences::
+
+  >UniProtKB:Q42593 L-ascorbate peroxidase T, chloroplastic;
+  MSVSLSAASHLLCSSTRVSLSPAVTSSSSSPVVALSSSTSPHSLGSVASSSLFPHSSFVL
+  QKKHPINGTSTRMISPKCAASDAAQLISAKEDIKVLLRTKFCHPILVRLGWHDAGTYNKN
+  IEEWPLRGGANGSLRFEAELKHAANAGLLNALKLIQPLKDKYPNISYADLFQLASATAIE
+  EAGGPDIPMKYGRVDVVAPEQCPEEGRLPDAGPPSPADHLRDVFYRMGLDDKEIVALSGA
+  HTLGRARPDRSGWGKPETKYTKTGPGEAGGQSWTVKWLKFDNSYFKDIKEKRDDDLLVLP
+  TDAALFEDPSFKNYAEKYAEDVAAFFKDYAEAHAKLSNLGAKFDPPEGIVIENVPEKFVA
+  AKYSTGKKELSDSMKKKIRAEYEAIGGSPDKPLPTNYFLNIIIAIGVLVLLSTLFGGNNN
+  SDFSGF
+  >UniProtKB:A0MQ79 Ascorbate peroxidase;
+  MVKNYPVVSEEYLIAVDKAKKKLRGFIAEKNCAPLMLRLAWHSAGTFDQCSRTGGPFGTM
+  RFKAEQAHSANNGIDIAIRLLEPIKEQFPILSYADFYQLAGVVAVEVTGGPEVPFHPGRP
+  DKEEPPVEGRLPDAYKGSDHLRDVFIKQMGLSDQDIVALSGGHTLGRCHKERSGFEGPWT
+  ENPLIFDNSYFKELVCGERDGLLQLPSDKALLADPVFHPLVEKYAADEDAFFADYAEAHL
+  KLSELGFADA
+
+The reported stats (without optional positional acid frequencies) will be this::
+
+  Sequences 2
+  Acid A 69
+  Acid C 8
+  Acid D 44
+  Acid E 44
+  Acid F 33
+  Acid G 52
+  Acid H 18
+  Acid I 30
+  Acid K 50
+  Acid L 67
+  Acid M 9
+  Acid N 22
+  Acid P 46
+  Acid Q 13
+  Acid R 26
+  Acid S 57
+  Acid T 23
+  Acid V 37
+  Acid W 7
+  Acid Y 21
+  Total acids 676
+
+  ]]></help>
+</tool>