Mercurial > repos > devteam > fastx_quality_statistics

diff fastx_quality_statistics.xml @ 0:45f077341b24
Uploaded tool tarball.
author: devteam
date: Wed, 25 Sep 2013 11:15:30 -0400
children: c2af34024061
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastx_quality_statistics.xml	Wed Sep 25 11:15:30 2013 -0400
@@ -0,0 +1,72 @@
+<tool id="cshl_fastx_quality_statistics" version="1.0.0" name="Compute quality statistics">
+	<description></description>
+    <requirements>
+        <requirement type="package" version="0.0.13">fastx_toolkit</requirement>
+    </requirements>
+	<command>zcat -f $input | fastx_quality_stats -o $output -Q 33</command>
+
+	<inputs>
+		<param format="fastqsanger" version="1.0.0" name="input" type="data" label="Library to analyse" />
+	</inputs>
+
+	<tests>
+		<test>
+			<param version="1.0.0" name="input" value="fastq_stats1.fastq" ftype="fastqsanger"/>
+			<output version="1.0.0" name="output" file="fastq_stats1.out" />
+		</test>
+	</tests>
+
+	<outputs>
+		<data format="txt" version="1.0.0" name="output" metadata_source="input" />
+	</outputs>
+
+<help>
+
+**What it does**
+
+Creates quality statistics report for the given Solexa/FASTQ library.
+
+.. class:: infomark
+
+**TIP:** This statistics report can be used as input for **Quality Score** and **Nucleotides Distribution** tools.
+
+-----
+
+**The output file will contain the following fields:**
+
+* column	= column number (1 to 36 for a 36-cycles read Solexa file)
+* count   = number of bases found in this column.
+* min     = Lowest quality score value found in this column.
+* max     = Highest quality score value found in this column.
+* sum     = Sum of quality score values for this column.
+* mean    = Mean quality score value for this column.
+* Q1	= 1st quartile quality score.
+* med	= Median quality score.
+* Q3	= 3rd quartile quality score.
+* IQR	= Inter-Quartile range (Q3-Q1).
+* lW	= 'Left-Whisker' value (for boxplotting).
+* rW	= 'Right-Whisker' value (for boxplotting).
+* A_Count	= Count of 'A' nucleotides found in this column.
+* C_Count	= Count of 'C' nucleotides found in this column.
+* G_Count	= Count of 'G' nucleotides found in this column.
+* T_Count	= Count of 'T' nucleotides found in this column.
+* N_Count = Count of 'N' nucleotides found in this column.  
+
+
+For example::
+
+     1  6362991 -4 40 250734117 39.41 40 40 40  0 40 40 1396976 1329101  678730 2958184   0
+     2  6362991 -5 40 250531036 39.37 40 40 40  0 40 40 1786786 1055766 1738025 1782414   0
+     3  6362991 -5 40 248722469 39.09 40 40 40  0 40 40 2296384  984875 1443989 1637743   0
+     4  6362991 -4 40 248214827 39.01 40 40 40  0 40 40 2536861 1167423 1248968 1409739   0
+    36  6362991 -5 40 117158566 18.41  7 15 30 23 -5 40 4074444 1402980   63287  822035 245
+    
+------
+
+This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
+
+ .. __: http://hannonlab.cshl.edu/fastx_toolkit/
+
+</help>
+<!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
+</tool>
author	devteam
date	Wed, 25 Sep 2013 11:15:30 -0400
parents
children	c2af34024061