diff fastx_quality_statistics.xml @ 0:78a7d28f2a15 draft

Uploaded
author idot
date Wed, 10 Jul 2013 06:13:48 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastx_quality_statistics.xml	Wed Jul 10 06:13:48 2013 -0400
@@ -0,0 +1,112 @@
+<tool id="cshl_fastx_quality_statistics" name="Compute quality statistics">
+	<description></description>
+	<command>
+cat '$input' |
+fastx_quality_stats
+#if $input.ext == "fastqsanger":
+ -Q 33
+#elif $input.ext == "fastq":
+ -Q 64
+#end if
+ -o '$output'
+</command>
+
+	<inputs>
+		<param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to analyse" />
+	</inputs>
+
+	<tests>
+		<test>
+			<param name="input" value="fastq_stats1.fastq" ftype="fastq"/>
+			<output name="output" file="fastq_stats1.out" />
+		</test>
+	</tests>
+
+	<outputs>
+		<data format="txt" name="output" metadata_source="input"
+		/>
+	</outputs>
+
+<help>
+
+**What it does**
+
+Creates quality statistics report for the given Solexa/FASTQ library.
+
+.. class:: infomark
+
+**TIP:** This statistics report can be used as input for **Quality Score** and **Nucleotides Distribution** tools.
+
+-----
+
+**The output file will contain the following fields:**
+
+* column	= column number (1 to 36 for a 36-cycles read FASTQ file)
+* count   = number of bases found in this column.
+* min     = Lowest quality score value found in this column.
+* max     = Highest quality score value found in this column.
+* sum     = Sum of quality score values for this column.
+* mean    = Mean quality score value for this column.
+* Q1	= 1st quartile quality score.
+* med	= Median quality score.
+* Q3	= 3rd quartile quality score.
+* IQR	= Inter-Quartile range (Q3-Q1).
+* lW	= 'Left-Whisker' value (for boxplotting).
+* rW	= 'Right-Whisker' value (for boxplotting).
+* A_Count	= Count of 'A' nucleotides found in this column.
+* C_Count	= Count of 'C' nucleotides found in this column.
+* G_Count	= Count of 'G' nucleotides found in this column.
+* T_Count	= Count of 'T' nucleotides found in this column.
+* N_Count = Count of 'N' nucleotides found in this column.  
+
+
+
+**Output Example**::
+
+    column	count	min	max	sum	mean	Q1	med	Q3	IQR	lW	rW	A_Count	C_Count	G_Count	T_Count	N_Count
+    1	6362991	-4	40	250734117	39.41	40	40	40	0	40	40	1396976	1329101	678730	2958184	0
+    2	6362991	-5	40	250531036	39.37	40	40	40	0	40	40	1786786	1055766	1738025	1782414	0
+    3	6362991	-5	40	248722469	39.09	40	40	40	0	40	40	2296384	984875	1443989	1637743	0
+    4	6362991	-5	40	247654797	38.92	40	40	40	0	40	40	1683197	1410855	1722633	1546306	0
+    5	6362991	-4	40	248214827	39.01	40	40	40	0	40	40	2536861	1167423	1248968	1409739	0
+    6	6362991	-5	40	248499903	39.05	40	40	40	0	40	40	1598956	1236081	1568608	1959346	0
+    7	6362991	-4	40	247719760	38.93	40	40	40	0	40	40	1692667	1822140	1496741	1351443	0
+    8	6362991	-5	40	245745205	38.62	40	40	40	0	40	40	2230936	1343260	1529928	1258867	0
+    9	6362991	-5	40	245766735	38.62	40	40	40	0	40	40	1702064	1306257	1336511	2018159	0
+    10	6362991	-5	40	245089706	38.52	40	40	40	0	40	40	1519917	1446370	1450995	1945709	0
+    11	6362991	-5	40	242641359	38.13	40	40	40	0	40	40	1717434	1282975	1387804	1974778	0
+    12	6362991	-5	40	242026113	38.04	40	40	40	0	40	40	1662872	1202041	1519721	1978357	0
+    13	6362991	-5	40	238704245	37.51	40	40	40	0	40	40	1549965	1271411	1973291	1566681	1643
+    14	6362991	-5	40	235622401	37.03	40	40	40	0	40	40	2101301	1141451	1603990	1515774	475
+    15	6362991	-5	40	230766669	36.27	40	40	40	0	40	40	2344003	1058571	1440466	1519865	86
+    16	6362991	-5	40	224466237	35.28	38	40	40	2	35	40	2203515	1026017	1474060	1651582	7817
+    17	6362991	-5	40	219990002	34.57	34	40	40	6	25	40	1522515	1125455	2159183	1555765	73
+    18	6362991	-5	40	214104778	33.65	30	40	40	10	15	40	1479795	2068113	1558400	1249337	7346
+    19	6362991	-5	40	212934712	33.46	30	40	40	10	15	40	1432749	1231352	1769799	1920093	8998
+    20	6362991	-5	40	212787944	33.44	29	40	40	11	13	40	1311657	1411663	2126316	1513282	73
+    21	6362991	-5	40	211369187	33.22	28	40	40	12	10	40	1887985	1846300	1300326	1318380	10000
+    22	6362991	-5	40	213371720	33.53	30	40	40	10	15	40	542299	3446249	516615	1848190	9638
+    23	6362991	-5	40	221975899	34.89	36	40	40	4	30	40	347679	1233267	926621	3855355	69
+    24	6362991	-5	40	194378421	30.55	21	40	40	19	-5	40	433560	674358	3262764	1992242	67
+    25	6362991	-5	40	199773985	31.40	23	40	40	17	-2	40	944760	325595	1322800	3769641	195
+    26	6362991	-5	40	179404759	28.20	17	34	40	23	-5	40	3457922	156013	1494664	1254293	99
+    27	6362991	-5	40	163386668	25.68	13	28	40	27	-5	40	1392177	281250	3867895	821491	178
+    28	6362991	-5	40	156230534	24.55	12	25	40	28	-5	40	907189	981249	4174945	299437	171
+    29	6362991	-5	40	163236046	25.65	13	28	40	27	-5	40	1097171	3418678	1567013	280008	121
+    30	6362991	-5	40	151309826	23.78	12	23	40	28	-5	40	3514775	2036194	566277	245613	132
+    31	6362991	-5	40	141392520	22.22	10	21	40	30	-5	40	1569000	4571357	124732	97721	181
+    32	6362991	-5	40	143436943	22.54	10	21	40	30	-5	40	1453607	4519441	38176	351107	660
+    33	6362991	-5	40	114269843	17.96	6	14	30	24	-5	40	3311001	2161254	155505	734297	934
+    34	6362991	-5	40	140638447	22.10	10	20	40	30	-5	40	1501615	1637357	18113	3205237	669
+    35	6362991	-5	40	138910532	21.83	10	20	40	30	-5	40	1532519	3495057	23229	1311834	352
+    36	6362991	-5	40	117158566	18.41	7	15	30	23	-5	40	4074444	1402980	63287	822035	245
+    
+------
+
+This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
+
+ .. __: http://hannonlab.cshl.edu/fastx_toolkit/
+
+</help>
+</tool>
+<!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->