changeset 0:45f077341b24

Uploaded tool tarball.
author devteam
date Wed, 25 Sep 2013 11:15:30 -0400
parents
children c2af34024061
files fastx_quality_statistics.xml test-data/fastq_stats1.fastq test-data/fastq_stats1.out tool_dependencies.xml
diffstat 4 files changed, 151 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastx_quality_statistics.xml	Wed Sep 25 11:15:30 2013 -0400
@@ -0,0 +1,72 @@
+<tool id="cshl_fastx_quality_statistics" version="1.0.0" name="Compute quality statistics">
+	<description></description>
+    <requirements> <!-- External package the command line relies on; version matches tool_dependencies.xml. -->
+        <requirement version="0.0.13" type="package">fastx_toolkit</requirement>
+    </requirements>
+	<command>zcat -f '$input' | fastx_quality_stats -o '$output' -Q 33</command> <!-- Paths are single-quoted so spaces or shell metacharacters in dataset paths reach the shell intact; zcat -f also passes uncompressed input through; -Q 33 selects the Phred+33 (Sanger) quality offset. -->
+
+	<inputs> <!-- One Sanger-encoded FASTQ dataset, exposed to the command template as $input. -->
+		<param name="input" type="data" format="fastqsanger" label="Library to analyse" />
+	</inputs>
+
+	<tests> <!-- Runs the tool on the bundled FASTQ sample and compares the result with the stored report. -->
+		<test>
+			<param name="input" value="fastq_stats1.fastq" ftype="fastqsanger"/>
+			<output name="output" file="fastq_stats1.out" />
+		</test>
+	</tests>
+
+	<outputs> <!-- Plain text statistics report that the command writes through its $output argument. -->
+		<data name="output" format="txt" metadata_source="input" />
+	</outputs>
+
+<help>
+
+**What it does**
+
+Creates a quality statistics report for the given Solexa/FASTQ library.
+
+.. class:: infomark
+
+**TIP:** This statistics report can be used as input for the **Quality Score** and **Nucleotides Distribution** tools.
+
+-----
+
+**The output file will contain the following fields:**
+
+* column	= column number (1 to 36 for a 36-cycle Solexa read file)
+* count   = number of bases found in this column.
+* min     = Lowest quality score value found in this column.
+* max     = Highest quality score value found in this column.
+* sum     = Sum of quality score values for this column.
+* mean    = Mean quality score value for this column.
+* Q1	= 1st quartile quality score.
+* med	= Median quality score.
+* Q3	= 3rd quartile quality score.
+* IQR	= Inter-Quartile range (Q3-Q1).
+* lW	= 'Left-Whisker' value (for boxplotting).
+* rW	= 'Right-Whisker' value (for boxplotting).
+* A_Count	= Count of 'A' nucleotides found in this column.
+* C_Count	= Count of 'C' nucleotides found in this column.
+* G_Count	= Count of 'G' nucleotides found in this column.
+* T_Count	= Count of 'T' nucleotides found in this column.
+* N_Count = Count of 'N' nucleotides found in this column.
+* Max_count = Maximum number of bases found in any column.
+
+For example::
+
+     1  6362991 -4 40 250734117 39.41 40 40 40  0 40 40 1396976 1329101  678730 2958184   0
+     2  6362991 -5 40 250531036 39.37 40 40 40  0 40 40 1786786 1055766 1738025 1782414   0
+     3  6362991 -5 40 248722469 39.09 40 40 40  0 40 40 2296384  984875 1443989 1637743   0
+     4  6362991 -4 40 248214827 39.01 40 40 40  0 40 40 2536861 1167423 1248968 1409739   0
+    36  6362991 -5 40 117158566 18.41  7 15 30 23 -5 40 4074444 1402980   63287  822035 245
+    
+------
+
+This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
+
+ .. __: http://hannonlab.cshl.edu/fastx_toolkit/
+
+</help>
+<!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastq_stats1.fastq	Wed Sep 25 11:15:30 2013 -0400
@@ -0,0 +1,36 @@
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+BBC?7?B6>ABB?B;BBBCC9&;BCBBBBBBBB>>A
+@CSHL_3_FC042AGLLWW:1:2:7:33
+CAATGCCTCCAATTGGTTAATCCCCCTATATATACT
++CSHL_3_FC042AGLLWW:1:2:7:33
+8BBB?B;BB8?6@9B8BB=8.&1?,&;931&&&(BB
+@CSHL_3_FC042AGLLWW:1:2:7:169
+GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC
++CSHL_3_FC042AGLLWW:1:2:7:169
+B@.?B=6BBB@.@BBBBBBBBBBBBBBB7=;6(663
+@CSHL_3_FC042AGLLWW:1:2:7:1436
+AATTATTTATTAAATTTTAATAATATGGGAGACACT
++CSHL_3_FC042AGLLWW:1:2:7:1436
+B?BBBBBBBBBBBBBBB@6ABBBBB@4@BBBBB77<
+@CSHL_3_FC042AGLLWW:1:2:7:292
+GGAGAAATACACACAATTGGTTAATCCCCCTATATA
++CSHL_3_FC042AGLLWW:1:2:7:292
+CBCBBBBBBB6.BBBBBBBBBBB=9&66&1@>6&3&
+@CSHL_3_FC042AGLLWW:1:2:7:1819
+AATTCAAACCACCCCAACCCACACACAGAGATACAA
++CSHL_3_FC042AGLLWW:1:2:7:1819
+B==2777-BB-0&96866&,66-&.6&66,6-*2.6
+@CSHL_3_FC042AGLLWW:1:2:7:1875
+GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC
++CSHL_3_FC042AGLLWW:1:2:7:1875
+BBBBBBBBB9699&9BBBBBA@;BBBBBBBBB9&96
+@CSHL_3_FC042AGLLWW:1:2:8:624
+ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG
++CSHL_3_FC042AGLLWW:1:2:8:624
+BB<4?A9ABB@>>009.6?@<.6@BBBBBBBBBBBB
+@CSHL_3_FC042AGLLWW:1:2:8:250
+TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA
++CSHL_3_FC042AGLLWW:1:2:8:250
+BBBBBBBB?BBBBBBCCC<,91&6<39;?+6,3,9&
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastq_stats1.out	Wed Sep 25 11:15:30 2013 -0400
@@ -0,0 +1,37 @@
+column	count	min	max	sum	mean	Q1	med	Q3	IQR	lW	rW	A_Count	C_Count	G_Count	T_Count	N_Count	Max_count
+1	9	23	34	288	32.00	33	33	33	0	33	33	3	1	4	1	0	9
+2	9	28	33	287	31.89	31	33	33	2	28	33	3	3	2	1	0	9
+3	9	13	34	268	29.78	28	33	33	5	21	34	5	1	0	3	0	9
+4	9	17	33	261	29.00	30	33	33	3	26	33	1	2	3	3	0	9
+5	9	22	33	269	29.89	30	33	33	3	26	33	3	3	3	0	0	9
+6	9	22	33	277	30.78	30	33	33	3	26	33	5	3	0	1	0	9
+7	9	21	33	258	28.67	24	33	33	9	21	33	4	1	3	1	0	9
+8	9	12	33	263	29.22	32	33	33	1	31	33	2	1	1	5	0	9
+9	9	29	33	290	32.22	33	33	33	0	33	33	3	3	2	1	0	9
+10	9	23	33	277	30.78	32	33	33	1	31	33	1	4	2	2	0	9
+11	9	12	33	245	27.22	21	31	33	12	12	33	5	2	1	1	0	9
+12	9	13	33	214	23.78	15	24	33	18	13	33	2	4	2	1	0	9
+13	9	5	33	249	27.67	29	31	33	4	23	33	2	1	1	5	0	9
+14	9	5	33	233	25.89	24	33	33	9	11	33	3	3	2	1	0	9
+15	9	15	33	251	27.89	24	33	33	9	15	33	5	1	1	2	0	9
+16	9	23	34	269	29.89	24	33	33	9	23	34	3	1	2	3	0	9
+17	9	13	34	266	29.56	33	33	33	0	33	33	2	3	1	3	0	9
+18	9	21	34	272	30.22	31	33	33	2	28	34	0	5	1	3	0	9
+19	9	5	34	244	27.11	27	30	33	6	18	34	4	4	1	0	0	9
+20	9	11	34	241	26.78	23	32	33	10	11	34	3	4	2	0	0	9
+21	9	13	33	240	26.67	24	27	33	9	13	33	1	4	0	4	0	9
+22	9	5	33	190	21.11	13	21	33	20	5	33	1	4	0	3	1	9
+23	9	5	33	205	22.78	16	26	33	17	5	33	4	4	1	0	0	9
+24	9	5	33	247	27.44	28	31	33	5	21	33	1	5	1	2	0	9
+25	9	11	34	241	26.78	24	33	33	9	11	34	3	4	0	2	0	9
+26	9	5	33	212	23.56	18	31	33	15	5	33	0	6	0	3	0	9
+27	9	5	33	227	25.22	21	26	33	12	5	33	3	4	1	1	0	9
+28	9	21	33	255	28.33	24	31	33	9	21	33	2	4	3	0	0	9
+29	9	5	33	228	25.33	21	30	33	12	5	33	2	4	1	2	0	9
+30	9	10	33	213	23.67	16	28	33	17	10	33	3	4	2	0	0	9
+31	9	5	33	236	26.22	21	31	33	12	5	33	1	4	1	3	0	9
+32	9	5	33	210	23.33	12	29	33	21	5	33	3	3	0	3	0	9
+33	9	5	33	183	20.33	9	21	33	24	5	33	1	4	2	2	0	9
+34	9	5	33	150	16.67	7	17	22	15	5	33	3	4	1	1	0	9
+35	9	13	33	217	24.11	21	24	29	8	13	33	1	4	1	3	0	9
+36	9	5	33	195	21.67	18	21	32	14	5	33	3	2	1	3	0	9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Sep 25 11:15:30 2013 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Resolves fastx_toolkit 0.0.13 from a Tool Shed package repository pinned to one changeset. -->
+    <package version="0.0.13" name="fastx_toolkit">
+        <repository toolshed="http://toolshed.g2.bx.psu.edu" owner="devteam" name="package_fastx_toolkit_0_0_13" changeset_revision="ec66ae4c269b" />
+    </package>
+</tool_dependency>