comparison fastx_quality_statistics.xml @ 0:78a7d28f2a15 draft

Uploaded
author idot
date Wed, 10 Jul 2013 06:13:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:78a7d28f2a15
1 <tool id="cshl_fastx_quality_statistics" name="Compute quality statistics">
2 <description />
3 <command>
4 ## Let fastx_quality_stats read the dataset itself (-i) instead of piping it through cat.
5 ## -Q passes the quality-score offset: 33 for fastqsanger, 64 for generic fastq; it is omitted for any other datatype (fasta).
6 fastx_quality_stats -i '$input' -o '$output'
7 #if $input.ext == "fastqsanger":
8 -Q 33
9 #elif $input.ext == "fastq":
10 -Q 64
11 #end if
12 </command>
13
14 <inputs> <!-- Single dataset parameter: the reads to analyse (FASTA, FASTQ or FASTQ Sanger). -->
15 <param name="input" type="data" label="Library to analyse" format="fasta,fastq,fastqsanger"/>
16 </inputs>
17
18 <tests> <!-- Functional test: fastq_stats1.fastq, loaded as generic fastq, must reproduce fastq_stats1.out. -->
19 <test>
20 <param name="input" ftype="fastq" value="fastq_stats1.fastq"/>
21 <output file="fastq_stats1.out" name="output"/>
22 </test>
23 </tests>
24
25 <outputs> <!-- One plain-text report; metadata_source names the input dataset as the source of the output's metadata. -->
26 <data name="output" format="txt"
27 metadata_source="input"/>
28 </outputs>
29
30 <help>
31
32 **What it does**
33
34 Creates a quality statistics report for the given Solexa/FASTQ library.
35
36 .. class:: infomark
37
38 **TIP:** This statistics report can be used as input for the **Quality Score** and **Nucleotides Distribution** tools.
39
40 -----
41
42 **The output file will contain the following fields:**
43
44 * column = Column number (1 to 36 for a FASTQ file with 36-cycle reads).
45 * count = Number of bases found in this column.
46 * min = Lowest quality score value found in this column.
47 * max = Highest quality score value found in this column.
48 * sum = Sum of quality score values for this column.
49 * mean = Mean quality score value for this column.
50 * Q1 = 1st quartile quality score.
51 * med = Median quality score.
52 * Q3 = 3rd quartile quality score.
53 * IQR = Interquartile range (Q3-Q1).
54 * lW = 'Left-Whisker' value (for boxplotting).
55 * rW = 'Right-Whisker' value (for boxplotting).
56 * A_Count = Count of 'A' nucleotides found in this column.
57 * C_Count = Count of 'C' nucleotides found in this column.
58 * G_Count = Count of 'G' nucleotides found in this column.
59 * T_Count = Count of 'T' nucleotides found in this column.
60 * N_Count = Count of 'N' nucleotides found in this column.
61
62
63
64 **Output Example**::
65
66 column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count
67 1 6362991 -4 40 250734117 39.41 40 40 40 0 40 40 1396976 1329101 678730 2958184 0
68 2 6362991 -5 40 250531036 39.37 40 40 40 0 40 40 1786786 1055766 1738025 1782414 0
69 3 6362991 -5 40 248722469 39.09 40 40 40 0 40 40 2296384 984875 1443989 1637743 0
70 4 6362991 -5 40 247654797 38.92 40 40 40 0 40 40 1683197 1410855 1722633 1546306 0
71 5 6362991 -4 40 248214827 39.01 40 40 40 0 40 40 2536861 1167423 1248968 1409739 0
72 6 6362991 -5 40 248499903 39.05 40 40 40 0 40 40 1598956 1236081 1568608 1959346 0
73 7 6362991 -4 40 247719760 38.93 40 40 40 0 40 40 1692667 1822140 1496741 1351443 0
74 8 6362991 -5 40 245745205 38.62 40 40 40 0 40 40 2230936 1343260 1529928 1258867 0
75 9 6362991 -5 40 245766735 38.62 40 40 40 0 40 40 1702064 1306257 1336511 2018159 0
76 10 6362991 -5 40 245089706 38.52 40 40 40 0 40 40 1519917 1446370 1450995 1945709 0
77 11 6362991 -5 40 242641359 38.13 40 40 40 0 40 40 1717434 1282975 1387804 1974778 0
78 12 6362991 -5 40 242026113 38.04 40 40 40 0 40 40 1662872 1202041 1519721 1978357 0
79 13 6362991 -5 40 238704245 37.51 40 40 40 0 40 40 1549965 1271411 1973291 1566681 1643
80 14 6362991 -5 40 235622401 37.03 40 40 40 0 40 40 2101301 1141451 1603990 1515774 475
81 15 6362991 -5 40 230766669 36.27 40 40 40 0 40 40 2344003 1058571 1440466 1519865 86
82 16 6362991 -5 40 224466237 35.28 38 40 40 2 35 40 2203515 1026017 1474060 1651582 7817
83 17 6362991 -5 40 219990002 34.57 34 40 40 6 25 40 1522515 1125455 2159183 1555765 73
84 18 6362991 -5 40 214104778 33.65 30 40 40 10 15 40 1479795 2068113 1558400 1249337 7346
85 19 6362991 -5 40 212934712 33.46 30 40 40 10 15 40 1432749 1231352 1769799 1920093 8998
86 20 6362991 -5 40 212787944 33.44 29 40 40 11 13 40 1311657 1411663 2126316 1513282 73
87 21 6362991 -5 40 211369187 33.22 28 40 40 12 10 40 1887985 1846300 1300326 1318380 10000
88 22 6362991 -5 40 213371720 33.53 30 40 40 10 15 40 542299 3446249 516615 1848190 9638
89 23 6362991 -5 40 221975899 34.89 36 40 40 4 30 40 347679 1233267 926621 3855355 69
90 24 6362991 -5 40 194378421 30.55 21 40 40 19 -5 40 433560 674358 3262764 1992242 67
91 25 6362991 -5 40 199773985 31.40 23 40 40 17 -2 40 944760 325595 1322800 3769641 195
92 26 6362991 -5 40 179404759 28.20 17 34 40 23 -5 40 3457922 156013 1494664 1254293 99
93 27 6362991 -5 40 163386668 25.68 13 28 40 27 -5 40 1392177 281250 3867895 821491 178
94 28 6362991 -5 40 156230534 24.55 12 25 40 28 -5 40 907189 981249 4174945 299437 171
95 29 6362991 -5 40 163236046 25.65 13 28 40 27 -5 40 1097171 3418678 1567013 280008 121
96 30 6362991 -5 40 151309826 23.78 12 23 40 28 -5 40 3514775 2036194 566277 245613 132
97 31 6362991 -5 40 141392520 22.22 10 21 40 30 -5 40 1569000 4571357 124732 97721 181
98 32 6362991 -5 40 143436943 22.54 10 21 40 30 -5 40 1453607 4519441 38176 351107 660
99 33 6362991 -5 40 114269843 17.96 6 14 30 24 -5 40 3311001 2161254 155505 734297 934
100 34 6362991 -5 40 140638447 22.10 10 20 40 30 -5 40 1501615 1637357 18113 3205237 669
101 35 6362991 -5 40 138910532 21.83 10 20 40 30 -5 40 1532519 3495057 23229 1311834 352
102 36 6362991 -5 40 117158566 18.41 7 15 30 23 -5 40 4074444 1402980 63287 822035 245
103
104 ------
105
106 This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
107
108 .. __: http://hannonlab.cshl.edu/fastx_toolkit/
109
110 </help>
111 </tool>
112 <!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->