Mercurial > repos > idot > fastx_toolkit2
diff fastx_quality_statistics.xml @ 0:78a7d28f2a15 draft
Uploaded
author | idot |
---|---|
date | Wed, 10 Jul 2013 06:13:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_quality_statistics.xml Wed Jul 10 06:13:48 2013 -0400 @@ -0,0 +1,112 @@ +<tool id="cshl_fastx_quality_statistics" name="Compute quality statistics"> + <description></description> + <command> +cat '$input' | +fastx_quality_stats +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -o '$output' +</command> + + <inputs> + <param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to analyse" /> + </inputs> + + <tests> + <test> + <param name="input" value="fastq_stats1.fastq" ftype="fastq"/> + <output name="output" file="fastq_stats1.out" /> + </test> + </tests> + + <outputs> + <data format="txt" name="output" metadata_source="input" + /> + </outputs> + +<help> + +**What it does** + +Creates quality statistics report for the given Solexa/FASTQ library. + +.. class:: infomark + +**TIP:** This statistics report can be used as input for **Quality Score** and **Nucleotides Distribution** tools. + +----- + +**The output file will contain the following fields:** + +* column = column number (1 to 36 for a 36-cycles read FASTQ file) +* count = number of bases found in this column. +* min = Lowest quality score value found in this column. +* max = Highest quality score value found in this column. +* sum = Sum of quality score values for this column. +* mean = Mean quality score value for this column. +* Q1 = 1st quartile quality score. +* med = Median quality score. +* Q3 = 3rd quartile quality score. +* IQR = Inter-Quartile range (Q3-Q1). +* lW = 'Left-Whisker' value (for boxplotting). +* rW = 'Right-Whisker' value (for boxplotting). +* A_Count = Count of 'A' nucleotides found in this column. +* C_Count = Count of 'C' nucleotides found in this column. +* G_Count = Count of 'G' nucleotides found in this column. +* T_Count = Count of 'T' nucleotides found in this column. +* N_Count = Count of 'N' nucleotides found in this column. + + + +**Output Example**:: + + column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count + 1 6362991 -4 40 250734117 39.41 40 40 40 0 40 40 1396976 1329101 678730 2958184 0 + 2 6362991 -5 40 250531036 39.37 40 40 40 0 40 40 1786786 1055766 1738025 1782414 0 + 3 6362991 -5 40 248722469 39.09 40 40 40 0 40 40 2296384 984875 1443989 1637743 0 + 4 6362991 -5 40 247654797 38.92 40 40 40 0 40 40 1683197 1410855 1722633 1546306 0 + 5 6362991 -4 40 248214827 39.01 40 40 40 0 40 40 2536861 1167423 1248968 1409739 0 + 6 6362991 -5 40 248499903 39.05 40 40 40 0 40 40 1598956 1236081 1568608 1959346 0 + 7 6362991 -4 40 247719760 38.93 40 40 40 0 40 40 1692667 1822140 1496741 1351443 0 + 8 6362991 -5 40 245745205 38.62 40 40 40 0 40 40 2230936 1343260 1529928 1258867 0 + 9 6362991 -5 40 245766735 38.62 40 40 40 0 40 40 1702064 1306257 1336511 2018159 0 + 10 6362991 -5 40 245089706 38.52 40 40 40 0 40 40 1519917 1446370 1450995 1945709 0 + 11 6362991 -5 40 242641359 38.13 40 40 40 0 40 40 1717434 1282975 1387804 1974778 0 + 12 6362991 -5 40 242026113 38.04 40 40 40 0 40 40 1662872 1202041 1519721 1978357 0 + 13 6362991 -5 40 238704245 37.51 40 40 40 0 40 40 1549965 1271411 1973291 1566681 1643 + 14 6362991 -5 40 235622401 37.03 40 40 40 0 40 40 2101301 1141451 1603990 1515774 475 + 15 6362991 -5 40 230766669 36.27 40 40 40 0 40 40 2344003 1058571 1440466 1519865 86 + 16 6362991 -5 40 224466237 35.28 38 40 40 2 35 40 2203515 1026017 1474060 1651582 7817 + 17 6362991 -5 40 219990002 34.57 34 40 40 6 25 40 1522515 1125455 2159183 1555765 73 + 18 6362991 -5 40 214104778 33.65 30 40 40 10 15 40 1479795 2068113 1558400 1249337 7346 + 19 6362991 -5 40 212934712 33.46 30 40 40 10 15 40 1432749 1231352 1769799 1920093 8998 + 20 6362991 -5 40 212787944 33.44 29 40 40 11 13 40 1311657 1411663 2126316 1513282 73 + 21 6362991 -5 40 211369187 33.22 28 40 40 12 10 40 1887985 1846300 1300326 1318380 10000 + 22 6362991 -5 40 213371720 33.53 30 40 40 10 15 40 542299 3446249 516615 1848190 9638 + 23 6362991 -5 40 221975899 34.89 36 40 40 4 30 40 347679 1233267 926621 3855355 69 + 24 6362991 -5 40 194378421 30.55 21 40 40 19 -5 40 433560 674358 3262764 1992242 67 + 25 6362991 -5 40 199773985 31.40 23 40 40 17 -2 40 944760 325595 1322800 3769641 195 + 26 6362991 -5 40 179404759 28.20 17 34 40 23 -5 40 3457922 156013 1494664 1254293 99 + 27 6362991 -5 40 163386668 25.68 13 28 40 27 -5 40 1392177 281250 3867895 821491 178 + 28 6362991 -5 40 156230534 24.55 12 25 40 28 -5 40 907189 981249 4174945 299437 171 + 29 6362991 -5 40 163236046 25.65 13 28 40 27 -5 40 1097171 3418678 1567013 280008 121 + 30 6362991 -5 40 151309826 23.78 12 23 40 28 -5 40 3514775 2036194 566277 245613 132 + 31 6362991 -5 40 141392520 22.22 10 21 40 30 -5 40 1569000 4571357 124732 97721 181 + 32 6362991 -5 40 143436943 22.54 10 21 40 30 -5 40 1453607 4519441 38176 351107 660 + 33 6362991 -5 40 114269843 17.96 6 14 30 24 -5 40 3311001 2161254 155505 734297 934 + 34 6362991 -5 40 140638447 22.10 10 20 40 30 -5 40 1501615 1637357 18113 3205237 669 + 35 6362991 -5 40 138910532 21.83 10 20 40 30 -5 40 1532519 3495057 23229 1311834 352 + 36 6362991 -5 40 117158566 18.41 7 15 30 23 -5 40 4074444 1402980 63287 822035 245 + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->