Mercurial > repos > devteam > fastx_artifacts_filter
changeset 0:7f9660a246c0
Uploaded tool tarball.
author | devteam |
---|---|
date | Wed, 25 Sep 2013 11:05:24 -0400 |
parents | |
children | c8808979ba33 |
files | fastx_artifacts_filter.xml test-data/fastx_artifacts1.fasta test-data/fastx_artifacts1.out test-data/fastx_artifacts2.fastq test-data/fastx_artifacts2.out tool_dependencies.xml |
diffstat | 6 files changed, 236 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastx_artifacts_filter.xml Wed Sep 25 11:05:24 2013 -0400
@@ -0,0 +1,95 @@
+<tool id="cshl_fastx_artifacts_filter" version="1.0.0" name="Remove sequencing artifacts">
+    <!-- Galaxy wrapper around the fastx_artifacts_filter command from the fastx_toolkit package. -->
+    <description></description>
+    <requirements>
+        <requirement type="package" version="0.0.13">fastx_toolkit</requirement>
+    </requirements>
+    <!-- The input is streamed through "zcat -f" into the filter; "-Q 33" is appended only when the input datatype is fastqsanger. -->
+    <command>zcat -f '$input' | fastx_artifacts_filter -v -o "$output"
+#if $input.ext == "fastqsanger":
+-Q 33
+#end if
+    </command>
+
+    <inputs>
+        <param format="fasta,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to filter" />
+
+    </inputs>
+
+    <tests>
+        <test>
+            <!-- Filter FASTA file -->
+            <param name="input" value="fastx_artifacts1.fasta" />
+            <output name="output" file="fastx_artifacts1.out" />
+        </test>
+        <test>
+            <!-- Filter FASTQ file -->
+            <param name="input" value="fastx_artifacts2.fastq" ftype="fastqsanger" />
+            <output name="output" file="fastx_artifacts2.out" />
+        </test>
+    </tests>
+
+    <outputs>
+        <!-- format_source gives the output the same datatype as the "input" dataset; it replaces the deprecated format="input" spelling. -->
+        <data format_source="input" name="output" metadata_source="input" />
+    </outputs>
+<help>
+**What it does**
+
+This tool filters sequencing artifacts (reads with all but 3 identical bases).
+
+--------
+
+**The following is an example of sequences which will be filtered out**::
+
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
+    AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA
+    AAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAA
+    AAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAA
+    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAA
+
+------
+
+This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
+
+    .. __: http://hannonlab.cshl.edu/fastx_toolkit/
+</help>
+<!-- FASTX-Artifacts-filter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_artifacts1.fasta Wed Sep 25 11:05:24 2013 -0400 @@ -0,0 +1,24 @@ +>CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1601:1525 +AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA +>CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1713:528 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA +>CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1236:1157 +AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA +>CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA +>CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1818:550 +AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA +>CSHL_3_FC0420AGLLKK:2:1:1764:391 +CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_artifacts1.out Wed Sep 25 11:05:24 2013 -0400 @@ -0,0 +1,14 @@ +>CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA +>CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA +>CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_artifacts2.fastq Wed Sep 25 11:05:24 2013 -0400 @@ -0,0 +1,60 @@ +@CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:233:1674 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 +@CSHL_3_FC0420AGLLKK:2:1:136:448 +GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA ++CSHL_3_FC0420AGLLKK:2:1:136:448 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 +@CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:237:1037 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 +@CSHL_3_FC0420AGLLKK:2:1:1601:1525 +AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA ++CSHL_3_FC0420AGLLKK:2:1:1601:1525 +40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 12 40 40 30 30 40 40 40 12 36 23 17 24 18 22 25 15 10 34 14 +@CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1805:1464 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 +@CSHL_3_FC0420AGLLKK:2:1:1713:528 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ++CSHL_3_FC0420AGLLKK:2:1:1713:528 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 12 38 15 22 20 17 14 12 10 7 22 11 +@CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA ++CSHL_3_FC0420AGLLKK:2:1:126:1087 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 +@CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1488:1323 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 +@CSHL_3_FC0420AGLLKK:2:1:913:199 +GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:913:199 +40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 
40 13 21 21 26 17 18 25 14 25 21 +@CSHL_3_FC0420AGLLKK:2:1:1236:1157 +AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1236:1157 +40 40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 40 40 40 33 40 37 40 40 40 18 16 20 23 22 31 26 10 22 19 +@CSHL_3_FC0420AGLLKK:2:1:928:765 +GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC ++CSHL_3_FC0420AGLLKK:2:1:928:765 +40 40 40 40 40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 +@CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA ++CSHL_3_FC0420AGLLKK:2:1:727:1020 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 24 18 38 33 26 16 23 22 16 18 +@CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:758:1799 +40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11 +@CSHL_3_FC0420AGLLKK:2:1:1818:550 +AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1818:550 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 36 32 40 33 40 40 38 37 40 28 29 27 22 13 20 19 17 17 13 33 18 +@CSHL_3_FC0420AGLLKK:2:1:1764:391 +CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC0420AGLLKK:2:1:1764:391 +40 40 40 40 40 40 40 40 40 40 40 33 40 40 40 40 40 24 40 40 40 40 40 12 40 24 14 9 22 15 29 18 11 40 22
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastx_artifacts2.out Wed Sep 25 11:05:24 2013 -0400 @@ -0,0 +1,40 @@ +@CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:233:1674 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 +@CSHL_3_FC0420AGLLKK:2:1:136:448 +GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA ++CSHL_3_FC0420AGLLKK:2:1:136:448 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 +@CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:237:1037 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 +@CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1805:1464 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 +@CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA ++CSHL_3_FC0420AGLLKK:2:1:126:1087 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 +@CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1488:1323 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 +@CSHL_3_FC0420AGLLKK:2:1:913:199 +GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:913:199 +40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 40 13 21 21 26 17 18 25 14 25 21 +@CSHL_3_FC0420AGLLKK:2:1:928:765 +GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC ++CSHL_3_FC0420AGLLKK:2:1:928:765 +40 40 40 40 40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 +@CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA ++CSHL_3_FC0420AGLLKK:2:1:727:1020 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 
24 18 38 33 26 16 23 22 16 18 +@CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:758:1799 +40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Sep 25 11:05:24 2013 -0400
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<!-- Declares the fastx_toolkit 0.0.13 package dependency and the Tool Shed repository revision that supplies it. -->
+<tool_dependency>
+    <package name="fastx_toolkit" version="0.0.13">
+        <repository
+            changeset_revision="ec66ae4c269b"
+            name="package_fastx_toolkit_0_0_13"
+            owner="devteam"
+            toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>