Mercurial > repos > iuc > umi_tools_extract
changeset 5:f77bc14eba31 draft
planemo upload commit 57e3e460a740aa7aad217c8365527c49e88c9a30
author | iuc |
---|---|
date | Tue, 05 Jun 2018 19:44:38 -0400 |
parents | e73a22ff585c |
children | 6417d5ed05c6 |
files | test-data/scrb_extract.fastq.gz test-data/scrb_seq_barcodes test-data/scrb_seq_fastq.1.gz test-data/scrb_seq_fastq.2.gz test-data/t_R2.fastq umi-tools_extract.xml |
diffstat | 6 files changed, 857 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/scrb_seq_barcodes Tue Jun 05 19:44:38 2018 -0400 @@ -0,0 +1,384 @@ +AAAACT +GCTAGA +AAAATC +GCTTAC +AAACAT +GGACAT +AAACTA +GGCAAT +AAAGTT +GGGATT +AAATAC +GTACAC +AAATCA +GTCAAG +AAATGT +GTGACT +AAATTG +GTTCGA +AACAAT +TAGTGG +AACATA +TCCAAC +AACTAA +TCGAAG +AAGATT +TCTGCA +AAGTAT +TTCCTC +AAGTTA +TTGTCC +AATAAC +TTTGGC +AATACA +CCAACC +AATAGT +CCTTCC +AATATG +CTCTCC +AATCAA +GGACCA +AATCTT +GTACCG +AATGAT +ACCCCC +AATGTA +ACCCGG +AATTAG +ACCGCG +AATTCT +ACCGGC +AATTGA +ACGCCG +AATTTC +ACGCGC +ACAAAT +ACGGCC +ACAATA +ACGGGG +ACATAA +AGCCCG +ACTAAA +AGCCGC +ACTATT +AGCGCC +ACTTAT +AGCGGG +ACTTTA +AGGCCC +AGAATT +AGGCGG +AGATAT +AGGGCG +AGATTA +AGGGGC +AGTAAT +CACCCC +AGTATA +CACCGG +AGTTAA +CACGCG +ATAAAC +CACGGC +ATAACA +CAGCCG +ATAAGT +CAGCGC +ATAATG +CAGGCC +ATACAA +CAGGGG +ATACTT +CCACCG +ATAGAT +CCACGC +ATAGTA +CCAGGG +ATATAG +CCCACG +ATATCT +CCCAGC +ATATGA +CCCCAC +ATATTC +CCCCCA +ATCAAA +CCCCGT +ATCATT +CCCCTG +ATCTAT +CCCGAG +ATCTTA +CCCGGA +ATGAAT +CCCTGG +ATGATA +CCGAGG +ATGTAA +CCGCAG +ATTAAG +CCGCGA +ATTACT +CCGGAC +ATTAGA +CCGGCA +ATTATC +CCGGGT +ATTCAT +CCGGTG +ATTCTA +CCGTCG +ATTGAA +CCGTGC +ATTGTT +CCTCGG +ATTTAC +CCTGCG +ATTTCA +CCTGGC +ATTTGT +CGACCC +ATTTTG +CGACGG +CAAAAT +CGAGCG +CAAATA +CGAGGC +CAATAA +CGCACC +CATAAA +CGCAGG +CATATT +CGCCAG +CATTAT +CGCCCT +CATTTA +CGCCGA +CTAAAA +CGCCTC +CTAATT +CGCGAC +CTATAT +CGCGCA +CTATTA +CGCGGT +CTTAAT +CGCGTG +CTTATA +CGCTCG +CTTTAA +CGCTGC +GAAATT +CGGACG +GAATAT +CGGAGC +GAATTA +CGGCAC +GATAAT +CGGCCA +GATATA +CGGCGT +GATTAA +CGGCTG +GTAAAT +CGGGAG +GTAATA +CGGGCT +GTATAA +CGGGGA +GTTAAA +CGGGTC +GTTATT +CGGTCC +GTTTAT +CGGTGG +GTTTTA +CGTCCG +TAAAAC +CGTCGC +TAAACA +CGTGCC +TAAAGT +CGTGGG +TAAATG +CTCCCG +TAACAA +CTCCGC +TAACTT +CTCGGG +TAAGAT +CTGCGG +TAAGTA +CTGGCG +TAATAG +CTGGGC +TAATCT +GACCCG +TAATGA +GACCGC +TAATTC +GACGCC +TACAAA +GACGGG +TACATT +GAGCCC +TACTAT +GAGCGG +TACTTA +GAGGCG +TAGAAT +GAGGGC +TAGATA +GCACCC +TAGTAA +GCACGG +TAGTTT +GCAGCG +TATAAG +GCAGGC +TATACT +GCCACC +TATAGA +GCCAGG +TATATC +GCCCAG +TATCAT +GCCCCT +TATCTA +GCCCGA +TATGAA +GCCCTC +TATGTT +GCCGAC +TATTAC +GCCGCA +TATTCA +GCCGGT +TATTGT +GCCGTG +TATTTG +GCCTCG +TCAAAA +GCCTGC +TCAATT +GCGACG +TCATAT +GCGAGC +TCATTA +GCGCAC +TCTAAT +GCGCCA +TCTATA +GCGCGT +TCTTAA +GCGCTG +TGAAAT +GCGGAG +TGAATA +GCGGCT +TGATAA +GCGGGA +TGATTT +GCGGTC +TGTAAA +GCGTCC +TGTATT +GCGTGG +TGTTAT +GCTCCG +TGTTTA +GCTCGC +TTAAAG +GCTGCC +TTAACT +GCTGGG +TTAAGA +GGACGC +TTAATC +GGAGCC +TTACAT +GGAGGG +TTACTA +GGCACG +TTAGAA +GGCAGC +TTAGTT +GGCCAC +TTATAC +GGCGAG +TTATCA +GGCGCT +TTATGT +GGCGGA +TTATTG +GGCGTC +TTCAAT +GGCTCC +TTCATA +GGGACC +TTCTAA +GGGAGG +TTGAAA +GGGCAG +TTGATT +GGGCCT +TTGTTA +GGGCGA +TTTAAC +GGGCTC +TTTACA +GGGGAC +TTTAGT +GGGGCA +TTTATG +GGGGGT +TTTCAA +GGGGTG +TTTCTT +GGGTCG +TTTGTA +GGGTGC +TTTTAG +GGTCCC +TTTTCT +GGTGCG +TTTTGA +GGTGGC +TCTTTC +GTCCCC +TTGGAT +GTCGCG +ACCGTA +GTCGGC +AGACCT +GTGCGC +AGGGAT +GTGGCC +ATCGAG +GTGGGG +CAAGCT +TCCCCG +CACCAA +TCCCGC +CAGTCA +TCCGGG +CATCAG +TCGCGG +CATGGT +TCGGCG +CCACAT +TCGGGC +CCGATT +TGCCCC +CGACTT +TGCGCG +CGATTG +TGCGGC +CTAGTG +TGGCCG +CTTCTG +TGGCGC +GAAGAC +TGGGCC +GATCGT +TGGGGG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/t_R2.fastq Tue Jun 05 19:44:38 2018 -0400 @@ -0,0 +1,400 @@ +@HISEQ:105:C2UE1ACXX:3:1101:11160:2245 2:N:0:CAGATC +CATAAAAACCAAAACTAACTAAACCCCAAATAAAAAACAACCTAACCTCTAACAAAAACAACAACAACTAACACCTCAAAATCAACTCTAAATAAAAACTA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:19338:2197 2:N:0:CAGATC +CTAATTTCTATTACCTACCTAACAACTATAACTATAATACTAACAAAAAACAAACAACATAGACCTAAATCCTACTTATACCCAACATTCTAAAAACAATT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII0<BFFIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:19467:2281 2:N:0:CATATC +ACACAACAAAATCCCTACTCCTATAACCTCTCACTACACCCAAAACTCCATTCTTTTCCCCCTTTACAAAAATCACTAAAATCCAAACTATACATCTCACC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFIIIIFFFFBBFFFFFFFBBFFFFFBBBFFFBFFFFBF +@HISEQ:105:C2UE1ACXX:3:1101:7009:2740 2:N:0:CAGATC +TAAATAAAACCCAAACCCACACTATCTATCCCTTATTAACATTACAATCACAATTATCAAATAAATAACAAAACCCAAAAAAACCTTACTTAACATTCCAT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIFIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:13708:2613 2:N:0:CAGATC +TTCAAAAACTCCATAACAAACACAAATAAAAAATAAAAAACTCCTAAATCTCACCTTAAAAACTTATCTAACTGCAACTATTATCTTACTTAAAAAAAAAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:19067:2707 2:N:0:CAGATC +ATCACTCTTCCAAAAATCACTCGAATCCACAAATACAAAAACTTTCTAACCACACACCTAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCTTTAAAACCT ++ +BBBFFFFFFFFFFIIIIIIIIIIFFIIIIIIIIIIIIIIIFIIIIIIIIIFIIIIIIIFIIIIIFFFFFFFFFFFFFFFBB#################### +@HISEQ:105:C2UE1ACXX:3:1101:4999:3182 2:N:0:CAGATC +CTTATAATTCAAATTTCTAAACTCCTACTCCCTCTCCCTTTATATTTATTTAACACATACTATTCTAACTATATATAAATCATAAATCTTATAAACTTTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:16790:3145 2:N:0:CAGATC +AACATACCTATAAAAACGCACTACTTTTATATACAAAATCCGTCTCTACTAATACCATAACCAACCTCTATACCACATATAAAAAACAACAAACAATACTC ++ +BBBFFFFFFFFBFIIIIF<FFFFFIIIIIFFFIIIIIFIFFFIIIIIIFIFFFFIIIIIIIIIIIIFBBBBFFFFFFBBFFFFFFFFFFBBFFFFFFFFBB +@HISEQ:105:C2UE1ACXX:3:1101:18065:3106 2:N:0:CAGATC +AAAAACTAAACCCAAAAAAAAAACAATAAAAATAAAATAATAAAAATTATCATAATAAATTCCTAAAAAAAAAAAAAACTTTAAAAAAAAAAAAAAAAAAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIFIIIIIBFIIIFFFFFIIIFFFFFFFBFFFFFFFFFFBFFFFFFFFFFFFFF'0<<<BFB############### +@HISEQ:105:C2UE1ACXX:3:1101:2300:3263 2:N:0:TAGATC +AAAAATACAAAAAATAAAAAAAAAAAAAAACATTAAATTTAAAAAAAATTTATTTTTTATTTTATTTTATTTTTATTTTTTTAAATTAAAATAAAAAAAAA ++ +BBBFFFFBFFFFFIFFIIIIIIIIIIIFF<'<<<BBBFF0<FFBBBBFB7'0<BB000<'<F<0BBFB'<<BBB0<B<B<B7B<<0'<B'00BBFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:5605:3427 2:N:0:CAGATC +AACCTCTACACAAAAAAATCTAAAAATCTAAAACAAATCCTATAACCGAACCACTTCTTATCTATAAATCTTACTAAAACTCCCCACATCCTATACCTCTC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIFIIFFFFFFFFFFFFFFFFFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:8129:3589 2:N:0:CAGATC +TCTTATCTCTTCAAATTCCCTAATATCAATAAACCCTAACATAACCCGATAAATCAAAAAACTCTTTTATCACAATAAACGTATAATCCTAACTAAAAACT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIFFFFFFFFFFFFFFFFFFFFBBBFFFFFFFFFFFFFFFF< +@HISEQ:105:C2UE1ACXX:3:1101:14304:3866 2:N:0:CAGATC +AATTTATTCTTCACTAAAACCCCTTAACCAATACCAACATTTCCACAAAATTCTACCCTCTACAAAAACAACCTATCAAACTCAAAAATCCCCTATATAAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFBBBFFBFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12720:4398 2:N:0:CAGATC +CTAATTTAACCTTTAAACTCAACAAAAATTAACCTACCTCTACCACTAAAATACTAAAATTAAACATATATATCACCAAACCCAACTTCAATTAAATACAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFBFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:14945:4439 2:N:0:CAGATC +ACTCTCTCCTATATTCTTTACCAACATATATAACTTAACTCTCTAATAACCTTAACTATTCCTCACCCTAACCTCCACAAATACTATATATAATACTATAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIBFIIIIIFIIFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:8616:4508 2:N:0:CAGATC +CTAATAAAAACCCTAAAAAATAAATCCTATCCTAATTCATTACCCACCATACCTTACAATACTAAAAATAACTACCCTACCTATTAAACCTCAAAAAAAAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIBFIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:18975:4834 2:N:0:CAGATC +CTACAAAAATCCCCCTACCTCAACCTCCCACAAATAACTATTACCAATAATTACTTTTAAATCTTTAAATTACAAAATTACCATTTATAAGATCGGAAGAG ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFBFFFF +@HISEQ:105:C2UE1ACXX:3:1101:4984:5374 2:N:0:CAGATC +AAAACACTTTTCATAAACGACCCACTCTAAAAACAAACACCTCAACTAATACTTATTATCATTTTCCAAACTAATACTCAAAACCATTAACTAACAAATAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:14432:5660 2:N:0:CAGATC +TTACAAAAAAAAATTTTTTAATTAAATAAAAATAAAAATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATCCTTTTTTTTTTTTTTTTC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFB<BBBFFB777<B###################### +@HISEQ:105:C2UE1ACXX:3:1101:17410:5545 2:N:0:CAGATC +ATATCCAAACCTCTAATAAAATAAAACCTAAATAAAACTAACATCTAAAATAATTAAATATACTCAAATTTCACTAAATAACCCTAACAACTTCAAAAAAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIFIIIIIIIIIIIIFFFFFFFFFFBFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12336:6058 2:N:0:CAGATC +TAAAAAAAATCACTCTTCCAAAAATCACTCAAATCCACAAATACAAAAACTTTCTAACCACACACCTAAAAAAAAAAAAAAAAAAAAAAAACACACACCCT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB########## +@HISEQ:105:C2UE1ACXX:3:1101:5999:6265 2:N:0:CAGATC +ATAAAACATTATATATAAAAACAACCCAATATCCAAAAACCCTATCAACCACAAAAACTAATAAACAAAAAATAAATTCAAAAAAAACACAAATATATAAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:5836:6568 2:N:0:CAGATC +AAAACTACCTATAAAAAAAACATACCAACAACCTTTCAAATACAAAACTCCGTAAAATCTATACTATCTAACAATACCCTAAATCAAAAAAAAAACCAATC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBBFBB +@HISEQ:105:C2UE1ACXX:3:1101:15437:6678 2:N:0:CAGATC +CACCATAAACATACTCTACATCTCTACCTCCACTATTCAATTTTCAAAAACAAAAACTACCAACATTTAAAACTCAAATATCATAAAAAAATCTAAACATA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIFIIIIFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12432:6996 2:N:0:CAGATC +TAAAAAATAAAACTATATAAAATATACAAAATAAAATAAAAAAAAAACAACCTTTAAAAATAACACTTATCTAAAATTACAACTTCAAATATTACTAAAAT ++ +BBBFFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBFFBFFFFFBFFFBBFFFFFFFFFFFFFFFFFBFFFFFFFFFF< +@HISEQ:105:C2UE1ACXX:3:1101:19878:7022 2:N:0:CATATC +TACAAACGCAACGTAAACTCAATATACCAAAAAACCCTACGCCCAACCACACCCCTAAATCGAACATAAATCTACACAAACTAATAAACAAAACACAAACT ++ +BBBFFFF'BFFB'<BFIIFFIIIIIFFFIIIIIIIIIIII07BFIIIIFFFFFFBBBBBBB'7B7B<BBBFBBBB00<BBBBBBBBBBB00BB7BB'0BB0 +@HISEQ:105:C2UE1ACXX:3:1101:6837:7574 2:N:0:CAGATC +AAATCTTTTCATTCAATATAACATTTTAACAAAACTATATTATTACACCCATAAAAACCAAAACTAACTAAACCCCAAATAAAAAACAACCTAACCTCTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:15853:7717 2:N:0:CAGATC +CTCAAAAAAATAAAAAAAAAATCCTTAACAAAAAACAATATACTAAACACTAACACCCACAACTACGATTAATTCTACCTTCACCCACCTTAAACACTTAT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFF<BFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:2440:7931 2:N:0:CAGATC +TATAAATCTTACTAAAACTCCCCACATCCTATACCTCTCATCTCCATACCTTTCTAATATCTATAACACTTTTCAAATAGATCGGAAGAGCGTCGTGTAGG ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIBFFFIFFFFFFFFFFFFFBBBBB<BBF +@HISEQ:105:C2UE1ACXX:3:1101:6407:7896 2:N:0:CAGATC +CTCCTAACTATAACCCTCTAATAACTCCCAATACATATCCTCAACCCAAACCTATAAAAGATCGGAAGAGCGTCGTTAGGGAAAGAGTGTAGATCTCGGTG ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIBFFIFFIIBFBBBFBFBBFFF<BFFBF<00770<BBFFB007 +@HISEQ:105:C2UE1ACXX:3:1101:19497:7952 2:N:0:CAGATC +TCCACTTAACCATAAACCTCATAAACCTCTAAATACATTTATTTAAACTTTTAAACATCTACGTATATATCGGAAGAGCGTCATGTATGGAAAAAATGTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII'7FFI'7BF0'0B########################## +@HISEQ:105:C2UE1ACXX:3:1101:3853:8232 2:N:0:CAGATC +CTACCGACAAACTAAATAAACAACAACTCAAAAACAACTCCTAAAAACCTCTAAATACAACAACAACCTAACCAGATCGGAAGAGCGTCGTGTAGGGAAAG ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFBFFFBBFFFFFFFBFFBBBBF<<7BB# +@HISEQ:105:C2UE1ACXX:3:1101:7442:8079 2:N:0:CAGATC +CAAAATATATCAATCAAAAAAACATCCAAAACCGAAACTCCAAACAACAAAAAAATAATAAAATACTACCCATAAAACCTAACCCTCAATATACCCCGACA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFBFFF +@HISEQ:105:C2UE1ACXX:3:1101:14324:8171 2:N:0:CAGATC +AATAACCCTAAATTCCTCTTCCTCCCTCACCATAACTAACATAACACGTCCTACCAAAAACAATCCCCAACAAACTATCACTCACACACACACACACACAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIFFFFIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:3147:8365 2:N:0:CAGATC +AATCTTTAAAAAAAACTTTCAAAACAAAATATAAAATTTCAAACTAAAAACAATAAAAACTAATAAATAAATATCTAAAAACGAAAACCAAAGAACAAAAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIFIIIIIFIIIFIIFFIIIIIIIIIIIIIIIIIIIFFBBBBFFFFFFFFFFFFFFFFFBBFFFFFFFFBFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:7150:8384 2:N:0:CAGATC +TCTAAAATAAAAATACCACTCACTCTATCCTAAATCCTTAAATACTTTCCTTCCAAATACTAAAAACCCTCCTAAACAATCCACTATAATAATAAACAATA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:11512:8610 2:N:0:CAGATC +AACCTTACGCACGCCCTACTCGCACTTCCACGCATAAACCAAAACCTTATAAATAAACAACATCTCCTACACTTCTACAAAAAAAAACGAAAACCCCGACC ++ +BBBFFFFFFFF<BBFFIFIIIFFFIIIIIIIIIFFIIIIIIIIIFFFFFFFFFFFFFBFFFFBBBFFFFFFFFFFFFFFFFFFFFFF<<BFFF77B<BBB7 +@HISEQ:105:C2UE1ACXX:3:1101:18380:8734 2:N:0:CAGATC +ATAAAAAAAAAAAAAAAATTTTTTAAATAAAAAAAAATAAACAAAAACAACTAAACAACAACACAAATCCAAACATCCAAACTAATACCTAACTCTATAAA ++ +BBBFFFFFFFFFFIIIIBFFFFFBFFBBFFFFFFFFB0BBB0BBFFF7<BB'7BB0BB0B<770BBB0<<BFB<B'<0BBBB0BB0<BB<B<BBB<B<BBF +@HISEQ:105:C2UE1ACXX:3:1101:18629:8512 2:N:0:CAGATC +CCCCCAATAAAAAATAAACCCAAACCTAAACAAACAAAAAAACTCAAAAAATTAACTCTAAATATCAATCCTAAAAACAACTTACTTTCTTTTTTTTTAAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFIIFF<FFFB<B<BBF<BBBBFBBBBBFFFFBBBBBBBFBFFFFFFFFFFFFBBF +@HISEQ:105:C2UE1ACXX:3:1101:5146:8981 2:N:0:CAGATC +ATCAATACAACTCATACTAAACTCACCCCAAACCTTCTAAAAACAACAAAATCAACAAAAAAATTATAACTACACTAATACAAAACAAACAACAACAACTA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:3344:9104 2:N:0:CAGATC +CCAAAATAAAAAAACAAATAAATACTAAAATTCATATACCTCTTATCTCACTATAAATCACCTAAATTATTATCGTCCGCTTACAACTATACTCTCTCCTA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:8326:9445 2:N:0:CAGATC +CTAAAATAATATCATCCTCCAACATCAAACCTACCAAAAACATAACTTCATTTTCAAAAACAATACCTTCCAAATAAACTTTAATTTCCTAACCAATCACT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:2097:9536 2:N:0:CAGATC +ATAACAAAAAATAATTTAAAAACAATAACAACAAAAACTCCCACCCCATCCCTATCCCAAAATTCCCAAAAAACCAAACTTAAATAAAATCCCATTAACTA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:16351:9944 2:N:0:CAGATC +TATCAATTCATAATCACTCAAAATAAAAAATAACTAATTTAACAACCAACAAACTTAATCCCAACACAACCAAAACAAACAATAAAAATAAATCCCACAAT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:17628:10309 2:N:0:CAGATC +CCCGAACACACGAAAAAACAAAAAATAATAAAAAATAATCAACAACCAAAAACGCAAATTCAAAACAAATCTAAACATACAAGATCGGAAGAGCGTCGTGT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFBFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFBBBBF< +@HISEQ:105:C2UE1ACXX:3:1101:1440:10515 2:N:0:CAGATC +ATATTATTAATCATAATATTTATCACAAAAATAAAAACCTACCTAACTCACCAAACACAAAAAATTAAACTAAAATAAAACATAAATCATAACCTAAATAA ++ +BBBFFFFFFFFFFIIIIIFIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:11787:10735 2:N:0:CAGATC +ACTAACTAACACACTTACTAAATACCAAACACAATACCCCATAAAAAAAATATTCTACTTAAAAATTCAAAAAAATCTAATAACTACTTATATCTAAAAAT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:10231:11124 2:N:0:CAGATC +CTCTCTAACCCAAAAATCCTTATATCCATCTACAAACCTCCTACACCCTCTAATAACACAAAATAATACTATCTCCTAACATCCTACAAAAACATTTCACA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12958:11220 2:N:0:CAGATC +TTTAATTATAAACTTAACGAAAACCTAAATTTCGAATTACCTAAATCAAACTAACCTATTAAAAAACGAAAAAAAAAAAAAAAACAATTAATAAAATTAAT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIFFFFFFFFFFFFFFF#################### +@HISEQ:105:C2UE1ACXX:3:1101:9833:11396 2:N:0:CAGATC +CCTATATCATTCTTACTATAACACACAAAAATCAAAAAACAACTACCTTAATCCCAAAAATAAAAACAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFBBFBFBBBFFBFFBBB#### +@HISEQ:105:C2UE1ACXX:3:1101:13437:11566 2:N:0:CAGATC +AAACTCCTAAAATAACCTAAACCAACAAACTATTCTCAATAACAAAATCTAAAAAAAAACCAAAAAATAAAAAAATAAAAAAAACAAAAAAACAAAAATCA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12514:13081 2:N:0:CAGATC +AAAAATTCCTAAAAACCTCTAAAAACCAACTCCTACAAAAAAAACAACCCACAAAACAAAAATCAACTCCCCAAACCTTAACTTAAATTTAAAATCAAAAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF< +@HISEQ:105:C2UE1ACXX:3:1101:18902:13084 2:N:0:CAGATC +ACCAAAACTATCAAAATAAAAAAAAACTAAAAATCTCCATTCCAAAACCAACACCTATCTCTACTATAATATTTCACAAAACCCGACTCCTAGATCGGAAG ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:3574:13312 2:N:0:GAGATC +TACCTCTCTCTCCAATAAAACCAAAAAATTCCAACAAACATTACAATACCCATTCCATTCTAACCAACTCAAATTATAAAACAAAAACCTGATTACCTAAC ++ +BBBFFFFFFFFFFIIIIIIIFFFIIIIIIIIIFIFFFIBFFFIFFIFIIFFFBFFFIIFFFBFFIFFIFFFFFB<BBFFFFFFFFFF<<<7BBBFBBBBF< +@HISEQ:105:C2UE1ACXX:3:1101:5163:13594 2:N:0:CAGATC +ATAAAAAAAAAATAAATACTTAAACTAATAAACTAAAAATAACCATTACCATATTAACTATATATTCCTCTAAATAACCCATTTCTAACCCACAAAGATCG ++ +BBBFFFFFFFFFFIIIFIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFBFFBFFFFFFFFFFFFFFFFFFFFFBBBFB +@HISEQ:105:C2UE1ACXX:3:1101:10809:14594 2:N:0:CAGATC +ATATAATACTATACTTCTTAATAAACTTACTTAACATAAACCATCAACTTATAAAAAACCTCCTAATCACAACTATTACTTTATTATCTTATTTCTCTTCT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:7778:14963 2:N:0:CAGATC +AAAATCCTCATAAAACACCCTCAAATAACCCAAAATAAAAAAACAAATAAATACTAAAATTCATATACCTCTTATCTCACTATAAATCACCTAAATTATTA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:2575:15554 2:N:0:CAGATC +TTAAAATAAAAATTTCATACATATTCAATATCTTAATAAAATTATAAAAAAAACCAAAACCCTATAACCTCTCTAAAAAAAAAAAAAACCAACCCAAAAAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFB7'77<B'7BBB## +@HISEQ:105:C2UE1ACXX:3:1101:20234:16099 2:N:0:CATATC +ACTTTTAACCAAACACTAAATCTCCCACACTACCCAAATATAATACCACATACCTATAATTCCCAAAAATTAAACCATTAACCTCAACTATCATTTTTACC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:20842:16102 2:N:0:CAGATC +CATCTAATTTAAAATAACCCAAACTACCTCGTTTCCTTAACTAAAATAAATTTTAAAAATTTCCTACAAATTCCATATTCTAAAAATAAACAACACATTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIII<FFIIIIIIIIIIIIIIIIIIIIIIIIIIFFIFIIIIIIIFFFFIIIFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:21194:16434 2:N:0:CAAATC +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ++ +BBBFFFFFFFFFFIIIFFFFFFFFFFFFB77BFFFFFFFFFFFFFFFFFFFFBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFBBFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:7164:16848 2:N:0:CAGATC +AAAAAAAATAGTATACAACATAAAAATTTATTCTTCACTAAAACCCCTTAACCAATACCAACGTTTCCACAAAATTCTACCCTCTACAAAAACAACCTATC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIFFFFBFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFBFFBBBB +@HISEQ:105:C2UE1ACXX:3:1101:2125:17112 2:N:0:CAGATC +AAACTCTCTACTAAACAACAATCCCAACCCCACAACACCTTAAACATACTCTACTTCTCTGCCTCCACTATTCAATTTTCAAAAACAAAAACTACCAACAT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIFFIIIIIIFFFIIIFFFFFFFFBBBFFFFFFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:6615:17184 2:N:0:CAGATC +AAAAATAAATTTATTTCAACTACTATTATTCATATACCTCTATAAAAAAACGTATTTTTATAACTTATAACTTATCCTTATAATTAAACACCTTACTCATA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12454:17532 2:N:0:CAGATC +CAACTCAAACTCATTTTAACTTCTCCTCCACCTAAAAAAAAACAAAACCCCTAACTCTCAACTTAATACAAAAATCACATTTAACCAATAAAACACGAATA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFBFFBB +@HISEQ:105:C2UE1ACXX:3:1101:2584:17964 2:N:0:CAGATC +AATTCTCTCTCACTCAACTTCTTAACAAACTAACTCCAAAAAAAAACCCTACTTCTAAAACACTAATAACCTAAATAAACACCACAAAAACCCAAAAAGAT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF< +@HISEQ:105:C2UE1ACXX:3:1101:9624:17911 2:N:0:CAGATC +CACAACAACAATAATTCCTCCTACAAATACCCTCATAAACATTTAAAAAAATAAATATATACTACCCCCTAATAATCATTACAAAATCAATACTAAACTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFBB'<BFBBBFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:19795:18124 2:N:0:CAGATC +CCAAAACTCCATCCCTATACCTCACCCCCACCCCCGCACTATCAACTTCCTTCATACGCCATTTATAAAAAACTAATCTCTTCAATTCAAATTTCTCTTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIII0BFFIIIIIIIIIIFFFFFFFF'7<BBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:2293:18893 2:N:0:CAGATC +CATCAAACAACCGAATCTTTAAAAAAAACTTTCAAAACAAAATATAAAATTTCAAACTAAAAACAATAAAAACTAATAAATAAATATCTAAAAACGAAAAC ++ +BBBFFFFFFFFFFIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFIIFFIFFFFFFFFFFFFBFBFFFFFFB<<<BBFBFFFFFFFFFFFFFF<BFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12741:20220 2:N:0:CAGATC +AAAAAAAAATAATATACAACATAAAAATTTATTCTTCACTAAAACCCCTTAACCAATACCAACGTTTCCACAAAATTCTACCCTCTACAAAAACAACCTAT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIFFFFFFFFFFBFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:18080:20193 2:N:0:CAGATC +CTCTACCAAACAAACCAATAATTAATTAATCTTACCTACCAACAAAAAAATTCAACCAACTTCTTAAAACCTACAAAAATCTACATACATCATTAATCATT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:18680:20134 2:N:0:CAGATC +CAAAAAATTTCCAAAACCTACCCCAACCAAAAAAACAAAATAATCTAAATCATTTCAAACTAAATAACCAAAAACCTCTTAAAAAACACCATACTTCCAAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:19451:20037 2:N:0:CATATC +ATTTACAACACGAATTACACACGTTTTTCCGAAACACTTTTCATAAACAACCCACTCTAAAAACAAACACCTCAACTAATACTTATTATCATTTTCCAAAC ++ +BBBFFFFFFFF'BFFFIIIIFI'BFFIIFF'BFFFFBFFIIIFIIIFFIIFFFIIBFBFFFFFFFFFBBBB7<BBBBBBBBBBBFBFFFBFFFFFFBFBBB +@HISEQ:105:C2UE1ACXX:3:1101:14848:20360 2:N:0:CAGATC +AACTACCGACATCCTGTATCTATACCTCAATCTATATACTCCTCTATAAAACAAACATAACAAAAACACCCTCGATCCTCCCTAATATCTTTTTTTATATA ++ +BBBFFFFFFFFFFIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:20622:20397 2:N:0:CAAATC +CTCCAAATCCCCAAATTTAAACTAATCAAACACAAAAAAATCCCCAAAATATTCTTAATACATATTAAACTATACAAAACTCAATTTACTAAAATCTTTAA ++ +<BBFFFFFFFFFFIIIIIIFIBFFIFIIBFFFFIIIIFIIFFBFFIIIIIIIIIFFFIIIIFFFFFFFFFBFBFFFFFBB<BBBBBBBFBFBBBFBBBBBB +@HISEQ:105:C2UE1ACXX:3:1101:12295:20565 2:N:0:CAGATC +ATATCTAATAAAAAAACTTTCATACGTATACAAATACTCTAAAAATCCAAAACATTCAATCCCCTAAAACTAAAATAACAAATACTAAAAATCAAACCTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:8987:20904 2:N:0:CAGATC +TACACACACACGCCAAACACACAATCCACCACCACCAAAACTCCATCGATAACACGCAAAACAACTATCACCTCTAAAAAAAAATCCACATAACCGAAAAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIFFFIBFFIFFIIIIIIFFFIIFFIIFFFFFBFFFBFBBFFBB<BBBBBBBFFFFFFFFF<BBBFFBFBBB7<BFFF +@HISEQ:105:C2UE1ACXX:3:1101:11915:21129 2:N:0:CAGATC +CAAAAAATACTTCTTAACAAACAAATCCTAAAATTACCCACTCCTCATTATAAAACCAAAAATTACTTCACTATTTAACACATACTATTTCTAAAACGAAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12442:21376 2:N:0:CAGATC +AACCTACCAAAAACAAACCACGCTAAAAAACCATAATAACTAATATAACTTTCACCTTATATTTTTTTATTTACTTACTTACTTTTTTATATTTTATTCCA ++ +BBBFFFFFFFFFFIIIIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:10225:21919 2:N:0:CAGATC +ATAAAAAAATAAAAAAAAAAAAATAACTATACAAAATAACCCTCTAACCTCCTCATAAATATCATAACATATACACAACTACACACCACACACAAACTTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIFFFB<<<B<BFFB<BBBFBBBBFFFFBBB7BBBBFF<B<<B<BBBB<B0<BBBBBBBBFFB<BBBB7<B<BBB<B<BB +@HISEQ:105:C2UE1ACXX:3:1101:12240:21986 2:N:0:CAGATC +TCCTAATTTCAAACTCTCCCTCCTACACAAAAAATAATTCTCCTATAATTCCTCCCTCAATCCCAAACCAAAACTACCCCCTATATTAATAACAATAGATC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:2199:22213 2:N:0:CAGATC +AATCAAAAAACCAAAAACCTCTAATACCCACCCCTAAACAAACCTAAAAAATAACGCCGATATAAAACAATCCAATCAATAAAAAAATAAAAACAATAATA ++ +BBBFFFFFFFFFFIIIFIIIIIIIIFFIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:9023:22492 2:N:0:CAGATC +TAATAAAAAAATTCTTTCTTAATAACCCTAAATTCCTCTTCCTCCCTCACCATAACTAACATAACACGTCCTACCAAAAACAATCCCCAACAAACTATCAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIFIIFFIFFFFFBFFFIIIIIIFFFFFIFFIFFIFFIFFIIIF7<BB<BBFFFFBFFBBBBBBBFFFBFFFBF<<<< +@HISEQ:105:C2UE1ACXX:3:1101:13060:22287 2:N:0:CAGATC +ACTCATCACTAAAACGACCAAAAAATACATAAAATCACACCCGTAACTCCTTTATATTATAACTAAACTAACGCAATTAAAAATTACAACAAAAAATAATA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIFFFIFIIIIIIIFIIIIIIIIIIIFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:16084:22385 2:N:0:CAGATC +TTAAAATATATATCGTCAACACCAAAAAAACAAAATCAAACAAAACTCTCAAAACTATAAAAACACAAACAATCCCATTTAAAAAATCTTCCATAAAACTA ++ +BBBFFFFFFFFFFIIFIIIIIIIIIIIIIIFIIIIIIIIIIIIIIIIIIIFFIIIFIFFFFFFFFFFFFFFF<<BBFBBFFFFFBFBBBBFFFFFFFFFBF +@HISEQ:105:C2UE1ACXX:3:1101:7272:22581 2:N:0:CAGATC +CTAACTCCACCTATAAAAAACCTAAAAAATCGATCAAAATACTCCTCCTCCTTCTATTCCTAAAAAAAATAAAATTCTTTAAAAAATTCTTAATATCATTA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIIFFIIFIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:10060:23020 2:N:0:CAGATC +CAAAATAATATAAATCTAAAAATAAAAACACCATCCTTACTTCCTTTTAAAAAAAATACTCAAAACTATAAGACTACCCTTTCCTCTTAAAAACCTAATAC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFBBFFFFFFFFFFFFFFFFFFFFFFBFFBFF +@HISEQ:105:C2UE1ACXX:3:1101:14440:23104 2:N:0:CAGATC +ATATTCTATAAAACACCAAAAATTCCTAAAAACCTCTAAAAACCAACTCCTACAAAAAAAACAACCCACAAAACAAAAATCAACTCCCCAAACCTTAACTT ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:6941:23338 2:N:0:CAGATC +CTTATTCATTCCATCGCCTATAACATAAAAACAAAAATAAACATTATCATAACAACCTATAATCAACACCCAAAACCAAATTCAACACACTATAACTCCTA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:10069:23622 2:N:0:CAGATC +CTTAAAATTCACTTCACATTAACTTTTAAATATTATTCCAATATATTTAAAATAACACTTTAAAAAAAACCTAAATTAAAAAAACTAACCTTCTACAAATA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:14079:24078 2:N:0:CAGATC +ATAAAACTAAACTTTCATATATTACTCTTAAATTTTTTTCCTAACTATAAAAAACTTTACAAAATACACCCTATTACTTTCAAACTACCAAAACTACCTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12064:24631 2:N:0:CCGATC +AAAAAAACCACCATTCTACAATCCCTTAAAAAAACCCCCAATAAAAAATAAACCCAAACCTAAACAAACAAAAAAACTCAAAAAATTAACTCTAAATATCA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFBBBFFBFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:6662:24968 2:N:0:CAGATC +CTCCCATACTTAAAAAAATTAAAACCCAAAAACAATTCTACCCTCACAAATACCAAAAAAACGAAACACAATACCATAAACCTAAACAATTAAACGACACC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFBFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:11630:24964 2:N:0:TAGATC +AAAAAAACATACCAACAACCTTTCAAATACAAAACTCCGTAAAATCTATACTATCTAACAATACCCTAAATCAAAAAAAAAACCAATCCTTAACATCCAAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIFFIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFFFFBFBFBBFBFFFFFBFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:12594:24878 2:N:0:CCGATC +ATATAATATTACCTATTTATTTTTATTATTATTATTATTATTATTTATTTATTTATTCTAAACACCTCCCAAAACATTCCAACTTTCTTCATTAAATATAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIFFFFFFIIIFFIFIIIFIIIIIIIIIIIFFFFIIIFFFFIIIIIIIIIIFFIIIIFIIFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:4483:25030 2:N:0:CAGATC +CCTAATACTCTATACTAAACAAATAACAAAAAACTTCCTCCTACCTCTTTAAAAAACCCATAAACTTACTCAAATACAAATATAACAAAATAATATAAATC ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFIIIIIIIIIFIFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFB +@HISEQ:105:C2UE1ACXX:3:1101:12198:25235 2:N:0:CAGATC +CACAAAAACAACATACCTCTTAATAATCTTTCAAACTTACACCAAATCTATTTCTATCCTCATCTTCCATTTATAAAAACCAAAACTATACAACCCAATCT ++ +BBBFFFFFFFFFFFIFFFFFFBFBFFBBFF<FIBFFIF<BFIIIIIIIIFFFFFBFBFBBFF<B<BFFFBBFIIIIFIIFFFFFFFFFBFBFFBBBBBBB< +@HISEQ:105:C2UE1ACXX:3:1101:20477:25084 2:N:0:CATATC +AAAACCCAACTCTTCCACCAAAAAAATTACTTTATCTCTAATACTCTTAAAATACCCTATATTATCCTTATACAAAAACACTTAATTAACTTCCTAATTAA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIFIIFIIIIIIIIFIIIIIIIIIIIIIF0<BFFFFFFFFFFFFBBBFFFFFFFFFBBB<<BB70 +@HISEQ:105:C2UE1ACXX:3:1101:5725:25359 2:N:0:CAGATC +ATAACTTAACTCTCTAATAACCTTAACTATTCCTCTCCCTAACCTCCACAAATACTATATATAATACTATACTTCTTAATAAACTTACTTAACATAAACCA ++ +BBBFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFFFFFFFFFFFF +@HISEQ:105:C2UE1ACXX:3:1101:5502:25591 2:N:0:CAGATC +TTCTTAATTCCATCTTTAAACAATAATATTAAAATATTACTTCCCAAAATACAAACCCATATCCATATCCAAAAACTAAACAACCTCTACACCTCAACAAA ++ +<<<BBBBBBBBBBBBBBBBB70B00B000<0BBB###################################################################
--- a/umi-tools_extract.xml Mon Apr 16 16:38:40 2018 -0400 +++ b/umi-tools_extract.xml Tue Jun 05 19:44:38 2018 -0400 @@ -8,13 +8,15 @@ @COMMAND_LINK@ umi_tools extract + --extract-method='$extract_method.value' --bc-pattern='$bc_pattern' + #if $input_type.type == 'single': #if $gz: --stdin=input_single.gz --stdout out.gz #else - --stdin='$input_type.input_single' + --stdin=input_single.txt --stdout '$out' #end if #else: @@ -24,8 +26,8 @@ --stdout out1.gz --read2-out=out2.gz #else: - --stdin='$input_type.input_read1' - --read2-in='$input_type.input_read2' + --stdin=input_read1.txt + --read2-in=input_read2.txt --stdout '$out1' --read2-out='$out2' #end if @@ -34,6 +36,13 @@ --bc-pattern2='$input_type.barcode.bc_pattern2' #end if #end if + + #if $barcodes.use_barcodes.value == 'yes': + --filter-cell-barcode + --whitelist='$barcodes.filter_barcode_file' + '$barcodes.filter_correct.value' + #end if + #if not $prime3: --3prime #end if @@ -57,12 +66,49 @@ ]]></command> <inputs> <expand macro="input_types" /> + + <conditional name="barcodes" > + <param name="use_barcodes" argument="--filter-cell-barcode" type="select" label="Use Known Barcodes?" > + <option value="yes">Yes</option> + <option value="no" selected="true" >No</option> + </param> + <when value="no" /> + <when value="yes" > + <param name="filter_barcode_file" type="data" format="tsv" label="Barcode File" /> + <param name="filter_correct" argument="--error-correct-cell" type="boolean" truevalue="--error-correct-cell" falsevalue="" checked="false" label="Apply correction to cell barcodes?" help="This only applies if your barcode file has two columns output from the umi_tools whitelist command." /> + </when> + </conditional> + + <param name="extract_method" type="select" label="Method to extract barcodes" > + <option value="regex">Regular Expressions</option> + <option value="string" selected="true">String</option> + </param> + <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read" help="Use this option to specify the format of the UMI/barcode. Use Ns to represent the random positions and Xs to indicate the bc positions. Bases with Ns will be extracted and added to the read name. Remaining bases, marked with an X will be reattached to the read."> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="!="/> + <add value="-"/> + <add value="_"/> + <add value="."/> + <add value="?"/> + <add value="<"/><!-- left triangle bracket --> + <add value=">"/><!-- right triangle bracket --> + <add value="["/> <!-- left square bracket --> + <add value="]"/> <!-- right square bracket --> + <add value="^"/> <!-- caret --> + <add value="{"/> <!-- left curly --> + <add value="}"/> <!-- right curly --> + <add value="("/> <!-- left parenthesis --> + <add value=")"/> <!-- right parenthesis --> + </valid> + </sanitizer> </param> + <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?" truevalue="1" falsevalue="0" checked="true" help="By default the barcode is assumed to be on the 5' end of the read, but @@ -126,17 +172,37 @@ <output name="out_log" file="out_paired.log" lines_diff="16"/> </test> <test> - <param name="type" value="paired_collection" /> + <param name="type" value="paired_collection" /> <!-- same as before, but uncompressed --> <param name="input_readpair" > <collection type="paired"> - <element name="forward" ftype="fastq.gz" value="t_R1.fastq.gz" /> - <element name="reverse" ftype="fastq.gz" value="t_R2.fastq.gz" /> + <element name="forward" ftype="fastq" value="t_R1.fastq" /> + <element name="reverse" ftype="fastq" value="t_R2.fastq" /> </collection> </param> <param name="bc_pattern" value="NNNXXX" /> <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" /> <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" /> - <output name="out_log" file="out_paired.log" lines_diff="16"/> + <output name="out_log" file="out_paired.log" lines_diff="25" /> + </test> + <test> + <param name="type" value="paired" /> + <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" /> + <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" /> + <param name="extract_method" value="string" /> + <param name="bc_pattern" value="CCCCCCNNNNNNNNNN" /> + <param name="use_barcodes" value="yes" /> + <param name="filter_barcode_file" value="scrb_seq_barcodes" /> + <output name="out2" file="scrb_extract.fastq.gz" decompress="true" /> + </test> + <test><!-- same as above but with regex barcode--> + <param name="type" value="paired" /> + <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" /> + <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" /> + <param name="extract_method" value="regex" /> + <param name="bc_pattern" value="^(?P<cell_1>.{6})(?P<umi_1>.{10})" /> + <param name="use_barcodes" value="yes" /> + <param name="filter_barcode_file" value="scrb_seq_barcodes" /> + <output name="out2" file="scrb_extract.fastq.gz" decompress="true" /> </test> </tests> <help><![CDATA[