changeset 8:3519c2de7fac draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit e62066428e669516e024d081933ee8c7f953ba1b"
author artbio
date Sat, 09 Apr 2022 22:45:21 +0000
parents 3bddd7ab96e3
children 6bf9de09aa74
files sr_bowtie_dataset_annotation.xml test-data/unmatched_1.fa test-data/unmatched_2.fa test-data/unmatched_3.fa
diffstat 4 files changed, 1157 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/sr_bowtie_dataset_annotation.xml	Sun Oct 24 23:52:11 2021 +0000
+++ b/sr_bowtie_dataset_annotation.xml	Sat Apr 09 22:45:21 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.5.0">
+<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.6">
   <description>by iterative alignments with sRbowtie</description>
   <requirements>
         <requirement type="package" version="1.3.1">bowtie</requirement>
@@ -59,9 +59,11 @@
             remaining=\$(( \$(wc -l < class_unmatched.fa)/2)) &&
             echo -e "$sample\tNot classified\t\${remaining}\t\${genome_aligned}" >> $output &&
         #end for
-        
-        
         Rscript $__tool_directory__/barplot.r --input $output --barplot $barplot
+        #if $format == '-q':
+            && mv class_unmatched.fa class_unmatched.fastq
+            && sed -n '1~4s/^@/>/p;2~4p' class_unmatched.fastq > class_unmatched.fa
+        #end if
         ]]></command>
   <inputs>
     <param name="input" type="data" multiple="True" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/>
@@ -94,6 +96,7 @@
 <!-- End of other bowtie index selections -->
    </inputs>
    <outputs>
+       <data format="fasta" name="unmatched" label="Annotate smRNAs: Unmatched reads" from_work_dir="class_unmatched.fa" />
        <data format="tabular" name="output" label="Cascade Annotation Analysis">
            <actions>
                <action name="column_names" type="metadata" default="Sample,Reference Index,Number of reads, Total reads" />
@@ -110,6 +113,7 @@
             <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
             <output name="output" ftype="tabular" file="sample1_output.tab" />
             <output name="barplot" ftype="pdf" file="sample1_output.pdf" compare="sim_size" delta="500"/>
+            <output name="unmatched" ftype="fasta" file="unmatched_1.fa" />
         </test>
         <test>
             <param name="input" value ="sample.fastq" ftype="fastq" />
@@ -119,6 +123,7 @@
             <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
             <output name="output" ftype="tabular" file="sample_output.tab" />
             <output name="barplot" ftype="pdf" file="sample_output.pdf" compare="sim_size" delta="500"/>
+            <output name="unmatched" ftype="fasta" file="unmatched_2.fa" />
         </test>
         <test>
             <param name="input" value ="sample5.fa,sample4.fa,sample3.fa,sample2.fa,sample1.fa" ftype="fasta" />
@@ -128,6 +133,7 @@
             <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
             <output name="output" ftype="tabular" file="multisample5_output.tab" />
             <output name="barplot" ftype="pdf" file="multisample5_output.pdf" compare="sim_size" delta="500" />
+            <output name="unmatched" ftype="fasta" file="unmatched_3.fa" />
         </test>
     </tests>
   <help>
@@ -172,7 +178,16 @@
 
 **OUTPUTS**
 
-**Annotation table in a tabular format**
+**- Annotation table in a tabular format**
+
+**- Pie Charts of class abundances**
+
+**- Unmatched reads in fasta format**
 
   </help>
+
+  <citations>
+    <citation type="doi">10.1038/nature11416</citation>
+  </citations>
+
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_1.fa	Sat Apr 09 22:45:21 2022 +0000
@@ -0,0 +1,530 @@
+>25207
+TCGCACTAGCTTCGGTCGTCTCGGA
+>25229
+TCTGTTCGAGGTTCTTTATTTGAA
+>25283
+TAAGGGAGCGAGATCGTTCGATGCA
+>25363
+TAGTGTTGGGTGTAGGCCAGGGAG
+>25376
+TCCGCCGACAATGTTGCAACATGA
+>25435
+TTTCTTCAAGCTGCGCGTTTTTCGG
+>25593
+TCGAAAACTTCTTCGCACTCTTCGTT
+>25660
+TCTCGCTGCGTGAACGATGAAGGC
+>25968
+TTTGCTAGGACTGCTGCATAAG
+>26220
+GCCCGTGTCGATGAATGCTTCAAACT
+>26261
+TATTATCGTCCGCTGCTAAACTGC
+>26543
+CTTATGTATAAAAAGCTCTGATGGA
+>26610
+TCCTTAAGTTCGTTGATTTGGGCT
+>26675
+CAATATCGTCAACATCCTCGAACGAT
+>26873
+TTGGGAGCTGAATCCCGTTACGGTA
+>26978
+CGCGCATTAGCTCAAATCTAGTTGGA
+>27220
+TGTTTATTTGTCAAGTTTAGATAATA
+>27397
+TAGTTCACAGCTGTATGTCCAGATGGGT
+>27869
+TATGGTCCAGAATGTAGCCTCGGC
+>27878
+TATTAATCGGGCCACAAATATCGGTA
+>28035
+GGTATTCTTTGCGAGGTCGTCCTGG
+>28154
+AAAGACGAGAACGCGTATATGTGTGC
+>28319
+TGACCAACCTTAAAAGATCGGGGT
+>28387
+TATTACTATTTCTAAGCTTTGTTTAAA
+>28594
+TTTTGAGGTTGGTCAAGAAGTTGTT
+>28608
+TAAGATTGAAAATTACTGTGGAGT
+>28668
+TACAGATTCTAGAGACAAAGACGC
+>28674
+TAAGTACATGCGCCCAGCCGCCGTGA
+>28836
+TCTGGTTAAGGTCGGAATACTCGTCT
+>29056
+TGCTTTACATACCCTTTGGTGCCC
+>29323
+TTTTGCTGCAGAGCTTCTTCCAACGT
+>29342
+TATGTATGGATATGTATATTTATGGT
+>29387
+TGATAATCGACCTCTTCCATCGTTGT
+>29423
+TCTAAGAACTTCTGAGGTGAAGG
+>29462
+TGCATTTCAATCGGAAGAGTACTCTG
+>29492
+TAAAACACAAATCTCGACATACAGA
+>29703
+GTACAGGTTCTGATGACAATG
+>29785
+TCAGATGAAAGACAAATTAGAATT
+>30053
+TAAGAACTTCTGAGATGAAGGGC
+>30096
+TGGATATTGAATGTTTTTGATTTGC
+>30136
+TGTAGTCGTCGTATGTCCGGA
+>30272
+TGCATTCGTGGATTCGCATTCGAGA
+>30497
+TGACAAAATATGCCCTTCACCTCAGA
+>30787
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>30836
+TAAATATTTTTTTTGAAACA
+>30977
+CGACTGTAATTATTAGCACAATACT
+>30985
+GACGATATTGCTGCAATAGACCTTGA
+>31000
+TCAGATGAGAGACAAATTAGAAT
+>31256
+TAATTCGGAATGCCTGCTCTACT
+>31417
+TCAATGATCGCTGTGCTCAGTAGGA
+>31506
+TTTGTCTGACGTTAAAAAATATA
+>31567
+TCCACATTAGGAGGATTATTAGACAAC
+>31790
+AAACATAATAATTGATGGCGGAAGA
+>31872
+AAGGTAATCATAGAGCACCACGGTT
+>32157
+TTTCTGTGAATTCACATGCTGATGA
+>32192
+TTTCTCATGTATAAAATGCTCTGATGG
+>32223
+TATCTTGTTATTCTAGTGTCTTTGGTT
+>32338
+TGTGGGACTCGAGCCAAAATGGCAACCT
+>32497
+TGCGGTTGGACAATTTTTTTTTTATA
+>32506
+TTGTTGTTTGGAGGAAGTTCCTTT
+>32510
+TCTTCCGCCATCAATTATTATGTTTT
+>32522
+TACTTGACTTTTCTATAGAATCTGGT
+>32540
+TATCGTCAACATCCTCGAACGATCGAGA
+>32626
+TTGATCAAGGTAGGGTTGTCGC
+>32646
+TCTGTTAAACACCCCTGAATCGTGGAT
+>32657
+TTTGGACATTTTGCAGGTGATACAAT
+>32682
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>32716
+TATCTATAGTTCCGATTGGCCATCTC
+>32885
+GAAAGTGGGTATCTGTATTTTAGGC
+>32967
+TAAAGATACCATCTAACCTCCTTGGA
+>33075
+ACAGATATTAGTGTTTTTCAAGCAGC
+>33147
+ATCGCAATTATGGCATAACAGATTCGGA
+>33163
+TTCAGCGTCGCTTGATTGAATAGAT
+>33231
+TAAGAACTTCTGAGGTGAAGGGC
+>33242
+TTGGGTTTAGAAATTAAAATTAAGGC
+>33244
+TATAGAAAGTTACGAATATATTAGGA
+>33248
+TTTTTGATCAATTGGCACCGTGCGAA
+>33296
+TGCAGAGCTTCTTCCAACGTTGGCAAG
+>33318
+TAGATGTCTGCAGGAATAACGGA
+>33338
+TCGACTATTAATGGCTGTTAGAATT
+>33475
+CAAACTTATCGACCATCTCCTCAAACG
+>33601
+TTACCCTTTTTCCGGAGCGTTTGTGC
+>33611
+AATGATCGCTGTGCTCAGTATGACGG
+>33662
+TTTGATTCATTACAATTTACGCTGAA
+>33737
+CAGATATTAGTGTTTTTCAAGCAGC
+>33747
+TACATGTAAAGCAGCTGTGTGTGC
+>33815
+AGTTTTTGGAATCACTTGA
+>34032
+TGCGGACGTGTGCTCGCTGCGTGA
+>34200
+TAATATATGTATAATCTGCTTGGTG
+>34240
+TCTTTACGCCATATAAATCATTTCGA
+>34259
+ACAAATCATAAATTTGATGGGACGA
+>34268
+TCTTGTGGACTTCACTCTAG
+>34297
+TAAGTAAATAGTCCCCGCCTTATTGAGG
+>34337
+TGGATAATGAATGTTTTTGATTTGC
+>34569
+TCTAGGTTCTTCTGAAATCGTGGGA
+>34643
+TCGTGTAGACCGGATAAGATTTTTT
+>34697
+TCTAATACTGTGAAAGGGTGGGG
+>34808
+GTAGGATGTGCTCTGCGGTTTCCAC
+>34892
+TCTGTCGCAGTTGTAGCTTGCAATA
+>34896
+TGTAGTTGCCACTTATGCTGTCCA
+>35151
+TAGCAATGTCCGTCTGTCCGTATGA
+>35196
+TGTAAATGGTCAGCGAAAGCAAAGG
+>35211
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>35246
+CCTCAGAGAACGTCAGACCGCG
+>35272
+TGCAGAGCTTCTTCCAACGTTGG
+>35273
+TCTGGTAGTAAGAAAAATGTAGCTT
+>35286
+TATGTATGGATATGTATATTTATGGGT
+>35317
+TTTTGGTTTGATCGTCAGGTGGTC
+>35512
+GACGATAATAGTGAATTTTGGACA
+>35566
+TTCGAATTCGCGCCATTTCACAATC
+>35948
+TGGCCTGTATACGCTTTCTGTTG
+>36009
+TTGCAAAAGTCATATCTTGAGG
+>36059
+TAAGTTACTATGGATCCATAAGGGTA
+>36100
+TGATATGGGACTTGTAGCTTTTTTAAA
+>36121
+TGTCGTGGGCTGTGCGTTTGAGAA
+>36235
+TGCTACCTTTAGCTGCAAGATTAACT
+>36238
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>36523
+TGCGAGTCCGAGCTTTGAACGTGGG
+>36607
+TCTCAATGTAATGTCTTCTTTTTGGA
+>36624
+CGTTCTTTAAAACCACCAATGGGA
+>36776
+TACTCACGACCATTGTCATTTCTCAAG
+>36825
+CGTTCGAGGTCCACTTTCTTAGCGGA
+>36850
+TCTGATGTCGGCGAGAAAGGAGTCTCA
+>36872
+TCAAGGCCAGCACACAGTAACATGGT
+>36968
+ACATTGGAAATACCGCGGGACCGC
+>36994
+TATACGTTTTTTGACCTCTTCTCTTTGA
+>37110
+TGATAATGTAAACAAAGATAAAGGG
+>37210
+TCAGATGAAAGACAAATTAGAATTAAA
+>37224
+GACGATAATAGTGAATTTTGGACA
+>37226
+TCTGAAGTTGCCGCACTAGAGATGG
+>37339
+TATACAGTCCACTATATCGTTGTTTAA
+>37395
+TACAATTTACGCTGAATTTAAATGAA
+>37460
+CACAAAGTAACGTGCACCACCATTT
+>37766
+TCTGTGCAACTTTGTACGCGAAGTCAGC
+>37836
+TCAAGGATTAATGTAGGGGGGGGG
+>38096
+TTTGGAGGTAGAAGTCTTAGTGGCCGC
+>38100
+GACAAAATATGCCCTTCAATTTAGA
+>38113
+TATGGTCCAGAATGTAGCCTCGGC
+>38213
+TAACTGTTTTTAGTTCAAAGTCTCGAA
+>38224
+TTTCAGTTTTTATTGTTAGTCACAGG
+>38263
+TAACTGCGAAGTCGATCAGGTCCGA
+>38266
+ATCCGGACGATTGACGAGGAGCCCATT
+>38271
+TTATGTCAGTGTCGAAGGCGATCGAA
+>38373
+TCTATAGCCTTGGCGTAGGAACTCGCA
+>38384
+GCTACAACTGCGACAGAAAATTCGGA
+>38468
+TATTATCGTCCGCTGCTAAACTGC
+>38491
+TGCATACTTCCGTTCTCTTTTCGGGA
+>38673
+TTGTGTATTGTGATTCTGATTCGTG
+>38745
+TCCCTGGCCCGCTAGACAGCAGGA
+>38784
+TGTGCTAATAATTAAAGTCGACTGA
+>38873
+TGTCATACTTTCGTCAAAA
+>39154
+TAATAGTCAGGGCGCGAATTTTTAAAA
+>39325
+TTAGGACTTATTGAACTTTACGGTA
+>39359
+TATGCAAATCAAGTGTGACCGTAGCT
+>39450
+TTAAACAACGATATAGTGGACAGTA
+>39453
+TCTGCCGGATTTTGATCCAATCAAGG
+>39819
+GTGGAATTGAAAAAGAACCAGACACA
+>40040
+TTTTTGGAACTACCTGAGTCGGTT
+>40183
+TAACACAAAGCAGTATGATTTAATAAT
+>40189
+CAGCAAGCTGAGATGTACATTAGTATA
+>40374
+TACGTTTTCTTGCAGATCAAAAA
+>40445
+CGTTCTTTAAAACCACCAATGGGA
+>40467
+TACGCAGATTCCTGGGAGTTACAGGA
+>40488
+TGATTTGGGCTTGCATACTTGTACT
+>40797
+TATATTCGTGTTCATGTGTGAACAGC
+>40831
+TCTAAGAACTTCTGAGGTGAAGGGC
+>40951
+TTCTTCGTAAGTCAAAATAGTGTCGCC
+>40988
+TATGATTGATTGCTTGAGAGT
+>40998
+TGATAGAGCTGCATTTGAATTAACGG
+>41061
+CTCTTTCCGCTCACTCCCGCTGAGA
+>41076
+TGCATTAAGAAGATTTAGGATCC
+>41140
+TCAAGGATTAATGTAGGGGGGG
+>41143
+CAATAGCGTCGCTGAGTAACAGTG
+>41162
+TACGGATTGCAGCGGCTAG
+>41183
+TTTTTTGGCACACGATTTTTTGGACGT
+>41227
+TAATATATGTATACTCTGCTTGGTG
+>41265
+TTTATGATTTTTGGTAATA
+>41365
+TAGGAGGGTTCCACAACTATTTCGGGG
+>41837
+TAGTTTGACACTGTTTGGAGACGTGG
+>42053
+TCTGTGGTCGAATCGAAGGAGTGC
+>42113
+TAATAGATCGCTCACCTGTTCCTGG
+>42392
+CGGCATCGGAAAACTCCCAGCGGGGC
+>42504
+TAACGTTATTATTATTTGAAAATAGAA
+>42566
+TTTGTTGGGTAGGAACTTTACTGC
+>42668
+TGCTGCAGAGCTTCTTCCAACGTTGG
+>42751
+CCAAAGTCTGGTTGTCAGAAAATGTGC
+>42777
+AAGGAAGGAACCAAAGAAGCACAAACG
+>42804
+TAATTCTAATTTGTCTCTCATCTGA
+>42830
+TAGTATACTTATTAAGTCATTTGA
+>42857
+AGAGTATTCATCTTGAGGCGTGTG
+>42886
+TGAACAACGATTTATGTATATAAGAA
+>42899
+TACGATAATAGTGAATTTTGGACA
+>42936
+TTTGACTAAAAGTCGCTTGTTTTGGA
+>43050
+AATACATAACTCTGGACACAGGAGA
+>43219
+TACTTTCGTCAAAATGTTCAGGAGCT
+>43242
+TGACATGTCTATTTCCATGGGTTCGGA
+>43268
+GTGTATAAAAAAATTTATTGTTGAGCA
+>43299
+TCTGGATGATGGCTGATGCTCGTTG
+>43491
+TCTGATGACAATGAATTTTTTAGACA
+>43520
+TATTGTTGTAATTGCTGCCTCGGTTG
+>43654
+ATATGAACAAAGCAAAGACACTAGAA
+>43677
+CACGATTCATAACCCTCAGCTGAAG
+>43776
+GCTCCTGGCAACTCTGTGATGGAC
+>43903
+TATTTTGATGTTTAATGAA
+>43990
+TTTCTACTTCGTATTATTTTTATGA
+>44062
+AAACATAATAATTGATGGCGGAAGA
+>44071
+CACAGACGCAGTGGAAACCGCAGA
+>44078
+CGTGCGTCCGAGCAAAAGGTGGT
+>44187
+TTATGTAAGAATATTTGTCATTAGA
+>44239
+TATCAATGTTGACCGTAATACTCAA
+>44253
+TGGATATTGAATGTTTTTGATTTGC
+>44279
+TGTGATTTTCCCAATTTATATTAATACA
+>44374
+TAGTCGGAGTTGATGAGCTGCC
+>44412
+TCCGAATTTTCTGTCGCAGTTGTAGCT
+>44423
+TCGGCTTGGGTTTAGAAATTAAAAT
+>44558
+ACAAATCATAAATTTGGTGGGA
+>44637
+TTTTTTATCAATTGGCACCATGCGAA
+>44642
+TACTGTGTGCTGGCCTTGATGAAAGT
+>44688
+TAGATGTCTGCACGAATAACGGA
+>44945
+TGCTTCCGAGCAATCTACGTTGGTAAAA
+>45054
+TCTCATCTGACAATTTTTTAAAAGCGA
+>45088
+TCTGAAGCAGCGCTCACGGCAGAATGC
+>45203
+TGCCGGATTTTGATCCAATCAAGGGA
+>45263
+TATTGATTTTCCTATTTAGTTGAACA
+>45274
+AAAAGTCTGGATATTGTAGGATAGGA
+>45358
+CAACGCTGGACCTTGGACTCGAGGGC
+>45396
+CATCTTGTTATTCTAGTGTCTTTGGTA
+>45641
+GAACTAAGATCAGTCGACTGTAATTAT
+>45774
+CGACGAACTAGCAGCTCTGGTGT
+>45911
+CAGATATTAGTGTTTTTCAAGCAGC
+>45997
+TTAACCAGTCGGCGTTGTTTAAGTAGC
+>46031
+AAAGCGTCTACTTGAACAATGAGA
+>46054
+TTAGATCGTATTACTTGGGTGCTGG
+>46199
+TAAGCGTTAGGTCGTATTACTTGGGC
+>46255
+TAAGATTGAAAATTACTGTGGAGTAAT
+>46366
+TAAGGAATTGTCGGCCATTTAATGTGA
+>46387
+CAGAGCTTCTTCCAACGTTGGCAAG
+>46701
+TTGTATCTTTTTGCTTTTTATATT
+>46705
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>46783
+TCTCATCTGACAATTTTTTAAAAGCGA
+>46836
+TCATAAGGACAGACGGACAT
+>46892
+AAACATAATAATTGATGGCGGAAGA
+>47244
+TCTCTGTCCGCTCGCTTACGATGAGA
+>47290
+TTTCAGGAATGGGGTCGTCCCACTA
+>47348
+TGCAAAAGTAATATCAAAGACACTA
+>47359
+TTTAAAATTATAATAGTCAGGG
+>47427
+CATCTTGTTATTCTAGTGTCTTTGGT
+>47654
+TTAAACACTGAATTCGGTTTCGAAA
+>47656
+TTTCTGAAGAATCCTGTAACTCCC
+>47785
+TGTAGATGAGCGGCAAATGTGG
+>47827
+TTGATCAAGGTAGAGTTGTCGCGC
+>48184
+TTCAAGGATTAATGTAGGGGGGG
+>48576
+TTAACCCGGAGACTTGGGTGTGGGT
+>48764
+TGCAAAAGTAATATCAAAGACAATAGA
+>48782
+TGACAATGTAGTGAACGCCAGTGT
+>48893
+TATAAATGCCGTCTGATATTATTAAA
+>49017
+ACCGGATGTCATATCCAGCGTCGTGAA
+>49381
+TACAATGTAAATTCGTTTCTTCGATCA
+>49456
+TTTTGGTTTGATCGTCAGGTGGACGC
+>49484
+TCTGCTATCATTGACTCGATCATTGA
+>49569
+TCGATTGTATGATCAGTGGAAGTGGC
+>49602
+GAAATTGGCCAACATTAATTCGGAA
+>49628
+TACTTTCGTCAAAATGTTCAGGAGC
+>49715
+TTTGTCCGGGTGCTTCGAAAGAACTCT
+>49778
+CAATAGCGTCGCTGAGTAACAGTG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_2.fa	Sat Apr 09 22:45:21 2022 +0000
@@ -0,0 +1,78 @@
+>HWI-176
+CGAGACTTTGAACTAAAAACAGT
+>HWI-277
+TCAGATGAGAGACAAATTAGAAT
+>HWI-458
+TTTCGAGGTTCCGAATTTTCTGTC
+>HWI-778
+TAGGATGTGCTCTGCGGTTTCCACT
+>HWI-895
+TTTAATTGGCGCAGTCGGTAGGATC
+>HWI-967
+GAGAATGACGGAACTGTAATA
+>HWI-974
+TTGCGAGGTCGTCCTGGGAGACCAG
+>HWI-1009
+AACGTTGGAAGAAGCTCTGCAGC
+>HWI-1011
+TTTAATGTTAACACGGACATTGACC
+>HWI-1014
+TCTCCGACTTGCTGAGCTGTTTCCGCCG
+>HWI-1106
+CATCTTGTTATTCTATTGTCTTTGGTC
+>HWI-1202
+AAAGTAAATTCTGATGACTTCAAAAT
+>HWI-1227
+TACCATGTAAATTCGTTTCTTCG
+>HWI-1262
+TATTAGCTCAAAGAACAGCTCGT
+>HWI-1297
+TTGCAGCAATATCGTCAACATCCTC
+>HWI-1520
+TAATATCGAAGCCGAACTGAGAACA
+>HWI-1584
+TAAAGTTATGACAAGAATTGATGTT
+>HWI-1590
+TCATATGCCAATTTCGTGTTTCGATG
+>HWI-1609
+TAAGATTGAAAATTACTGTGGAGTAAT
+>HWI-1745
+TTCATCCTGCTGCCGGAGC
+>HWI-1816
+CTTCCGTTATTCCTGCAGACA
+>HWI-1845
+TAATATCGAACCCGAACTGAGAA
+>HWI-1852
+TGGACACCGTCGTTCCACTTGAACT
+>HWI-1887
+TCAAGGTAGGGTTGTCGCGTGTATTT
+>HWI-1892
+TATTATCGTCCGCTGCTAAACTG
+>HWI-1936
+TAGCTGCAACTTCTGAAGTTATGGC
+>HWI-1997
+CAAAACAAGAATTTTTCGCATGGTGCC
+>HWI-2055
+TAAACTAATTCTGTCGGTTTTCTGT
+>HWI-2075
+AGACTTTGAACTAAAAACAGTTACCT
+>HWI-2244
+TAACTTCTTCTATTTTCGTGCGGGA
+>HWI-2253
+TAACTGCCCTCGATCACGCCTTCCCGA
+>HWI-2277
+TGACGACGACAGTATCGCAAGACGGT
+>HWI-2409
+GTCAGATGAGAGACAAATTAGAAT
+>HWI-2418
+TAATATCGAACCCGAACTGAGAAC
+>HWI-2490
+TTTATGACAACCCGATCGGACCTCACTC
+>HWI-2557
+TGTCCGGAGATCAAGAAGTGTTGGCAC
+>HWI-2670
+TCAGATGAGAGACAAATTAGAATTAAA
+>HWI-2719
+TATTATCGTCCGCTGCTAAACTGCTG
+>HWI-2834
+CCTAGGAGAAGGAAGTCAAGAAGGCC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_3.fa	Sat Apr 09 22:45:21 2022 +0000
@@ -0,0 +1,530 @@
+>25207
+TCGCACTAGCTTCGGTCGTCTCGGA
+>25229
+TCTGTTCGAGGTTCTTTATTTGAA
+>25283
+TAAGGGAGCGAGATCGTTCGATGCA
+>25363
+TAGTGTTGGGTGTAGGCCAGGGAG
+>25376
+TCCGCCGACAATGTTGCAACATGA
+>25435
+TTTCTTCAAGCTGCGCGTTTTTCGG
+>25593
+TCGAAAACTTCTTCGCACTCTTCGTT
+>25660
+TCTCGCTGCGTGAACGATGAAGGC
+>25968
+TTTGCTAGGACTGCTGCATAAG
+>26220
+GCCCGTGTCGATGAATGCTTCAAACT
+>26261
+TATTATCGTCCGCTGCTAAACTGC
+>26543
+CTTATGTATAAAAAGCTCTGATGGA
+>26610
+TCCTTAAGTTCGTTGATTTGGGCT
+>26675
+CAATATCGTCAACATCCTCGAACGAT
+>26873
+TTGGGAGCTGAATCCCGTTACGGTA
+>26978
+CGCGCATTAGCTCAAATCTAGTTGGA
+>27220
+TGTTTATTTGTCAAGTTTAGATAATA
+>27397
+TAGTTCACAGCTGTATGTCCAGATGGGT
+>27869
+TATGGTCCAGAATGTAGCCTCGGC
+>27878
+TATTAATCGGGCCACAAATATCGGTA
+>28035
+GGTATTCTTTGCGAGGTCGTCCTGG
+>28154
+AAAGACGAGAACGCGTATATGTGTGC
+>28319
+TGACCAACCTTAAAAGATCGGGGT
+>28387
+TATTACTATTTCTAAGCTTTGTTTAAA
+>28594
+TTTTGAGGTTGGTCAAGAAGTTGTT
+>28608
+TAAGATTGAAAATTACTGTGGAGT
+>28668
+TACAGATTCTAGAGACAAAGACGC
+>28674
+TAAGTACATGCGCCCAGCCGCCGTGA
+>28836
+TCTGGTTAAGGTCGGAATACTCGTCT
+>29056
+TGCTTTACATACCCTTTGGTGCCC
+>29323
+TTTTGCTGCAGAGCTTCTTCCAACGT
+>29342
+TATGTATGGATATGTATATTTATGGT
+>29387
+TGATAATCGACCTCTTCCATCGTTGT
+>29423
+TCTAAGAACTTCTGAGGTGAAGG
+>29462
+TGCATTTCAATCGGAAGAGTACTCTG
+>29492
+TAAAACACAAATCTCGACATACAGA
+>29703
+GTACAGGTTCTGATGACAATG
+>29785
+TCAGATGAAAGACAAATTAGAATT
+>30053
+TAAGAACTTCTGAGATGAAGGGC
+>30096
+TGGATATTGAATGTTTTTGATTTGC
+>30136
+TGTAGTCGTCGTATGTCCGGA
+>30272
+TGCATTCGTGGATTCGCATTCGAGA
+>30497
+TGACAAAATATGCCCTTCACCTCAGA
+>30787
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>30836
+TAAATATTTTTTTTGAAACA
+>30977
+CGACTGTAATTATTAGCACAATACT
+>30985
+GACGATATTGCTGCAATAGACCTTGA
+>31000
+TCAGATGAGAGACAAATTAGAAT
+>31256
+TAATTCGGAATGCCTGCTCTACT
+>31417
+TCAATGATCGCTGTGCTCAGTAGGA
+>31506
+TTTGTCTGACGTTAAAAAATATA
+>31567
+TCCACATTAGGAGGATTATTAGACAAC
+>31790
+AAACATAATAATTGATGGCGGAAGA
+>31872
+AAGGTAATCATAGAGCACCACGGTT
+>32157
+TTTCTGTGAATTCACATGCTGATGA
+>32192
+TTTCTCATGTATAAAATGCTCTGATGG
+>32223
+TATCTTGTTATTCTAGTGTCTTTGGTT
+>32338
+TGTGGGACTCGAGCCAAAATGGCAACCT
+>32497
+TGCGGTTGGACAATTTTTTTTTTATA
+>32506
+TTGTTGTTTGGAGGAAGTTCCTTT
+>32510
+TCTTCCGCCATCAATTATTATGTTTT
+>32522
+TACTTGACTTTTCTATAGAATCTGGT
+>32540
+TATCGTCAACATCCTCGAACGATCGAGA
+>32626
+TTGATCAAGGTAGGGTTGTCGC
+>32646
+TCTGTTAAACACCCCTGAATCGTGGAT
+>32657
+TTTGGACATTTTGCAGGTGATACAAT
+>32682
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>32716
+TATCTATAGTTCCGATTGGCCATCTC
+>32885
+GAAAGTGGGTATCTGTATTTTAGGC
+>32967
+TAAAGATACCATCTAACCTCCTTGGA
+>33075
+ACAGATATTAGTGTTTTTCAAGCAGC
+>33147
+ATCGCAATTATGGCATAACAGATTCGGA
+>33163
+TTCAGCGTCGCTTGATTGAATAGAT
+>33231
+TAAGAACTTCTGAGGTGAAGGGC
+>33242
+TTGGGTTTAGAAATTAAAATTAAGGC
+>33244
+TATAGAAAGTTACGAATATATTAGGA
+>33248
+TTTTTGATCAATTGGCACCGTGCGAA
+>33296
+TGCAGAGCTTCTTCCAACGTTGGCAAG
+>33318
+TAGATGTCTGCAGGAATAACGGA
+>33338
+TCGACTATTAATGGCTGTTAGAATT
+>33475
+CAAACTTATCGACCATCTCCTCAAACG
+>33601
+TTACCCTTTTTCCGGAGCGTTTGTGC
+>33611
+AATGATCGCTGTGCTCAGTATGACGG
+>33662
+TTTGATTCATTACAATTTACGCTGAA
+>33737
+CAGATATTAGTGTTTTTCAAGCAGC
+>33747
+TACATGTAAAGCAGCTGTGTGTGC
+>33815
+AGTTTTTGGAATCACTTGA
+>34032
+TGCGGACGTGTGCTCGCTGCGTGA
+>34200
+TAATATATGTATAATCTGCTTGGTG
+>34240
+TCTTTACGCCATATAAATCATTTCGA
+>34259
+ACAAATCATAAATTTGATGGGACGA
+>34268
+TCTTGTGGACTTCACTCTAG
+>34297
+TAAGTAAATAGTCCCCGCCTTATTGAGG
+>34337
+TGGATAATGAATGTTTTTGATTTGC
+>34569
+TCTAGGTTCTTCTGAAATCGTGGGA
+>34643
+TCGTGTAGACCGGATAAGATTTTTT
+>34697
+TCTAATACTGTGAAAGGGTGGGG
+>34808
+GTAGGATGTGCTCTGCGGTTTCCAC
+>34892
+TCTGTCGCAGTTGTAGCTTGCAATA
+>34896
+TGTAGTTGCCACTTATGCTGTCCA
+>35151
+TAGCAATGTCCGTCTGTCCGTATGA
+>35196
+TGTAAATGGTCAGCGAAAGCAAAGG
+>35211
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>35246
+CCTCAGAGAACGTCAGACCGCG
+>35272
+TGCAGAGCTTCTTCCAACGTTGG
+>35273
+TCTGGTAGTAAGAAAAATGTAGCTT
+>35286
+TATGTATGGATATGTATATTTATGGGT
+>35317
+TTTTGGTTTGATCGTCAGGTGGTC
+>35512
+GACGATAATAGTGAATTTTGGACA
+>35566
+TTCGAATTCGCGCCATTTCACAATC
+>35948
+TGGCCTGTATACGCTTTCTGTTG
+>36009
+TTGCAAAAGTCATATCTTGAGG
+>36059
+TAAGTTACTATGGATCCATAAGGGTA
+>36100
+TGATATGGGACTTGTAGCTTTTTTAAA
+>36121
+TGTCGTGGGCTGTGCGTTTGAGAA
+>36235
+TGCTACCTTTAGCTGCAAGATTAACT
+>36238
+CAGATATTAGTGTTTTTCAAGCAGCGG
+>36523
+TGCGAGTCCGAGCTTTGAACGTGGG
+>36607
+TCTCAATGTAATGTCTTCTTTTTGGA
+>36624
+CGTTCTTTAAAACCACCAATGGGA
+>36776
+TACTCACGACCATTGTCATTTCTCAAG
+>36825
+CGTTCGAGGTCCACTTTCTTAGCGGA
+>36850
+TCTGATGTCGGCGAGAAAGGAGTCTCA
+>36872
+TCAAGGCCAGCACACAGTAACATGGT
+>36968
+ACATTGGAAATACCGCGGGACCGC
+>36994
+TATACGTTTTTTGACCTCTTCTCTTTGA
+>37110
+TGATAATGTAAACAAAGATAAAGGG
+>37210
+TCAGATGAAAGACAAATTAGAATTAAA
+>37224
+GACGATAATAGTGAATTTTGGACA
+>37226
+TCTGAAGTTGCCGCACTAGAGATGG
+>37339
+TATACAGTCCACTATATCGTTGTTTAA
+>37395
+TACAATTTACGCTGAATTTAAATGAA
+>37460
+CACAAAGTAACGTGCACCACCATTT
+>37766
+TCTGTGCAACTTTGTACGCGAAGTCAGC
+>37836
+TCAAGGATTAATGTAGGGGGGGGG
+>38096
+TTTGGAGGTAGAAGTCTTAGTGGCCGC
+>38100
+GACAAAATATGCCCTTCAATTTAGA
+>38113
+TATGGTCCAGAATGTAGCCTCGGC
+>38213
+TAACTGTTTTTAGTTCAAAGTCTCGAA
+>38224
+TTTCAGTTTTTATTGTTAGTCACAGG
+>38263
+TAACTGCGAAGTCGATCAGGTCCGA
+>38266
+ATCCGGACGATTGACGAGGAGCCCATT
+>38271
+TTATGTCAGTGTCGAAGGCGATCGAA
+>38373
+TCTATAGCCTTGGCGTAGGAACTCGCA
+>38384
+GCTACAACTGCGACAGAAAATTCGGA
+>38468
+TATTATCGTCCGCTGCTAAACTGC
+>38491
+TGCATACTTCCGTTCTCTTTTCGGGA
+>38673
+TTGTGTATTGTGATTCTGATTCGTG
+>38745
+TCCCTGGCCCGCTAGACAGCAGGA
+>38784
+TGTGCTAATAATTAAAGTCGACTGA
+>38873
+TGTCATACTTTCGTCAAAA
+>39154
+TAATAGTCAGGGCGCGAATTTTTAAAA
+>39325
+TTAGGACTTATTGAACTTTACGGTA
+>39359
+TATGCAAATCAAGTGTGACCGTAGCT
+>39450
+TTAAACAACGATATAGTGGACAGTA
+>39453
+TCTGCCGGATTTTGATCCAATCAAGG
+>39819
+GTGGAATTGAAAAAGAACCAGACACA
+>40040
+TTTTTGGAACTACCTGAGTCGGTT
+>40183
+TAACACAAAGCAGTATGATTTAATAAT
+>40189
+CAGCAAGCTGAGATGTACATTAGTATA
+>40374
+TACGTTTTCTTGCAGATCAAAAA
+>40445
+CGTTCTTTAAAACCACCAATGGGA
+>40467
+TACGCAGATTCCTGGGAGTTACAGGA
+>40488
+TGATTTGGGCTTGCATACTTGTACT
+>40797
+TATATTCGTGTTCATGTGTGAACAGC
+>40831
+TCTAAGAACTTCTGAGGTGAAGGGC
+>40951
+TTCTTCGTAAGTCAAAATAGTGTCGCC
+>40988
+TATGATTGATTGCTTGAGAGT
+>40998
+TGATAGAGCTGCATTTGAATTAACGG
+>41061
+CTCTTTCCGCTCACTCCCGCTGAGA
+>41076
+TGCATTAAGAAGATTTAGGATCC
+>41140
+TCAAGGATTAATGTAGGGGGGG
+>41143
+CAATAGCGTCGCTGAGTAACAGTG
+>41162
+TACGGATTGCAGCGGCTAG
+>41183
+TTTTTTGGCACACGATTTTTTGGACGT
+>41227
+TAATATATGTATACTCTGCTTGGTG
+>41265
+TTTATGATTTTTGGTAATA
+>41365
+TAGGAGGGTTCCACAACTATTTCGGGG
+>41837
+TAGTTTGACACTGTTTGGAGACGTGG
+>42053
+TCTGTGGTCGAATCGAAGGAGTGC
+>42113
+TAATAGATCGCTCACCTGTTCCTGG
+>42392
+CGGCATCGGAAAACTCCCAGCGGGGC
+>42504
+TAACGTTATTATTATTTGAAAATAGAA
+>42566
+TTTGTTGGGTAGGAACTTTACTGC
+>42668
+TGCTGCAGAGCTTCTTCCAACGTTGG
+>42751
+CCAAAGTCTGGTTGTCAGAAAATGTGC
+>42777
+AAGGAAGGAACCAAAGAAGCACAAACG
+>42804
+TAATTCTAATTTGTCTCTCATCTGA
+>42830
+TAGTATACTTATTAAGTCATTTGA
+>42857
+AGAGTATTCATCTTGAGGCGTGTG
+>42886
+TGAACAACGATTTATGTATATAAGAA
+>42899
+TACGATAATAGTGAATTTTGGACA
+>42936
+TTTGACTAAAAGTCGCTTGTTTTGGA
+>43050
+AATACATAACTCTGGACACAGGAGA
+>43219
+TACTTTCGTCAAAATGTTCAGGAGCT
+>43242
+TGACATGTCTATTTCCATGGGTTCGGA
+>43268
+GTGTATAAAAAAATTTATTGTTGAGCA
+>43299
+TCTGGATGATGGCTGATGCTCGTTG
+>43491
+TCTGATGACAATGAATTTTTTAGACA
+>43520
+TATTGTTGTAATTGCTGCCTCGGTTG
+>43654
+ATATGAACAAAGCAAAGACACTAGAA
+>43677
+CACGATTCATAACCCTCAGCTGAAG
+>43776
+GCTCCTGGCAACTCTGTGATGGAC
+>43903
+TATTTTGATGTTTAATGAA
+>43990
+TTTCTACTTCGTATTATTTTTATGA
+>44062
+AAACATAATAATTGATGGCGGAAGA
+>44071
+CACAGACGCAGTGGAAACCGCAGA
+>44078
+CGTGCGTCCGAGCAAAAGGTGGT
+>44187
+TTATGTAAGAATATTTGTCATTAGA
+>44239
+TATCAATGTTGACCGTAATACTCAA
+>44253
+TGGATATTGAATGTTTTTGATTTGC
+>44279
+TGTGATTTTCCCAATTTATATTAATACA
+>44374
+TAGTCGGAGTTGATGAGCTGCC
+>44412
+TCCGAATTTTCTGTCGCAGTTGTAGCT
+>44423
+TCGGCTTGGGTTTAGAAATTAAAAT
+>44558
+ACAAATCATAAATTTGGTGGGA
+>44637
+TTTTTTATCAATTGGCACCATGCGAA
+>44642
+TACTGTGTGCTGGCCTTGATGAAAGT
+>44688
+TAGATGTCTGCACGAATAACGGA
+>44945
+TGCTTCCGAGCAATCTACGTTGGTAAAA
+>45054
+TCTCATCTGACAATTTTTTAAAAGCGA
+>45088
+TCTGAAGCAGCGCTCACGGCAGAATGC
+>45203
+TGCCGGATTTTGATCCAATCAAGGGA
+>45263
+TATTGATTTTCCTATTTAGTTGAACA
+>45274
+AAAAGTCTGGATATTGTAGGATAGGA
+>45358
+CAACGCTGGACCTTGGACTCGAGGGC
+>45396
+CATCTTGTTATTCTAGTGTCTTTGGTA
+>45641
+GAACTAAGATCAGTCGACTGTAATTAT
+>45774
+CGACGAACTAGCAGCTCTGGTGT
+>45911
+CAGATATTAGTGTTTTTCAAGCAGC
+>45997
+TTAACCAGTCGGCGTTGTTTAAGTAGC
+>46031
+AAAGCGTCTACTTGAACAATGAGA
+>46054
+TTAGATCGTATTACTTGGGTGCTGG
+>46199
+TAAGCGTTAGGTCGTATTACTTGGGC
+>46255
+TAAGATTGAAAATTACTGTGGAGTAAT
+>46366
+TAAGGAATTGTCGGCCATTTAATGTGA
+>46387
+CAGAGCTTCTTCCAACGTTGGCAAG
+>46701
+TTGTATCTTTTTGCTTTTTATATT
+>46705
+TAACTGTTTTTAGTTCAAAGTCTCGGA
+>46783
+TCTCATCTGACAATTTTTTAAAAGCGA
+>46836
+TCATAAGGACAGACGGACAT
+>46892
+AAACATAATAATTGATGGCGGAAGA
+>47244
+TCTCTGTCCGCTCGCTTACGATGAGA
+>47290
+TTTCAGGAATGGGGTCGTCCCACTA
+>47348
+TGCAAAAGTAATATCAAAGACACTA
+>47359
+TTTAAAATTATAATAGTCAGGG
+>47427
+CATCTTGTTATTCTAGTGTCTTTGGT
+>47654
+TTAAACACTGAATTCGGTTTCGAAA
+>47656
+TTTCTGAAGAATCCTGTAACTCCC
+>47785
+TGTAGATGAGCGGCAAATGTGG
+>47827
+TTGATCAAGGTAGAGTTGTCGCGC
+>48184
+TTCAAGGATTAATGTAGGGGGGG
+>48576
+TTAACCCGGAGACTTGGGTGTGGGT
+>48764
+TGCAAAAGTAATATCAAAGACAATAGA
+>48782
+TGACAATGTAGTGAACGCCAGTGT
+>48893
+TATAAATGCCGTCTGATATTATTAAA
+>49017
+ACCGGATGTCATATCCAGCGTCGTGAA
+>49381
+TACAATGTAAATTCGTTTCTTCGATCA
+>49456
+TTTTGGTTTGATCGTCAGGTGGACGC
+>49484
+TCTGCTATCATTGACTCGATCATTGA
+>49569
+TCGATTGTATGATCAGTGGAAGTGGC
+>49602
+GAAATTGGCCAACATTAATTCGGAA
+>49628
+TACTTTCGTCAAAATGTTCAGGAGC
+>49715
+TTTGTCCGGGTGCTTCGAAAGAACTCT
+>49778
+CAATAGCGTCGCTGAGTAACAGTG