# HG changeset patch # User iuc # Date 1539539089 14400 # Node ID 740ce0a18f0d3ed40299939055f946fe3bb34e35 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_merge commit b3426aed6615742d96dfb8f7346a9e0d4e391a99 diff -r 000000000000 -r 740ce0a18f0d macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Sun Oct 14 13:44:49 2018 -0400 @@ -0,0 +1,172 @@ + + + + samtools + + + + 1.9 + #set $flags = sum(map(int, str($filter).split(','))) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$ + + + + + + + + + + + + + @misc{SAM_def, + title={Definition of SAM/BAM format}, + url = {https://samtools.github.io/hts-specs/},} + + 10.1093/bioinformatics/btp352 + 10.1093/bioinformatics/btr076 + 10.1093/bioinformatics/btr509 + + @misc{Danecek_et_al, + Author={Danecek, P., Schiffels, S., Durbin, R.}, + title={Multiallelic calling model in bcftools (-m)}, + url = {http://samtools.github.io/bcftools/call-m.pdf},} + + + @misc{Durbin_VCQC, + Author={Durbin, R.}, + title={Segregation based metric for variant call QC}, + url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},} + + + @misc{Li_SamMath, + Author={Li, H.}, + title={Mathematical Notes on SAMtools Algorithms}, + url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},} + + + @misc{SamTools_github, + title={SAMTools GitHub page}, + url = {https://github.com/samtools/samtools},} + + + + + &1 | grep Version]]> + + + + + + + diff -r 000000000000 -r 740ce0a18f0d samtools_merge.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools_merge.xml Sun Oct 14 13:44:49 2018 -0400 @@ -0,0 +1,129 @@ + + merge multiple sorted alignment files + + macros.xml + + + + + &2 echo "inconsistently sorted input" && + exit 1 && + #end if +#end for + +samtools merge +-@ \$addthreads +-s $seed +## TODO force overwrite seems necessay (but I do not understand why ...) +-f +## Galaxy provides only default compression +## #if $compression == 'levelone' +## -1 +## #else if $compression == 'uncompressed' +## -u +## #end if +#if str($headerbam) != 'None' + -h '$headerbam' +#end if +#if $sortby=='name' + -n +#end if +## TODO since galaxy can't represent this as data type at the moment this option is unsupported +## -t TAG The input alignments have been sorted by the value of TAG, then by either position or name (if -n is given). +#if str($region) != '' + -R '$region' +#end if +## Attach an RG tag to each alignment. The tag value is inferred from file names. +## -r +## TODO -r makes no sense with the link names, is there some data set metadata (tags,...) that could be used? +$idrg +$idpg +$output +#for $i, $bam in enumerate( $bamfiles ): + ${i}.sam +#end for + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** +Merge multiple sorted alignment files, producing a single sorted output file that contains all the input records and maintains the existing sort order. + +If a file to take @headers from is specified the @SQ headers of input files will be merged into the specified header, otherwise they will be merged into a composite header created from the input headers. If in the process of merging @SQ lines for coordinate sorted input files, a conflict arises as to the order (for example input1.bam has @SQ for a,b,c and input2.bam has b,a,c) then the resulting output file will need to be re-sorted back into coordinate order. + +Unless the @PG/@RG headers are made unique when merging @RG and @PG records into the output header then any IDs found to be duplicates of existing IDs in the output header will have a suffix appended to them to differentiate them from similar header records from other files and the read records will be updated to reflect this. + + + diff -r 000000000000 -r 740ce0a18f0d test-data/2.merge.expected-samin.bam Binary file test-data/2.merge.expected-samin.bam has changed diff -r 000000000000 -r 740ce0a18f0d test-data/2.merge.expected.bam Binary file test-data/2.merge.expected.bam has changed diff -r 000000000000 -r 740ce0a18f0d test-data/2.merge.expected.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.merge.expected.sam Sun Oct 14 13:44:49 2018 -0400 @@ -0,0 +1,67 @@ +@HD VN:1.4 +@SQ SN:insert LN:599 +@SQ SN:ref1 LN:45 +@SQ SN:ref2 LN:40 +@SQ SN:ref3 LN:4 +@RG ID:fish PG:donkey +@RG ID:cow PU:13_&^&&*(:332 +@RG PU:*9u8jkjjkjd: ID:colt +@RG ID:fish-55424A4 PG:llama +@RG ID:cow-3A2CCEF5 PU:13_&^&&*(:332 PG:donkey-4861F4EF +@RG PU:*9u8jkjjkjd: ID:colt-6ADB4A65 +@RG ID:fish-39E5EF +@RG ID:cow-1802EEEC PU:13_&^&&*(:332 +@RG PU:*9u8jkjjkjd: ID:colt-7EC68B3F +@PG ID:bull PP:donkey +@PG ID:donkey +@PG ID:moose +@PG PP:moose ID:cow +@PG ID:llama +@PG ID:bull-2B019719 PP:donkey-4861F4EF +@PG ID:donkey-4861F4EF +@PG ID:bull-60104A41 PP:donkey-2EE20DF8 +@PG ID:donkey-2EE20DF8 +@CO +@CO Do you know? +@CO Do you know? +@CO Another comment from test_input_1_c +r000 99 insert 50 30 10M = 80 30 ATTTAGCTAC AAAAAAAAAA RG:Z:cow PG:Z:bull +r000 211 insert 80 30 10M = 50 -30 CCCAATCATT AAAAAAAAAA RG:Z:cow PG:Z:bull +r001 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:fish +r005 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF +r008 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8 +r002 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 +r003 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:cow +r006 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF +r007 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF +r009 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8 +r010 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8 +r004 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt +r007 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF +r010 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8 +r003 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:cow +r006 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF +r009 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8 +r001 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:fish +r005 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt-6ADB4A65 PG:Z:donkey-4861F4EF +r008 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt-7EC68B3F PG:Z:donkey-2EE20DF8 +x1 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:colt PG:Z:bull +x7 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719 +x10 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow-1802EEEC PG:Z:bull-60104A41 +x2 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:colt PG:Z:bull +x8 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719 +x11 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41 +x3 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:fish PG:Z:bull +x9 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719 +x12 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41 +x4 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:fish PG:Z:bull +x10 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719 +x13 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41 +x5 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:fish PG:Z:bull +x11 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719 +x14 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41 +x6 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow +x12 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow-3A2CCEF5 PG:Z:bull-2B019719 +x15 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow-1802EEEC PG:Z:bull-60104A41 +u1 4 * 0 30 23M * 0 0 TAATTAAGTCTACAGAAAAAAAA ??????????????????????? +u2 4 * 0 30 * * 0 0 TAATTAAGTCTACAGAAAAAAAA ??????????????????????? diff -r 000000000000 -r 740ce0a18f0d test-data/4.merge.expected.bam Binary file test-data/4.merge.expected.bam has changed diff -r 000000000000 -r 740ce0a18f0d test-data/6.merge.expected.bam Binary file test-data/6.merge.expected.bam has changed diff -r 000000000000 -r 740ce0a18f0d test-data/7.merge.expected.bam Binary file test-data/7.merge.expected.bam has changed diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_a.bam Binary file test-data/test_input_1_a.bam has changed diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_a.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_input_1_a.sam Sun Oct 14 13:44:49 2018 -0400 @@ -0,0 +1,28 @@ +@HD VN:1.4 +@SQ SN:insert LN:599 +@SQ SN:ref1 LN:45 +@SQ SN:ref2 LN:40 +@SQ SN:ref3 LN:4 +@RG ID:fish PG:donkey +@RG ID:cow PU:13_&^&&*(:332 +@RG PU:*9u8jkjjkjd: ID:colt +@PG ID:bull PP:donkey +@PG ID:donkey +@PG ID:moose +@PG PP:moose ID:cow +@CO +r000 99 insert 50 30 10M = 80 30 ATTTAGCTAC AAAAAAAAAA RG:Z:cow PG:Z:bull +r000 211 insert 80 30 10M = 50 -30 CCCAATCATT AAAAAAAAAA RG:Z:cow PG:Z:bull +r001 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:fish PG:Z:donkey +r002 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 PG:Z:donkey +r003 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:cow +r004 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:donkey +r003 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:cow PG:Z:donkey +r001 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:fish PG:Z:donkey +x1 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:colt PG:Z:bull +x2 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:colt PG:Z:bull +x3 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:fish PG:Z:bull +x4 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:fish PG:Z:bull +x5 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:fish PG:Z:bull +x6 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow +u1 4 * 0 30 23M * 0 0 TAATTAAGTCTACAGAAAAAAAA ??????????????????????? diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_a_regex.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_input_1_a_regex.sam Sun Oct 14 13:44:49 2018 -0400 @@ -0,0 +1,28 @@ +@HD VN:1.4 +@SQ SN:insert LN:599 +@SQ SN:ref1|this=that LN:45 +@SQ SN:ref2*HLA:1a:2:b LN:40 +@SQ SN:ref3 LN:4 +@RG ID:fish PG:donkey +@RG ID:cow PU:13_&^&&*(:332 +@RG PU:*9u8jkjjkjd: ID:colt +@PG ID:bull PP:donkey +@PG ID:donkey +@PG ID:moose +@PG PP:moose ID:cow +@CO +r000 99 insert 50 30 10M = 80 30 ATTTAGCTAC AAAAAAAAAA RG:Z:cow PG:Z:bull +r000 211 insert 80 30 10M = 50 -30 CCCAATCATT AAAAAAAAAA RG:Z:cow PG:Z:bull +r001 163 ref1|this=that 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:fish PG:Z:colt +r002 0 ref1|this=that 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 PG:Z:colt +r003 0 ref1|this=that 9 30 5H6M * 0 0 AGCTAA * RG:Z:cow +r004 0 ref1|this=that 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:colt +r003 16 ref1|this=that 29 30 6H5M * 0 0 TAGGC * RG:Z:cow PG:Z:colt +r001 83 ref1|this=that 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:fish PG:Z:colt +x1 0 ref2*HLA:1a:2:b 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:colt PG:Z:bull +x2 0 ref2*HLA:1a:2:b 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:colt PG:Z:bull +x3 0 ref2*HLA:1a:2:b 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:fish PG:Z:bull +x4 0 ref2*HLA:1a:2:b 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:fish PG:Z:bull +x5 0 ref2*HLA:1a:2:b 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:fish PG:Z:bull +x6 0 ref2*HLA:1a:2:b 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow +u1 4 * 0 30 23M * 0 0 TAATTAAGTCTACAGAAAAAAAA ??????????????????????? diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_b.bam Binary file test-data/test_input_1_b.bam has changed diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_b.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_input_1_b.sam Sun Oct 14 13:44:49 2018 -0400 @@ -0,0 +1,24 @@ +@HD VN:1.4 +@SQ SN:insert LN:599 +@SQ SN:ref1 LN:45 +@SQ SN:ref2 LN:40 +@SQ SN:ref3 LN:4 +@PG ID:llama +@RG ID:fish PG:llama +@RG ID:cow PU:13_&^&&*(:332 PG:donkey +@RG PU:*9u8jkjjkjd: ID:colt +@PG ID:bull PP:donkey +@PG ID:donkey +@CO Do you know? +r005 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt PG:Z:donkey +r006 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt PG:Z:donkey +r007 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt PG:Z:donkey +r007 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:donkey +r006 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt PG:Z:donkey +r005 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt PG:Z:donkey +x7 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow PG:Z:bull +x8 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow PG:Z:bull +x9 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow PG:Z:bull +x10 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow PG:Z:bull +x11 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow PG:Z:bull +x12 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow PG:Z:bull diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_b_regex.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_input_1_b_regex.sam Sun Oct 14 13:44:49 2018 -0400 @@ -0,0 +1,24 @@ +@HD VN:1.4 +@SQ SN:insert LN:599 +@SQ SN:ref2*HLA:1a:2:b LN:40 +@SQ SN:ref3 LN:4 +@SQ SN:ref1 LN:45 +@PG ID:llama_{a} +@RG ID:fish-[1] PG:llama_{a} +@RG ID:cow-[2] PU:13_&^&&*(:332 PG:donkey +@RG PU:*9u8jkjjkjd: ID:colt +@PG ID:bull PP:donkey +@PG ID:donkey +@CO Do you know? +x7 0 ref2*HLA:1a:2:b 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow-[2] PG:Z:bull +x8 0 ref2*HLA:1a:2:b 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow-[2] PG:Z:bull +x9 0 ref2*HLA:1a:2:b 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow-[2] PG:Z:bull +x10 0 ref2*HLA:1a:2:b 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow-[2] PG:Z:bull +x11 0 ref2*HLA:1a:2:b 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow-[2] PG:Z:bull +x12 0 ref2*HLA:1a:2:b 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow-[2] PG:Z:bull +r005 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt PG:Z:donkey +r006 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt PG:Z:donkey +r007 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt PG:Z:donkey +r007 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:donkey +r006 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt PG:Z:donkey +r005 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt PG:Z:donkey diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_c.bam Binary file test-data/test_input_1_c.bam has changed diff -r 000000000000 -r 740ce0a18f0d test-data/test_input_1_c.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_input_1_c.sam Sun Oct 14 13:44:49 2018 -0400 @@ -0,0 +1,23 @@ +@HD VN:1.4 +@SQ SN:ref1 LN:45 +@SQ SN:ref2 LN:40 +@RG ID:fish +@RG ID:cow PU:13_&^&&*(:332 +@RG PU:*9u8jkjjkjd: ID:colt +@PG ID:bull PP:donkey +@PG ID:donkey +@CO Do you know? +@CO Another comment from test_input_1_c +r008 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 RG:Z:colt PG:Z:donkey +r009 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 RG:Z:colt PG:Z:donkey +r010 0 ref1 9 30 5H6M * 0 0 AGCTAA * RG:Z:colt PG:Z:donkey +r010 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * RG:Z:colt PG:Z:donkey +r009 16 ref1 29 30 6H5M * 0 0 TAGGC * RG:Z:colt PG:Z:donkey +r008 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * RG:Z:colt PG:Z:donkey +x10 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * RG:Z:cow PG:Z:bull +x11 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? RG:Z:cow PG:Z:bull +x12 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? RG:Z:cow PG:Z:bull +x13 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? RG:Z:cow PG:Z:bull +x14 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? RG:Z:cow PG:Z:bull +x15 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ??????????????????????? RG:Z:cow PG:Z:bull +u2 4 * 0 30 * * 0 0 TAATTAAGTCTACAGAAAAAAAA ???????????????????????