Mercurial > repos > iuc > samtools_fixmate
changeset 0:bc0cc7bfbfe9 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_fixmate commit 0fe00966500158720fb63dc9b28f6a4ca0d1e1eb
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Sep 28 04:28:45 2018 -0400
@@ -0,0 +1,178 @@
+<macros>
+    <!-- samtools package requirement; importing tools can add further requirements via yield -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">samtools</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@TOOL_VERSION@">1.9</token>
+    <!-- sets the Cheetah variable flags to the sum of the comma separated integers in the filter parameter -->
+    <token name="@FLAGS@">#set $flags = sum(map(int, str($filter).split(',')))</token>
+    <!-- links the input dataset as infile and links its bam/cram index, running samtools index when the metadata has none -->
+    <token name="@PREPARE_IDX@"><![CDATA[
+        ##prepare input and indices
+        ln -s '$input' infile &&
+        #if $input.is_of_type('bam'):
+            #if str( $input.metadata.bam_index ) != "None":
+                ln -s '${input.metadata.bam_index}' infile.bai &&
+            #else:
+                samtools index infile infile.bai &&
+            #end if
+        #elif $input.is_of_type('cram'):
+            #if str( $input.metadata.cram_index ) != "None":
+                ln -s '${input.metadata.cram_index}' infile.crai &&
+            #else:
+                samtools index infile infile.crai &&
+            #end if
+        #end if
+    ]]></token>
+    <!-- same as above for every dataset in input_bams; the loop position (0, 1, ...) is used as the link name -->
+    <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[
+        ##prepare input and indices
+        #for $i, $bam in enumerate( $input_bams ):
+            ln -s '$bam' '${i}' &&
+            #if $bam.is_of_type('bam'):
+                #if str( $bam.metadata.bam_index ) != "None":
+                    ln -s '${bam.metadata.bam_index}' '${i}.bai' &&
+                #else:
+                    samtools index '${i}' '${i}.bai' &&
+                #end if
+            #elif $bam.is_of_type('cram'):
+                #if str( $bam.metadata.cram_index ) != "None":
+                    ln -s '${bam.metadata.cram_index}' '${i}.crai' &&
+                #else:
+                    samtools index '${i}' '${i}.crai' &&
+                #end if
+            #end if
+        #end for
+    ]]></token>
+    <token name="@PREPARE_FASTA_IDX@"><![CDATA[
+        ##checks for reference data ($addref_cond.addref_select=="history" or =="cached")
+        ##and sets the -t/-T parameters accordingly:
+        ##- in case of history a symbolic link is used because samtools (view) will generate
+        ##  the index which might not be possible in the directory containing the fasta file
+        ##- in case of cached the absolute path is used which allows to read the cram file
+        ##  without specifying the reference
+        #if $addref_cond.addref_select == "history":
+            ln -s '${addref_cond.ref}' reference.fa &&
+            samtools faidx reference.fa &&
+            #set reffa="reference.fa"
+            #set reffai="reference.fa.fai"
+        #elif $addref_cond.addref_select == "cached":
+            #set reffa=str($addref_cond.ref.fields.path)
+            #set reffai=str($addref_cond.ref.fields.path)+".fai"
+        #else
+            #set reffa=None
+            #set reffai=None
+        #end if
+    ]]></token>
+    <token name="@ADDTHREADS@"><![CDATA[
+        ##compute the number of ADDITIONAL threads to be used by samtools (-@)
+        addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
+    ]]></token>
+    <token name="@ADDMEMORY@"><![CDATA[
+        ##compute the amount of memory (in MB) available to samtools sort (-m)
+        ##use only 75% of available: https://github.com/samtools/samtools/issues/831
+        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
+        ((addmemory=addmemory*75/100)) &&
+    ]]></token>
+    <xml name="seed_input">
+        <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
+    </xml>
+    <!-- select options for the individual flag values 1 to 2048 -->
+    <xml name="flag_options">
+        <option value="1">read is paired</option>
+        <option value="2">read is mapped in a proper pair</option>
+        <option value="4">read is unmapped</option>
+        <option value="8">mate is unmapped</option>
+        <option value="16">read reverse strand</option>
+        <option value="32">mate reverse strand</option>
+        <option value="64">read is the first in a pair</option>
+        <option value="128">read is the second in a pair</option>
+        <option value="256">alignment or read is not primary</option>
+        <option value="512">read fails platform/vendor quality checks</option>
+        <option value="1024">read is a PCR or optical duplicate</option>
+        <option value="2048">supplementary alignment</option>
+    </xml>
+
+    <!-- region specification macros and tokens for tools that allow the specification
+         of region by bed file / space separated list of regions -->
+    <token name="@REGIONS_FILE@"><![CDATA[
+        #if $cond_region.select_region == 'tab':
+            -t '$cond_region.targetregions'
+        #end if
+    ]]></token>
+    <token name="@REGIONS_MANUAL@"><![CDATA[
+        #if $cond_region.select_region == 'text':
+            #for $i, $x in enumerate($cond_region.regions_repeat):
+                '${x.region}'
+            #end for
+        #end if
+    ]]></token>
+    <xml name="regions_macro">
+        <conditional name="cond_region">
+            <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)">
+                <option value="no" selected="True">No</option>
+                <option value="text">Manually specify regions</option>
+                <option value="tab">Regions from tabular file</option>
+            </param>
+            <when value="no"/>
+            <when value="text">
+                <repeat name="regions_repeat" min="1" default="1" title="Regions">
+                    <param name="region" type="text" label="region" help="format chr:from-to">
+                        <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any integer">^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$</validator>
+                    </param>
+                </repeat>
+            </when>
+            <when value="tab">
+                <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. Tab-delimited file chr,from,to (1-based, inclusive)" />
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{SAM_def,
+                title={Definition of SAM/BAM format},
+                url = {https://samtools.github.io/hts-specs/},}
+            </citation>
+            <citation type="doi">10.1093/bioinformatics/btp352</citation>
+            <citation type="doi">10.1093/bioinformatics/btr076</citation>
+            <citation type="doi">10.1093/bioinformatics/btr509</citation>
+            <citation type="bibtex">
+                @misc{Danecek_et_al,
+                Author={Danecek, P., Schiffels, S., Durbin, R.},
+                title={Multiallelic calling model in bcftools (-m)},
+                url = {http://samtools.github.io/bcftools/call-m.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Durbin_VCQC,
+                Author={Durbin, R.},
+                title={Segregation based metric for variant call QC},
+                url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Li_SamMath,
+                Author={Li, H.},
+                title={Mathematical Notes on SAMtools Algorithms},
+                url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{SamTools_github,
+                title={SAMTools GitHub page},
+                url = {https://github.com/samtools/samtools},}
+            </citation>
+        </citations>
+    </xml>
+    <xml name="version_command">
+        <version_command><![CDATA[samtools 2>&1 | grep Version]]></version_command>
+    </xml>
+    <!-- every exit code of 1 or above is reported as a fatal error -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" description="Error" />
+        </stdio>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_fixmate.xml Fri Sep 28 04:28:45 2018 -0400
@@ -0,0 +1,91 @@
+<tool id="samtools_fixmate" name="Samtools fixmate" version="@TOOL_VERSION@">
+    <description>fill mate coordinates, ISIZE and mate related flags</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command><![CDATA[
+        ## the ADDTHREADS token sets the shell variable addthreads (passed to -@) and the
+        ## ADDMEMORY token sets addmemory (75% of the per-slot memory in MB); see macros.xml
+        @ADDTHREADS@
+        @ADDMEMORY@
+        ## name sort the input unless its datatype already marks it as ordered by query name
+        #if not $bamfile.is_of_type('qname_sorted.bam', 'qname_input_sorted.bam'):
+            ## samtools sort -m is a limit per thread, so the per-slot value is used
+            ## here and not the total memory of the job
+            samtools sort
+            -@ \$addthreads -m \${addmemory}M -T sorttemp
+            -n
+            -O BAM
+            -o namesorted.bam
+            '$bamfile' &&
+        #else:
+            ln -s '$bamfile' namesorted.bam &&
+        #end if
+
+        samtools fixmate
+        -@ \$addthreads
+        $remsec
+        $noprop
+        $tempcigar
+        $addms
+        -O BAM
+        namesorted.bam
+        '$output'
+    ]]></command>
+    <inputs>
+        <param name="bamfile" type="data" format="sam,bam,cram" optional="false" label="Select alignment" help="Set of aligned reads." />
+        <param name="remsec" type="boolean" argument="-r" truevalue="-r" falsevalue="" checked="false" label="Remove secondary and unmapped reads" />
+        <param name="noprop" type="boolean" argument="-p" truevalue="-p" falsevalue="" checked="false" label="Disable FR proper pair check" />
+        <param name="tempcigar" type="boolean" argument="-c" truevalue="-c" falsevalue="" checked="false" label="Add template cigar ct tag" />
+        <param name="addms" type="boolean" argument="-m" truevalue="-m" falsevalue="" checked="false" label="Add ms (mate score) tags" help="These are used by markdup to select the best reads to keep." />
+    </inputs>
+    <outputs>
+        <!--<data name="output" format="sam" />-->
+        <data name="output" format="qname_sorted.bam"/>
+    </outputs>
+    <tests>
+        <!-- from https://github.com/samtools/samtools/blob/6d79411685d8f0fbb34e123f52d72b63271f4dcb/test/test.pl#L2493-->
+        <!-- test_cmd($opts,out=>'fixmate/1_coord_sort.sam.expected', err=>'fixmate/1_coord_sort.sam.expected.err', cmd=>"$$opts{bin}/samtools fixmate${threads} -O sam $$opts{path}/fixmate/1_coord_sort.sam -", expect_fail=>1);-->
+        <test>
+            <param name="bamfile" value="2_isize_overflow.sam" />
+            <output name="output" file="2_isize_overflow.bam.expected" />
+        </test>
+        <!-- test_cmd($opts,out=>'fixmate/2_isize_overflow.bam.expected', cmd=>"$$opts{bin}/samtools fixmate${threads} -O sam $$opts{path}/fixmate/2_isize_overflow.sam -");-->
+        <test>
+            <param name="bamfile" value="3_reverse_read_pp_lt.sam" />
+            <output name="output" file="3_reverse_read_pp_lt.bam.expected" />
+        </test>
+        <!-- test_cmd($opts,out=>'fixmate/3_reverse_read_pp_lt.bam.expected', cmd=>"$$opts{bin}/samtools fixmate${threads} -O sam $$opts{path}/fixmate/3_reverse_read_pp_lt.sam -");-->
+        <test>
+            <param name="bamfile" value="4_reverse_read_pp_equal.sam" />
+            <output name="output" file="4_reverse_read_pp_equal.bam.expected" />
+        </test>
+        <!-- test_cmd($opts,out=>'fixmate/4_reverse_read_pp_equal.bam.expected', cmd=>"$$opts{bin}/samtools fixmate${threads} -O sam $$opts{path}/fixmate/4_reverse_read_pp_equal.sam -");-->
+        <test>
+            <param name="bamfile" value="5_ct.sam" />
+            <param name="tempcigar" value="-c" />
+            <output name="output" file="5_ct.bam.expected" />
+        </test>
+        <!-- test_cmd($opts,out=>'fixmate/5_ct.bam.expected', cmd=>"$$opts{bin}/samtools fixmate${threads} -cO sam $$opts{path}/fixmate/5_ct.sam -");-->
+        <test>
+            <param name="bamfile" value="6_ct_replace.sam" />
+            <param name="tempcigar" value="-c" />
+            <output name="output" file="6_ct_replace.bam.expected" />
+        </test>
+        <!-- test_cmd($opts,out=>'fixmate/6_ct_replace.bam.expected', cmd=>"$$opts{bin}/samtools fixmate${threads} -cO sam $$opts{path}/fixmate/6_ct_replace.sam -");-->
+        <test>
+            <param name="bamfile" value="7_two_read_mapped.sam" />
+            <output name="output" file="7_two_read_mapped.bam.expected" />
+        </test>
+        <!--test_cmd($opts,out=>'fixmate/7_two_read_mapped.bam.expected', cmd=>"$$opts{bin}/samtools fixmate${threads} -O sam $$opts{path}/fixmate/7_two_read_mapped.sam -");-->
+    </tests>
+    <help>
+**What it does**
+
+Fill in mate coordinates, ISIZE and mate related flags from a name-sorted alignment.
+    </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2_isize_overflow.sam Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,20 @@ +@HD VN:1.4 SO:queryname +@SQ SN:ref1 LN:45 +@SQ SN:ref2 LN:40 +of1 99 ref1 10 30 23M = 8 2 AAGTCGGCAGCGTCAGATGTGTA ??????????????????????? +of1 147 ref1 8 30 23M = 10 -2 CTGTCTCTTATACACATCTCCTT ??????????????????????? +r001 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * +r001 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 +r002 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 +r003 0 ref1 9 30 5H6M * 0 0 AGCTAA * +r003 16 ref1 29 30 6H5M * 0 0 TAGGC * +r004 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * +r007 8 ref1 9 30 5H6M * 0 0 AGCTAA * +r007 4 ref1 9 30 * * 0 0 GGGGGG * +u1 4 * 0 30 * * 0 0 TAATTGGGTCTTCAGAGCACCTA ??????????????????????? +x1 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * +x2 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? +x3 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? +x4 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? +x5 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? +x6 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ???????????????????????
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2_isize_overflow.sam.expected Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,20 @@ +@HD VN:1.4 SO:queryname +@SQ SN:ref1 LN:45 +@SQ SN:ref2 LN:40 +of1 99 ref1 10 30 23M = 8 21 AAGTCGGCAGCGTCAGATGTGTA ??????????????????????? MQ:i:30 MC:Z:23M +of1 147 ref1 8 30 23M = 10 -21 CTGTCTCTTATACACATCTCCTT ??????????????????????? MQ:i:30 MC:Z:23M +r001 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * MQ:i:30 MC:Z:8M4I4M1D3M +r001 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 MQ:i:30 MC:Z:9M +r002 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 +r003 33 ref1 9 30 5H6M = 29 25 AGCTAA * MQ:i:30 MC:Z:6H5M +r003 17 ref1 29 30 6H5M = 9 -25 TAGGC * MQ:i:30 MC:Z:5H6M +r004 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * +r007 9 ref1 9 30 5H6M = 9 0 AGCTAA * MC:Z:* +r007 5 ref1 9 30 * = 9 0 GGGGGG * MQ:i:30 MC:Z:5H6M +u1 4 * 0 30 * * 0 0 TAATTGGGTCTTCAGAGCACCTA ??????????????????????? +x1 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * +x2 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? +x3 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? +x4 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? +x5 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? +x6 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ???????????????????????
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3_reverse_read_pp_lt.sam Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,5 @@ +@HD VN:1.4 SO:queryname +@SQ SN:1 LN:4569345 +@RG ID:1#6 LB:1 SM:a +MS0_12500:1:2114:20577:3664#6 99 1 40346 23 75M = 40340 75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD X0:i:1 X1:i:1 XA:Z:X,+8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 MQ:i:23 XT:A:U +MS0_12500:1:2114:20577:3664#6 147 1 40340 23 75M = 40346 -75 CAAAATCTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCF@GGGGGGGGGCCCCC X0:i:1 X1:i:1 XA:Z:X,-8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 MQ:i:23 XT:A:U
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3_reverse_read_pp_lt.sam.expected Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,5 @@ +@HD VN:1.4 SO:queryname +@SQ SN:1 LN:4569345 +@RG ID:1#6 LB:1 SM:a +MS0_12500:1:2114:20577:3664#6 99 1 40346 23 75M = 40340 69 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD X0:i:1 X1:i:1 XA:Z:X,+8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 MC:Z:75M +MS0_12500:1:2114:20577:3664#6 147 1 40340 23 75M = 40346 -69 CAAAATCTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCF@GGGGGGGGGCCCCC X0:i:1 X1:i:1 XA:Z:X,-8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 MC:Z:75M
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/4_reverse_read_pp_equal.sam Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,5 @@ +@HD VN:1.4 SO:queryname +@SQ SN:1 LN:4569345 +@RG ID:1#6 LB:1 SM:a +MS0_12500:1:2114:20577:3664#6 99 1 40346 23 75M = 40346 75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD X0:i:1 X1:i:1 XA:Z:X,+8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 MQ:i:23 XT:A:U +MS0_12500:1:2114:20577:3664#6 147 1 40346 23 75M = 40346 -75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCF@GGGGGGGGGCCCCC X0:i:1 X1:i:1 XA:Z:X,-8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 MQ:i:23 XT:A:U
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/4_reverse_read_pp_equal.sam.expected Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,5 @@ +@HD VN:1.4 SO:queryname +@SQ SN:1 LN:4569345 +@RG ID:1#6 LB:1 SM:a +MS0_12500:1:2114:20577:3664#6 99 1 40346 23 75M = 40346 75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD X0:i:1 X1:i:1 XA:Z:X,+8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 MC:Z:75M +MS0_12500:1:2114:20577:3664#6 147 1 40346 23 75M = 40346 -75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCF@GGGGGGGGGCCCCC X0:i:1 X1:i:1 XA:Z:X,-8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 MC:Z:75M
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/5_ct.sam Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,5 @@ +@HD VN:1.4 SO:queryname +@SQ SN:1 LN:4569345 +@RG ID:1#6 LB:1 SM:a +MS0_12500:1:2114:20577:3664#6 99 1 40346 23 75M = 40346 75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD X0:i:1 X1:i:1 XA:Z:X,+8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 MQ:i:23 XT:A:U +MS0_12500:1:2114:20577:3664#6 147 1 40346 23 75M = 40346 -75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCF@GGGGGGGGGCCCCC X0:i:1 X1:i:1 XA:Z:X,-8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 MQ:i:23 XT:A:U
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/5_ct.sam.expected Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,5 @@ +@HD VN:1.4 SO:queryname +@SQ SN:1 LN:4569345 +@RG ID:1#6 LB:1 SM:a +MS0_12500:1:2114:20577:3664#6 99 1 40346 23 75M = 40346 75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD X0:i:1 X1:i:1 XA:Z:X,+8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 MC:Z:75M ct:Z:1F75M-75T2R75M +MS0_12500:1:2114:20577:3664#6 147 1 40346 23 75M = 40346 -75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCF@GGGGGGGGGCCCCC X0:i:1 X1:i:1 XA:Z:X,-8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 MC:Z:75M
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/6_ct_replace.sam Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,5 @@ +@HD VN:1.4 SO:queryname +@SQ SN:1 LN:4569345 +@RG ID:1#6 LB:1 SM:a +MS0_12500:1:2114:20577:3664#6 99 1 40346 23 75M = 40346 75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD X0:i:1 X1:i:1 XA:Z:X,+8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 +MS0_12500:1:2114:20577:3664#6 147 1 40346 23 75M = 40346 -75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCF@GGGGGGGGGCCCCC X0:i:1 X1:i:1 XA:Z:X,-8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 ct:Z:1F70M-75T2R70M
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/6_ct_replace.sam.expected Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,5 @@ +@HD VN:1.4 SO:queryname +@SQ SN:1 LN:4569345 +@RG ID:1#6 LB:1 SM:a +MS0_12500:1:2114:20577:3664#6 99 1 40346 23 75M = 40346 75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGD X0:i:1 X1:i:1 XA:Z:X,+8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 MC:Z:75M ct:Z:1F75M-75T2R75M +MS0_12500:1:2114:20577:3664#6 147 1 40346 23 75M = 40346 -75 CTCATGGACACCAACCACTCAATTATCTATCCACCTAGCCATGGCCATCACCTTATGAGCGGGCGCAGTGACTAT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCF@GGGGGGGGGCCCCC X0:i:1 X1:i:1 XA:Z:X,-8796,75M,2; MD:Z:22C52 RG:Z:1#6 XG:i:0 AM:i:23 NM:i:1 SM:i:23 XM:i:1 XO:i:0 XT:A:U MQ:i:23 MC:Z:75M
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/7_two_read_mapped.sam Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:queryname +@SQ SN:ref1 LN:45 +@SQ SN:ref2 LN:40 +of1 99 ref1 10 30 23M = 8 2 AAGTCGGCAGCGTCAGATGTGTA ??????????????????????? +of1 147 ref1 8 30 23M = 10 -2 CTGTCTCTTATACACATCTCCTT ??????????????????????? +r001 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * +r001 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 +r002 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 +r003 0 ref1 9 30 5H6M * 0 0 AGCTAA * +r003 16 ref1 29 30 6H5M * 0 0 TAGGC * +r004 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * +r007 8 ref1 9 30 5H6M * 0 0 AGCTAA * +r007 4 ref1 9 30 * * 0 0 GGGGGG * +r008 12 ref1 9 30 5H6M * 0 0 AGCTAA * +r008 4 ref1 9 30 * * 0 0 GGGGGG * +uu1 4 * 0 30 * * 0 0 TAATTGGGTCTTCAGAGCACCTA ??????????????????????? +x1 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * +x2 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? +x3 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? +x4 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? +x5 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? +x6 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ???????????????????????
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/7_two_read_mapped.sam.expected Fri Sep 28 04:28:45 2018 -0400 @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:queryname +@SQ SN:ref1 LN:45 +@SQ SN:ref2 LN:40 +of1 99 ref1 10 30 23M = 8 21 AAGTCGGCAGCGTCAGATGTGTA ??????????????????????? MQ:i:30 MC:Z:23M +of1 147 ref1 8 30 23M = 10 -21 CTGTCTCTTATACACATCTCCTT ??????????????????????? MQ:i:30 MC:Z:23M +r001 83 ref1 37 30 9M = 7 -39 CAGCGCCAT * MQ:i:30 MC:Z:8M4I4M1D3M +r001 163 ref1 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 YY:i:100 MQ:i:30 MC:Z:9M +r002 0 ref1 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * XA:Z:abc XB:i:-10 +r003 33 ref1 9 30 5H6M = 29 25 AGCTAA * MQ:i:30 MC:Z:6H5M +r003 17 ref1 29 30 6H5M = 9 -25 TAGGC * MQ:i:30 MC:Z:5H6M +r004 0 ref1 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * +r007 9 ref1 9 30 5H6M = 9 0 AGCTAA * MC:Z:* +r007 5 ref1 9 30 * = 9 0 GGGGGG * MQ:i:30 MC:Z:5H6M +r008 13 ref1 9 30 5H6M = 9 0 AGCTAA * +r008 13 ref1 9 30 * = 9 0 GGGGGG * +uu1 4 * 0 30 * * 0 0 TAATTGGGTCTTCAGAGCACCTA ??????????????????????? +x1 0 ref2 1 30 20M * 0 0 AGGTTTTATAAAACAAATAA * +x2 0 ref2 2 30 21M * 0 0 GGTTTTATAAAACAAATAATT ????????????????????? +x3 0 ref2 6 30 9M4I13M * 0 0 TTATAAAACAAATAATTAAGTCTACA ?????????????????????????? +x4 0 ref2 10 30 25M * 0 0 CAAATAATTAAGTCTACAGAGCAAC ????????????????????????? +x5 0 ref2 12 30 24M * 0 0 AATAATTAAGTCTACAGAGCAACT ???????????????????????? +x6 0 ref2 14 30 23M * 0 0 TAATTAAGTCTACAGAGCAACTA ???????????????????????