Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/misc/blast2sam.pl @ 0:903fc43d6227 draft default tip
Uploaded
| author | lsong10 |
|---|---|
| date | Fri, 26 Mar 2021 16:52:45 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:903fc43d6227 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 | |
| 3 use strict; | |
| 4 use warnings; | |
| 5 use Getopt::Std; | |
| 6 | |
| 7 &blast2sam; | |
| 8 | |
| 9 sub blast2sam { | |
| 10 my %opts = (); | |
| 11 getopts('s', \%opts); | |
| 12 die("Usage: blast2sam.pl <in.blastn>\n") if (-t STDIN && @ARGV == 0); | |
| 13 my ($qlen, $slen, $q, $s, $qbeg, $qend, @sam, @cigar, @cmaux, $show_seq); | |
| 14 $show_seq = defined($opts{s}); | |
| 15 @sam = (); @sam[0,4,6..8,10] = ('', 255, '*', 0, 0, '*'); | |
| 16 while (<>) { | |
| 17 if (@cigar && (/^Query=/ || /Score =.*bits.*Expect/)) { # print | |
| 18 &blast_print_sam(\@sam, \@cigar, \@cmaux, $qlen - $qend); | |
| 19 @cigar = (); | |
| 20 } | |
| 21 if (/^Query= (\S+)/) { | |
| 22 $sam[0] = $1; | |
| 23 } elsif (/\((\S+)\s+letters\)/) { | |
| 24 $qlen = $1; $qlen =~ s/,//g; | |
| 25 } elsif (/^>(\S+)/) { | |
| 26 $sam[2] = $1; | |
| 27 } elsif (/Length = (\d+)/) { | |
| 28 $slen = $1; | |
| 29 } elsif (/Score =\s+(\S+) bits.+Expect(\(\d+\))? = (\S+)/) { # the start of an alignment block | |
| 30 my ($as, $ev) = (int($1 + .499), $3); | |
| 31 $ev = "1$ev" if ($ev =~ /^e/); | |
| 32 @sam[1,3,9,11,12] = (0, 0, '', "AS:i:$as", "EV:Z:$ev"); | |
| 33 @cigar = (); $qbeg = 0; | |
| 34 @cmaux = (0, 0, 0, ''); | |
| 35 } elsif (/Strand = (\S+) \/ (\S+)/) { | |
| 36 $sam[1] |= 0x10 if ($2 eq 'Minus'); | |
| 37 } elsif (/Query\:\s(\d+)\s*(\S+)\s(\d+)/) { | |
| 38 $q = $2; | |
| 39 unless ($qbeg) { | |
| 40 $qbeg = $1; | |
| 41 push(@cigar, ($1-1) . "H") if ($1 > 1); | |
| 42 } | |
| 43 $qend = $3; | |
| 44 if ($show_seq) { | |
| 45 my $x = $q; | |
| 46 $x =~ s/-//g; $sam[9] .= $x; | |
| 47 } | |
| 48 } elsif (/Sbjct\:\s(\d+)\s*(\S+)\s(\d+)/) { | |
| 49 $s = $2; | |
| 50 if ($sam[1] & 0x10) { | |
| 51 $sam[3] = $3; | |
| 52 } else { | |
| 53 $sam[3] = $1 unless ($sam[3]); | |
| 54 } | |
| 55 &aln2cm(\@cigar, \$q, \$s, \@cmaux); | |
| 56 } | |
| 57 } | |
| 58 &blast_print_sam(\@sam, \@cigar, \@cmaux, $qlen - $qend); | |
| 59 } | |
| 60 | |
| 61 sub blast_print_sam { | |
| 62 my ($sam, $cigar, $cmaux, $qrest) = @_; | |
| 63 push(@$cigar, $cmaux->[1] . substr("MDI", $cmaux->[0], 1)); | |
| 64 push(@$cigar, $qrest . 'H') if ($qrest); | |
| 65 if ($sam->[1] & 0x10) { | |
| 66 @$cigar = reverse(@$cigar); | |
| 67 $sam->[9] = reverse($sam->[9]); | |
| 68 $sam->[9] =~ tr/atgcrymkswATGCRYMKSW/tacgyrkmswTACGYRKMSW/; | |
| 69 } | |
| 70 $sam->[9] = '*' if (!$sam->[9]); | |
| 71 $sam->[5] = join('', @$cigar); | |
| 72 print join("\t", @$sam), "\n"; | |
| 73 } | |
| 74 | |
| 75 sub aln2cm { | |
| 76 my ($cigar, $q, $s, $cmaux) = @_; | |
| 77 my $l = length($$q); | |
| 78 for (my $i = 0; $i < $l; ++$i) { | |
| 79 my $op; | |
| 80 # set $op | |
| 81 if (substr($$q, $i, 1) eq '-') { $op = 2; } | |
| 82 elsif (substr($$s, $i, 1) eq '-') { $op = 1; } | |
| 83 else { $op = 0; } | |
| 84 # for CIGAR | |
| 85 if ($cmaux->[0] == $op) { | |
| 86 ++$cmaux->[1]; | |
| 87 } else { | |
| 88 push(@$cigar, $cmaux->[1] . substr("MDI", $cmaux->[0], 1)); | |
| 89 $cmaux->[0] = $op; $cmaux->[1] = 1; | |
| 90 } | |
| 91 } | |
| 92 } |
