Mercurial > repos > portiahollyoak > temp
comparison scripts/pickUniqMate.pl.orig @ 21:9672fe07a232 draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit 0fea84d05f8976b8360a8b4943ecb01b87e3ade0-dirty
| author | mvdbeek |
|---|---|
| date | Mon, 05 Dec 2016 09:58:47 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 20:6e02b9179a24 | 21:9672fe07a232 |
|---|---|
| 1 #!/share/bin/perl | |
| 2 use List::Util qw(sum); | |
| 3 use Bio::Seq; | |
| 4 | |
| 5 die "perl $0 <mate sam with header> <uniq bed>\n" if @ARGV<1; | |
| 6 | |
| 7 open in,$ARGV[1]; | |
| 8 my %uniq; | |
| 9 while(<in>) | |
| 10 { | |
| 11 chomp; | |
| 12 my @f=split; | |
| 13 $uniq{$f[3]}=[@f]; | |
| 14 } | |
| 15 close in; | |
| 16 | |
| 17 open in,$ARGV[0]; | |
| 18 my (%te,@ref,%ref); | |
| 19 while(<in>) | |
| 20 { | |
| 21 chomp; | |
| 22 my @f=split/\s+/,$_; | |
| 23 # headers | |
| 24 if(/^\@SQ/) | |
| 25 { | |
| 26 my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/; | |
| 27 push @ref,[$sn,$ln]; | |
| 28 $ref{$sn}=$#ref; | |
| 29 next; | |
| 30 } | |
| 31 | |
| 32 # unmapped | |
| 33 next if $f[2] eq "*"; | |
| 34 | |
| 35 my $mm=200; | |
| 36 my $xa=""; | |
| 37 for my $q (11..$#f) | |
| 38 { | |
| 39 if($f[$q]=~/NM:/) | |
| 40 { | |
| 41 $mm=$f[$q]; | |
| 42 $mm =~ s/NM://; | |
| 43 } | |
| 44 | |
| 45 if($f[$q]=~/XA:Z:/) | |
| 46 { | |
| 47 ($xa)=$f[$q]=~/XA:Z:(.*);$/; | |
| 48 last; | |
| 49 } | |
| 50 } | |
| 51 | |
| 52 if ($mm > 5) {next;} | |
| 53 | |
| 54 my ($rnum)=$f[1]=~/(\d)$/; | |
| 55 # CIGAR | |
| 56 my (@cigar_m)=$f[5]=~/(\d+)M/g; | |
| 57 my (@cigar_d)=$f[5]=~/(\d+)D/g; | |
| 58 my (@cigar_s)=$f[5]=~/(\d+)S/g; | |
| 59 my (@cigar_i)=$f[5]=~/(\d+)I/g; | |
| 60 my $aln_ln=sum(@cigar_m,@cigar_d); | |
| 61 | |
| 62 my $strand="+"; | |
| 63 if($f[1]=~/r/) | |
| 64 { | |
| 65 my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna'); | |
| 66 $f[9]=$seq->revcom->seq; | |
| 67 $strand="-"; | |
| 68 } | |
| 69 | |
| 70 # align to the junctions | |
| 71 if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1]) | |
| 72 { | |
| 73 if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1]) | |
| 74 { | |
| 75 $f[2]=${$ref[$ref{$f[2]}+1]}[0]; | |
| 76 $f[3]=1; | |
| 77 $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1); | |
| 78 } | |
| 79 else | |
| 80 { | |
| 81 $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1; | |
| 82 } | |
| 83 } | |
| 84 | |
| 85 $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";"; | |
| 86 | |
| 87 # XA tag | |
| 88 my @xa=split(";",$xa); | |
| 89 $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa; | |
| 90 | |
| 91 } | |
| 92 close in; | |
| 93 | |
| 94 foreach my $id (keys %pe) | |
| 95 { | |
| 96 next if exists $pe{$id}{1} && exists $pe{$id}{2} && exists $uniq{$id."/1"} && exists $uniq{$id."/2"}; | |
| 97 foreach my $rid (keys %{$pe{$id}}) | |
| 98 { | |
| 99 my $mate_id=($rid==1)?2:1; | |
| 100 if(exists $uniq{$id."/".$mate_id}) | |
| 101 { | |
| 102 ${$uniq{$id."/".$mate_id}}[4]=$pe{$id}{$rid}; | |
| 103 print join("\t",@{$uniq{$id."/".$mate_id}}),"\n"; | |
| 104 } | |
| 105 } | |
| 106 } |
