comparison scripts/pickUniqMate.pl.orig @ 21:9672fe07a232 draft default tip

planemo upload for repository https://github.com/portiahollyoak/Tools commit 0fea84d05f8976b8360a8b4943ecb01b87e3ade0-dirty
author mvdbeek
date Mon, 05 Dec 2016 09:58:47 -0500
parents
children
comparison
equal deleted inserted replaced
20:6e02b9179a24 21:9672fe07a232
1 #!/share/bin/perl
2 use List::Util qw(sum);
3 use Bio::Seq;
4
5 die "perl $0 <mate sam with header> <uniq bed>\n" if @ARGV<1;
6
7 open in,$ARGV[1];
8 my %uniq;
9 while(<in>)
10 {
11 chomp;
12 my @f=split;
13 $uniq{$f[3]}=[@f];
14 }
15 close in;
16
17 open in,$ARGV[0];
18 my (%te,@ref,%ref);
19 while(<in>)
20 {
21 chomp;
22 my @f=split/\s+/,$_;
23 # headers
24 if(/^\@SQ/)
25 {
26 my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/;
27 push @ref,[$sn,$ln];
28 $ref{$sn}=$#ref;
29 next;
30 }
31
32 # unmapped
33 next if $f[2] eq "*";
34
35 my $mm=200;
36 my $xa="";
37 for my $q (11..$#f)
38 {
39 if($f[$q]=~/NM:/)
40 {
41 $mm=$f[$q];
42 $mm =~ s/NM://;
43 }
44
45 if($f[$q]=~/XA:Z:/)
46 {
47 ($xa)=$f[$q]=~/XA:Z:(.*);$/;
48 last;
49 }
50 }
51
52 if ($mm > 5) {next;}
53
54 my ($rnum)=$f[1]=~/(\d)$/;
55 # CIGAR
56 my (@cigar_m)=$f[5]=~/(\d+)M/g;
57 my (@cigar_d)=$f[5]=~/(\d+)D/g;
58 my (@cigar_s)=$f[5]=~/(\d+)S/g;
59 my (@cigar_i)=$f[5]=~/(\d+)I/g;
60 my $aln_ln=sum(@cigar_m,@cigar_d);
61
62 my $strand="+";
63 if($f[1]=~/r/)
64 {
65 my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
66 $f[9]=$seq->revcom->seq;
67 $strand="-";
68 }
69
70 # align to the junctions
71 if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1])
72 {
73 if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1])
74 {
75 $f[2]=${$ref[$ref{$f[2]}+1]}[0];
76 $f[3]=1;
77 $aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1);
78 }
79 else
80 {
81 $aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1;
82 }
83 }
84
85 $pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";";
86
87 # XA tag
88 my @xa=split(";",$xa);
89 $pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa;
90
91 }
92 close in;
93
94 foreach my $id (keys %pe)
95 {
96 next if exists $pe{$id}{1} && exists $pe{$id}{2} && exists $uniq{$id."/1"} && exists $uniq{$id."/2"};
97 foreach my $rid (keys %{$pe{$id}})
98 {
99 my $mate_id=($rid==1)?2:1;
100 if(exists $uniq{$id."/".$mate_id})
101 {
102 ${$uniq{$id."/".$mate_id}}[4]=$pe{$id}{$rid};
103 print join("\t",@{$uniq{$id."/".$mate_id}}),"\n";
104 }
105 }
106 }