diff scripts/pickUniqMate.pl.orig @ 21:9672fe07a232 draft default tip

planemo upload for repository https://github.com/portiahollyoak/Tools commit 0fea84d05f8976b8360a8b4943ecb01b87e3ade0-dirty
author mvdbeek
date Mon, 05 Dec 2016 09:58:47 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/pickUniqMate.pl.orig	Mon Dec 05 09:58:47 2016 -0500
@@ -0,0 +1,106 @@
+#!/share/bin/perl
+use List::Util qw(sum);
+use Bio::Seq;
+
+die "perl $0 <mate sam with header> <uniq bed>\n" if @ARGV<1;
+
+open in,$ARGV[1];
+my %uniq;
+while(<in>)
+{
+	chomp;
+	my @f=split;
+	$uniq{$f[3]}=[@f];
+}
+close in;
+
+open in,$ARGV[0];
+my (%te,@ref,%ref);
+while(<in>)
+{
+	chomp;
+	my @f=split/\s+/,$_;
+	# headers
+	if(/^\@SQ/)
+	{
+		my ($sn,$ln)=/SN:(.*?)\tLN:(\d+)/;
+		push @ref,[$sn,$ln];
+		$ref{$sn}=$#ref;
+		next;
+	}
+
+	# unmapped
+	next if $f[2] eq "*";
+	
+	my $mm=200;
+	my $xa="";
+	for my $q (11..$#f)
+	{
+	    if($f[$q]=~/NM:/)
+	    {
+		$mm=$f[$q];
+		$mm =~ s/NM://;
+	    }
+
+	    if($f[$q]=~/XA:Z:/)
+	    {
+		($xa)=$f[$q]=~/XA:Z:(.*);$/;
+		last;
+	    }
+	}
+
+	if ($mm > 5) {next;}
+
+	my ($rnum)=$f[1]=~/(\d)$/;
+	# CIGAR
+	my (@cigar_m)=$f[5]=~/(\d+)M/g;
+	my (@cigar_d)=$f[5]=~/(\d+)D/g;
+	my (@cigar_s)=$f[5]=~/(\d+)S/g;
+	my (@cigar_i)=$f[5]=~/(\d+)I/g;
+	my $aln_ln=sum(@cigar_m,@cigar_d);
+	
+	my $strand="+";
+	if($f[1]=~/r/)
+	{
+	    my $seq=Bio::Seq->new(-seq=>$f[9], -alphabet => 'dna');
+	    $f[9]=$seq->revcom->seq;
+	    $strand="-";
+	}
+	
+	# align to the junctions
+	if(($f[3]+$aln_ln-1)>${$ref[$ref{$f[2]}]}[1])
+	{
+	    if(($f[3]+($aln_ln-1)/2)>${$ref[$ref{$f[2]}]}[1])
+	    {
+		$f[2]=${$ref[$ref{$f[2]}+1]}[0];
+		$f[3]=1;
+		$aln_ln=$aln_ln-(${$ref[$ref{$f[2]}]}[1]-$f[3]+1);
+	    }
+	    else
+	    {
+		$aln_ln=${$ref[$ref{$f[2]}]}[1]-$f[3]+1;
+	    }
+	}
+	
+	$pe{$f[0]}{$rnum}=$f[2].",".$strand."$f[3]".";";
+	    
+	# XA tag
+	my @xa=split(";",$xa);
+	$pe{$f[0]}{$rnum}.=join(",",(split/,/)[0,1]).";" foreach @xa;
+
+}
+close in;
+
+foreach my $id (keys %pe)
+{
+	next if exists $pe{$id}{1} && exists $pe{$id}{2} && exists $uniq{$id."/1"} && exists $uniq{$id."/2"};
+	foreach my $rid (keys %{$pe{$id}})
+	{
+		my $mate_id=($rid==1)?2:1;
+		if(exists $uniq{$id."/".$mate_id})
+		{
+			${$uniq{$id."/".$mate_id}}[4]=$pe{$id}{$rid};
+			print join("\t",@{$uniq{$id."/".$mate_id}}),"\n";
+		}
+	}
+}