comparison 2.4/script/Check_integration.sh @ 13:e3609c8714fb draft

Uploaded
author plus91-technologies-pvt-ltd
date Fri, 30 May 2014 03:37:55 -0400
parents
children
comparison
equal deleted inserted replaced
12:980fce54e60a 13:e3609c8714fb
1 #!/bin/sh
2 #$ -V
3 #$ -cwd
4 #$ -q 1-day
5 #$ -m ae
6 #$ -M hart.steven@mayo.edu
7 #$ -l h_vmem=8G
8 #$ -l h_stack=10M
9 VCF_FILE=$1
10 x=$2
11 #VIRAL_SEQDB=/data2/bsi/tertiary/m110344/SoftTile/Mia/BLAST_DB/OBrien/Virus_PCGS.fasta #must me indexed by bwasw
12 VIRAL_SEQDB=$3
13 VCF_FILE=$4
14 #VCF_FILE=final.vcf
15
16 set -x
17
18 perl -ane '$dist=100;
19 $mate=$F[4];
20 $mate=~s/[A-Z]|\[|\]//g;
21 @mate=split(/:/,$mate);
22 $end1a=@F[1]-$dist;
23 $end1b=@F[1]+$dist;
24 $end2a=$dist+@mate[1];
25 $end2b=$dist+@mate[1];
26 print "@F[0]\t$end1a\t$end1b\n@mate[0]\t$end2a\t$end2b\n"' $VCF_FILE|
27 sortBed > targets.bed
28
29
30 #100 min
31 time samtools view -h $x -L targets.bed |awk '(($9==0)&&($11!~/#/)&&($3!~/^chrGL/)&&($3!~/^chrM/))'|perl -ane 'print "\@@F[0]\n@F[9]\n+\n@F[10]\n"' >${x%%.bam}.res.fq
32 #23 min
33 time bwa mem -t 4 $VIRAL_SEQDB ${x%%.bam}.res.fq |samtools view -S - |grep gi > ${x%%.bam}.tmp.sam
34
35 #find out how many hits there are
36 cut -f3 ${x%%.bam}.tmp.sam|perl -ne '@_=split(":",$_);@res=split(/_/,@_[1],2);print "@res[1]"' | sort|uniq -c|sort -k1nr|tee ${x%%.bam}.Viral_maps.out |head
37 #Get the reads mapping to those hits to find out where the integration site is
38
39 #Read in the viruses until there is a significant drop off in number of reads (i.e. contributing less than 10%)
40 perl -ne '@_=split(" ",$_);$i=$_[0]+$i;$j=$_[0];$res=$j/$i;if($res>.1){print "@_[1]\n"}' ${x%%.bam}.Viral_maps.out >${x%%.bam}.to.keep
41 fgrep -f ${x%%.bam}.to.keep ${x%%.bam}.tmp.sam |cut -f1 >${x%%.bam}.reads
42
43 #75min+
44
45 time samtools view $x -L targets.bed |
46 fgrep -f ${x%%.bam}.reads|
47 awk '{if(($9==0)&&($11!~/#/)&&($3!~/^chrGL/)&&($3!~/^chrM/)&&($3!~/^\*/)){print $3"\t"$4"\t"$4"\t"$1}}'|
48 tee ${x%%.bam}.unsorted.bed|
49 sortBed | mergeBed -nms -d 1000|
50 perl -e 'open (FILE,"$ARGV[0]") or die "cant open file\n\n";
51 $SAM="$ARGV[1]";
52 $SAM=~chomp;
53 while(<FILE>){
54 chomp;
55 @_=split(/\t/,$_);
56 @reads=split(/;/,@_[3]);
57 #print "LINE=$_\nRES=grep $reads[0] $SAM\n";
58 $res=`grep $reads[0] $SAM` ;
59 # print "AFTER GREP, RES=$res\n";
60 if($res){
61 @res=split(/\t/,$res);
62 print join("\t",@_[0..2],@res[2])."\n"
63 }
64 };
65 close FILE' - ${x%%.bam}.tmp.sam |
66 perl -ne 's/\|/\t/g;@_=split("\t",$_);print join ("\t",@_[0..2,7])'|
67 perl -pne 's/_/\t/'| cut -f4 --complement |
68 perl -e '
69 open (FILE,"$ARGV[0]") or die "cant open file\n\n";
70 $SAM="$ARGV[1]";
71 while(<FILE>){
72 chomp;
73 @_=split(/\t/,$_);
74 $res=`grep $_[3] $SAM`;
75 if($res){
76 @res=split(" ",$res);
77 $reads[0]=chomp;
78 print join("\t",@_[0..4],@res[0])."\n";
79 }
80 }
81 close FILE;
82 ' - ${x%%.bam}.Viral_maps.out|
83 perl -pne 's/_/ /g'> ${x%%.bam}.Virus.integrated.bed
84
85 rm ${x%%.bam}.reads ${x%%.bam}.to.keep ${x%%.bam}.tmp.sam ${x%%.bam}.res.fq
86 echo "DONE with $x"