annotate pileup.pl @ 7:f3a394b6cbb7 draft default tip

Uploaded
author ziru-zhou
date Tue, 18 Dec 2012 09:45:18 -0500
parents 5f4cb6ee3427
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
1 #!/usr/bin/perl
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
2
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
3 #purpose: script which calculates the percent of genome coverage and average coverage of bases
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
4 #author: Ziru Zhou
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
5 #date: October, 2012
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
6
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
7
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
8 use strict;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
9 use warnings;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
10 use File::Basename;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
11
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
12 #globals
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
13 #======================================================================
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
14 my $bamfile = $ARGV[0];
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
15 my $reffile = $ARGV[1];
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
16 my $outputfile = $ARGV[2];
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
17 my $bamname = $ARGV[3];
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
18 my $refname = $ARGV[4];
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
19
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
20 my $genomesize = 0;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
21 my $pileupwc = 0;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
22 my $pileupc4 = 0;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
23
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
24 my $percentofcover = 0;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
25 my $averagebasecoverage = 0;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
26
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
27
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
28 #function declarations
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
29 #======================================================================
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
30 #get the pileupwc value
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
31 sub Getpileupwc()
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
32 {
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
33 #system ("samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 > tmp");
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
34 #$pileupwc = `cat tmp | wc -l`;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
35 $pileupwc = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | wc -l`;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
36 }
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
37
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
38 #get the pileupc4 value
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
39 sub Getpileupc4()
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
40 {
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
41 $pileupc4 = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | awk '{ total += \$1 } END { print total }'`;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
42 }
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
43
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
44 #get the genomesize value
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
45 sub Getgenomesize()
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
46 {
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
47 $genomesize = `tail -n 1 "$reffile.fai"| cut --fields=3`;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
48 }
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
49
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
50 #function to calculate the final 2 values for output
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
51 sub Calculate()
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
52 {
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
53 $percentofcover = ( int($pileupwc) / int($genomesize) ) * 100;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
54 $averagebasecoverage = int($pileupc4) / int($pileupwc);
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
55 }
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
56
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
57 #function to write to output file
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
58 sub Output()
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
59 {
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
60 open FILE, '>'.$outputfile or die "unable to create $outputfile\n";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
61
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
62 print FILE "\n#";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
63 print FILE "\n# Generated by BAMEdit. Please send your questions/comments to modENCODE DCC at help\@modencode.org.";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
64 print FILE "\n#";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
65 print FILE "\n# coverage: % of bases in genome covered";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
66 print FILE "\n# avg coverage: average coverage of bases covered in genome ";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
67 print FILE "\n#\n";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
68 print FILE "input file\t$bamname\n";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
69 print FILE "genome file\t$refname\n";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
70 print FILE "genome length\t$genomesize";
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
71 printf FILE "coverage\t%.2f\n", $percentofcover;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
72 printf FILE "avg coverage\t%.2f\n", $averagebasecoverage;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
73
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
74 close FILE;
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
75 }
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
76
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
77 #function to clean up, the tmp file and the .fai file
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
78 sub Cleanup()
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
79 {
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
80 system ("sudo rm ${reffile}.dat.fai");
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
81 }
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
82
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
83 #function calls
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
84 #======================================================================
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
85 Getpileupwc();
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
86 Getpileupc4();
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
87 Getgenomesize();
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
88 Calculate();
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
89 Output();
5f4cb6ee3427 Uploaded
ziru-zhou
parents:
diff changeset
90 Cleanup();