comparison pileup.pl @ 2:5f4cb6ee3427 draft

Uploaded
author ziru-zhou
date Tue, 18 Dec 2012 09:20:03 -0500
parents
children
comparison
equal deleted inserted replaced
1:b8a15b4e7c98 2:5f4cb6ee3427
1 #!/usr/bin/perl
2
3 #purpose: script which calculates the percent of genome coverage and average coverage of bases
4 #author: Ziru Zhou
5 #date: October, 2012
6
7
8 use strict;
9 use warnings;
10 use File::Basename;
11
12 #globals
13 #======================================================================
14 my $bamfile = $ARGV[0];
15 my $reffile = $ARGV[1];
16 my $outputfile = $ARGV[2];
17 my $bamname = $ARGV[3];
18 my $refname = $ARGV[4];
19
20 my $genomesize = 0;
21 my $pileupwc = 0;
22 my $pileupc4 = 0;
23
24 my $percentofcover = 0;
25 my $averagebasecoverage = 0;
26
27
28 #function declarations
29 #======================================================================
30 #get the pileupwc value
31 sub Getpileupwc()
32 {
33 #system ("samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 > tmp");
34 #$pileupwc = `cat tmp | wc -l`;
35 $pileupwc = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | wc -l`;
36 }
37
38 #get the pileupc4 value
39 sub Getpileupc4()
40 {
41 $pileupc4 = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | awk '{ total += \$1 } END { print total }'`;
42 }
43
44 #get the genomesize value
45 sub Getgenomesize()
46 {
47 $genomesize = `tail -n 1 "$reffile.fai"| cut --fields=3`;
48 }
49
50 #function to calculate the final 2 values for output
51 sub Calculate()
52 {
53 $percentofcover = ( int($pileupwc) / int($genomesize) ) * 100;
54 $averagebasecoverage = int($pileupc4) / int($pileupwc);
55 }
56
57 #function to write to output file
58 sub Output()
59 {
60 open FILE, '>'.$outputfile or die "unable to create $outputfile\n";
61
62 print FILE "\n#";
63 print FILE "\n# Generated by BAMEdit. Please send your questions/comments to modENCODE DCC at help\@modencode.org.";
64 print FILE "\n#";
65 print FILE "\n# coverage: % of bases in genome covered";
66 print FILE "\n# avg coverage: average coverage of bases covered in genome ";
67 print FILE "\n#\n";
68 print FILE "input file\t$bamname\n";
69 print FILE "genome file\t$refname\n";
70 print FILE "genome length\t$genomesize";
71 printf FILE "coverage\t%.2f\n", $percentofcover;
72 printf FILE "avg coverage\t%.2f\n", $averagebasecoverage;
73
74 close FILE;
75 }
76
77 #function to clean up, the tmp file and the .fai file
78 sub Cleanup()
79 {
80 system ("sudo rm ${reffile}.dat.fai");
81 }
82
83 #function calls
84 #======================================================================
85 Getpileupwc();
86 Getpileupc4();
87 Getgenomesize();
88 Calculate();
89 Output();
90 Cleanup();