Mercurial > repos > ziru-zhou > bamedit
comparison pileup.pl @ 2:5f4cb6ee3427 draft
Uploaded
author | ziru-zhou |
---|---|
date | Tue, 18 Dec 2012 09:20:03 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:b8a15b4e7c98 | 2:5f4cb6ee3427 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 #purpose: script which calculates the percent of genome coverage and average coverage of bases | |
4 #author: Ziru Zhou | |
5 #date: October, 2012 | |
6 | |
7 | |
8 use strict; | |
9 use warnings; | |
10 use File::Basename; | |
11 | |
12 #globals | |
13 #====================================================================== | |
14 my $bamfile = $ARGV[0]; | |
15 my $reffile = $ARGV[1]; | |
16 my $outputfile = $ARGV[2]; | |
17 my $bamname = $ARGV[3]; | |
18 my $refname = $ARGV[4]; | |
19 | |
20 my $genomesize = 0; | |
21 my $pileupwc = 0; | |
22 my $pileupc4 = 0; | |
23 | |
24 my $percentofcover = 0; | |
25 my $averagebasecoverage = 0; | |
26 | |
27 | |
28 #function declarations | |
29 #====================================================================== | |
30 #get the pileupwc value | |
31 sub Getpileupwc() | |
32 { | |
33 #system ("samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 > tmp"); | |
34 #$pileupwc = `cat tmp | wc -l`; | |
35 $pileupwc = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | wc -l`; | |
36 } | |
37 | |
38 #get the pileupc4 value | |
39 sub Getpileupc4() | |
40 { | |
41 $pileupc4 = `samtools pileup -f ${reffile} ${bamfile} | cut --fields=4 | awk '{ total += \$1 } END { print total }'`; | |
42 } | |
43 | |
44 #get the genomesize value | |
45 sub Getgenomesize() | |
46 { | |
47 $genomesize = `tail -n 1 "$reffile.fai"| cut --fields=3`; | |
48 } | |
49 | |
50 #function to calculate the final 2 values for output | |
51 sub Calculate() | |
52 { | |
53 $percentofcover = ( int($pileupwc) / int($genomesize) ) * 100; | |
54 $averagebasecoverage = int($pileupc4) / int($pileupwc); | |
55 } | |
56 | |
57 #function to write to output file | |
58 sub Output() | |
59 { | |
60 open FILE, '>'.$outputfile or die "unable to create $outputfile\n"; | |
61 | |
62 print FILE "\n#"; | |
63 print FILE "\n# Generated by BAMEdit. Please send your questions/comments to modENCODE DCC at help\@modencode.org."; | |
64 print FILE "\n#"; | |
65 print FILE "\n# coverage: % of bases in genome covered"; | |
66 print FILE "\n# avg coverage: average coverage of bases covered in genome "; | |
67 print FILE "\n#\n"; | |
68 print FILE "input file\t$bamname\n"; | |
69 print FILE "genome file\t$refname\n"; | |
70 print FILE "genome length\t$genomesize"; | |
71 printf FILE "coverage\t%.2f\n", $percentofcover; | |
72 printf FILE "avg coverage\t%.2f\n", $averagebasecoverage; | |
73 | |
74 close FILE; | |
75 } | |
76 | |
77 #function to clean up, the tmp file and the .fai file | |
78 sub Cleanup() | |
79 { | |
80 system ("sudo rm ${reffile}.dat.fai"); | |
81 } | |
82 | |
83 #function calls | |
84 #====================================================================== | |
85 Getpileupwc(); | |
86 Getpileupc4(); | |
87 Getgenomesize(); | |
88 Calculate(); | |
89 Output(); | |
90 Cleanup(); |