18
|
1 #!/usr/bin/perl -w
|
|
2 ###
|
|
3 # But : ajout ou modif de la couverture d'un gff
|
|
4 #
|
|
5 # Entrees : fichier gff
|
|
6 #
|
|
7 # Sortie : gff affiche a l'ecran
|
|
8 #
|
|
9 ###------------------------------------------------------
|
|
10
|
|
11 #!/usr/bin/perl -w
|
|
12
|
|
13 use vars qw($USAGE);
|
|
14 use strict;
|
|
15
|
|
16 =head1 NAME
|
|
17
|
|
18 coverageGff.pl - add or compute the coverage of a gff file
|
|
19
|
|
20 =head1 SYNOPSIS
|
|
21
|
|
22 % coverageGff.pl -i file.gff -l readLength [-h]
|
|
23
|
|
24 =head1 DESCRIPTION
|
|
25 This script will parse gff file, compute read coverage form the "nbElements" tag and write coverage in gff3 format.
|
|
26
|
|
27 -i|--input fileName gff input file name
|
|
28 -l|--length ReadLength lenght of the reads in bp [38 default]
|
|
29 -o|--output fileName gff3 output file name
|
|
30 [-h|--help] help mode then die
|
|
31
|
|
32 =head1 AUTHOR - Claire Toffano-Nioche - fev.11
|
|
33
|
|
34 =cut
|
|
35 #-----------------------
|
|
36 my ($fileName, $length, $outFileName) = ("", 38, "coverageOut.gff3") ;
|
|
37 # command line check
|
|
38 foreach my $num (0 .. $#ARGV) {
|
|
39 SWITCH: for ($ARGV[$num]) {
|
|
40 /--input|-i/ && do {
|
|
41 $fileName=$ARGV[$num+1];
|
|
42 open ( fichierGff, "< $fileName" ) or die "Can't open gff file: \"$fileName\"\n" ;
|
|
43 last };
|
|
44 /--length|-l/ && do {
|
|
45 $length=$ARGV[$num+1];
|
|
46 last };
|
|
47 /--help|-h/ && do { exec("pod2text $0\n") ; die };
|
|
48 }
|
|
49 }
|
|
50 # informations retrieval
|
|
51 # open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
|
|
52 my @lines = <fichierGff> ;
|
|
53 close fichierGff ;
|
|
54 # treatment
|
|
55 #print "gff file read ; number of lines : $#lines\n";
|
|
56 for (my $i=0 ; $i <= $#lines ; $i++) {
|
|
57 # compute coverage :
|
|
58 if ($lines[$i] =~ /nbElements=/) {
|
|
59 my ($nbE)=($lines[$i] =~ /nbElements=(\d+)/) ;
|
|
60 my @gffCol=split("\t", $lines[$i]) ;
|
|
61 # print "ligne : $i, nbE : $nbE, length : $length, debut : $gffCol[3], fin : $gffCol[4].\n";
|
|
62 my $cover=$length*$nbE/($gffCol[4]-$gffCol[3]+1) ;
|
|
63 $cover=int(100*$cover+0.5)/100 ; # arronri sup. precision 2 chiffres
|
|
64 if ($lines[$i] =~ /coverage=/) { # replace coverage
|
|
65 $lines[$i] =~ s/coverage=.*;/coverage=$cover;/ ;
|
|
66 } else { # add coverage
|
|
67 $lines[$i] =~ s/;/;coverage=$cover;/ ;
|
|
68 }
|
|
69 }
|
|
70 # print OUT $lines[$i] ;
|
|
71 print $lines[$i] ;
|
|
72 }
|
|
73 #close OUT ;
|
|
74 exit(0);
|