annotate SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/perl -w
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2 ###
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 # But : ajout ou modif de la couverture d'un gff
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # Entrees : fichier gff
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # Sortie : gff affiche a l'ecran
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 ###------------------------------------------------------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 #!/usr/bin/perl -w
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 use vars qw($USAGE);
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 use strict;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 =head1 NAME
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 coverageGff.pl - add or compute the coverage of a gff file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 =head1 SYNOPSIS
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 % coverageGff.pl -i file.gff -l readLength [-h]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 =head1 DESCRIPTION
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25 This script parses a gff file, computes the read coverage from the "nbElements" tag and writes the coverage in gff3 format.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 -i|--input fileName gff input file name
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 -l|--length ReadLength length of the reads in bp [38 default]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 -o|--output fileName gff3 output file name (currently not handled by the script: the result is printed on standard output)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30 [-h|--help] help mode then die
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32 =head1 AUTHOR - Claire Toffano-Nioche - fev.11
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 =cut
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 #-----------------------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36 my ($fileName, $length, $outFileName) = ("", 38, "coverageOut.gff3") ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37 # command line check
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 foreach my $num (0 .. $#ARGV) {
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 SWITCH: for ($ARGV[$num]) {
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40 /--input|-i/ && do {
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41 $fileName=$ARGV[$num+1];
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42 open ( fichierGff, "< $fileName" ) or die "Can't open gff file: \"$fileName\"\n" ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43 last };
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44 /--length|-l/ && do {
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
45 $length=$ARGV[$num+1];
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
46 last };
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
47 /--help|-h/ && do { exec("pod2text $0\n") ; die };
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
48 }
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
49 }
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
50 # informations retrieval
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
51 # open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
52 my @lines = <fichierGff> ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
53 close fichierGff ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
54 # treatment
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
55 #print "gff file read ; number of lines : $#lines\n";
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
56 for (my $i=0 ; $i <= $#lines ; $i++) {
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
57 # compute coverage :
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
58 if ($lines[$i] =~ /nbElements=/) {
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
59 my ($nbE)=($lines[$i] =~ /nbElements=(\d+)/) ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
60 my @gffCol=split("\t", $lines[$i]) ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
61 # print "ligne : $i, nbE : $nbE, length : $length, debut : $gffCol[3], fin : $gffCol[4].\n";
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
62 my $cover=$length*$nbE/($gffCol[4]-$gffCol[3]+1) ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
63 $cover=int(100*$cover+0.5)/100 ; # arronri sup. precision 2 chiffres
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
64 if ($lines[$i] =~ /coverage=/) { # replace coverage
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
65 $lines[$i] =~ s/coverage=.*;/coverage=$cover;/ ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
66 } else { # add coverage
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
67 $lines[$i] =~ s/;/;coverage=$cover;/ ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
68 }
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
69 }
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
70 # print OUT $lines[$i] ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
71 print $lines[$i] ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
72 }
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
73 #close OUT ;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
74 exit(0);