view SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line source

#!/usr/bin/perl -w
###
# Purpose: add or update the coverage attribute of a gff file
# 
# Input: gff file
#
# Output: gff printed to the screen (standard output)
#
###------------------------------------------------------

#!/usr/bin/perl -w                                                                                                                                                     
              
use vars qw($USAGE);                      
use strict;                               

=head1 NAME

coverageGff.pl - add or compute the coverage of a gff file

=head1 SYNOPSIS

% coverageGff.pl -i file.gff -l readLength [-h] 

=head1 DESCRIPTION

This script parses a gff file, computes the read coverage from the "nbElements" tag and writes the coverage in gff3 format.

    -i|--input fileName     gff input file name
    -l|--length ReadLength  length of the reads in bp [38 default]
    -o|--output fileName    gff3 output file name
   [-h|--help]              help mode then die                              

=head1 AUTHOR - Claire Toffano-Nioche - fev.11

=cut
#-----------------------
use Getopt::Long qw(GetOptionsFromArray);

# usage()
#
# Returns the one-line usage string shown when the command line is invalid.
sub usage {
    return "Usage: $0 -i file.gff [-l readLength] [-o out.gff3] [-h]\n";
}

# compute_coverage($nb_elements, $read_length, $start, $end)
#
# Returns the read coverage of a feature, i.e.
#     read_length * nb_elements / (end - start + 1)
# rounded half-up to two decimals. Returns undef (empty list in list
# context) when the feature span is not strictly positive, so that the
# caller never divides by zero.
sub compute_coverage {
    my ($nb_elements, $read_length, $start, $end) = @_;

    my $span = $end - $start + 1;
    return if $span <= 0;

    my $coverage = $read_length * $nb_elements / $span;
    return int(100 * $coverage + 0.5) / 100;    # round to 2 decimals
}

# add_coverage($line, $read_length)
#
# Takes one gff line (line terminator included, if any) and returns it with
# a "coverage=<value>" attribute added or refreshed. Lines that carry no
# "nbElements=<digits>" tag, or whose start/end columns (4th and 5th
# tab-separated fields) are not usable integers, are returned unchanged.
sub add_coverage {
    my ($line, $read_length) = @_;

    my ($nb_elements) = $line =~ /nbElements=(\d+)/;
    return $line unless defined $nb_elements;

    my ($start, $end) = (split /\t/, $line)[3, 4];
    my $coverage;
    if (   defined $start && $start =~ /\A\d+\z/
        && defined $end   && $end   =~ /\A\d+\z/)
    {
        $coverage = compute_coverage($nb_elements, $read_length, $start, $end);
    }
    if (!defined $coverage) {
        warn "Skipping coverage computation, invalid start/end coordinates: $line";
        return $line;
    }

    if ($line =~ /\bcoverage=/) {
        # Replace only the value of the existing tag: stop at the next ';'
        # so that the attributes following "coverage" are preserved.
        $line =~ s/\bcoverage=[^;\t\r\n]*/coverage=$coverage/;
    }
    elsif ($line =~ /;/) {
        # Insert the new tag right after the first attribute separator.
        $line =~ s/;/;coverage=$coverage;/;
    }
    else {
        # Single attribute without any ';': append the tag before the
        # trailing whitespace / line terminator.
        $line =~ s/(\s*)\z/;coverage=$coverage$1/;
    }
    return $line;
}

# main(@args)
#
# Entry point. Parses the command line (-i/--input, -l/--length,
# -o/--output, -h/--help), then copies the input gff to the output, adding
# or updating the coverage of every line carrying an "nbElements" tag.
# The output goes to the file given with -o, or to standard output when -o
# is absent. Dies on an invalid command line or on I/O errors; returns 0 on
# success.
sub main {
    my @args = @_;
    my ($input_name, $read_length, $output_name, $want_help) = (undef, 38, undef, 0);

    GetOptionsFromArray(
        \@args,
        'input|i=s'  => \$input_name,
        'length|l=i' => \$read_length,
        'output|o=s' => \$output_name,
        'help|h'     => \$want_help,
    ) or die usage();

    if ($want_help) {
        # List form of exec: the script path never goes through a shell.
        exec('pod2text', $0) or die "Can't run pod2text: $!\n";
    }

    defined $input_name or die "No gff input file given.\n" . usage();
    $read_length > 0    or die "Read length must be a positive integer.\n" . usage();

    open(my $gff_in, '<', $input_name)
        or die "Can't open gff file: \"$input_name\": $!\n";

    my $out;
    if (defined $output_name) {
        open($out, '>', $output_name)
            or die "Error can't open $output_name for output. $!\n";
    }
    else {
        $out = \*STDOUT;
    }

    # Stream the file line by line; there is no need to hold it in memory.
    while (my $line = <$gff_in>) {
        print {$out} add_coverage($line, $read_length);
    }

    close $gff_in;
    if (defined $output_name) {
        # Buffered write errors only surface when the handle is closed.
        close $out or die "Error while writing $output_name. $!\n";
    }
    return 0;
}

# Run only when executed as a script, so that the helpers above can be
# loaded (require/do) without side effects.
exit(main(@ARGV)) unless caller();

1;