diff SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl	Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,106 @@
+#!/usr/bin/perl -w
+###
+# Purpose: add or update the read coverage of the features of a gff file
+#
+# Input: gff file
+#
+# Output: gff printed on standard output
+#
+###------------------------------------------------------
+
+use vars qw($USAGE);
+use strict;
+use Getopt::Long;
+
+=head1 NAME
+
+coverageGff.pl - add or compute the coverage of a gff file
+
+=head1 SYNOPSIS
+
+% coverageGff.pl -i file.gff [-l readLength] [-h] > file.gff3
+
+=head1 DESCRIPTION
+
+This script parses a gff file, computes the read coverage from the "nbElements"
+tag and prints the result in gff3 format on standard output.
+
+    -i|--input fileName     gff input file name
+    -l|--length readLength  length of the reads in bp [38 default]
+    -o|--output fileName    accepted but ignored: output goes to standard output
+   [-h|--help]              help mode then die
+
+=head1 AUTHOR - Claire Toffano-Nioche - fev.11
+
+=cut
+#-----------------------
+
+# Return $line with a "coverage" attribute added or refreshed.
+#
+# Coverage is readLength * nbElements / featureLength rounded to two decimals,
+# where nbElements is read from the attributes (9th) column and featureLength
+# is end - start + 1 (5th and 4th columns).
+# Lines with fewer than 9 columns or without a numeric "nbElements" attribute
+# are returned unchanged; so are lines with unusable coordinates, after a
+# warning on STDERR.
+sub addCoverage {
+    my ($line, $readLength) = @_;
+
+    # Set the line terminator aside so it cannot end up inside an attribute.
+    my ($record, $eol) = ($line =~ /\A(.*?)(\r?\n?)\z/s);
+    # Limit of 9: anything after the 8th tab stays in the attributes column.
+    my @gffCol = split(/\t/, $record, 9);
+    return $line if @gffCol < 9;
+
+    my ($nbE) = ($gffCol[8] =~ /nbElements=(\d+)/);
+    return $line unless defined $nbE;
+
+    my ($start, $end) = @gffCol[3, 4];
+    if ($start !~ /\A\d+\z/ or $end !~ /\A\d+\z/ or $end < $start) {
+        warn "Coverage not computed, invalid coordinates: $line";
+        return $line;
+    }
+
+    my $cover = $readLength * $nbE / ($end - $start + 1);
+    $cover = int(100 * $cover + 0.5) / 100;    # round to 2 decimal places
+
+    if ($gffCol[8] =~ /(?:\A|;)\s*coverage=/) {
+        # Replace the value only: a greedy match up to the last ';' would
+        # delete every attribute that follows "coverage".
+        $gffCol[8] =~ s/((?:\A|;)\s*coverage=)[^;]*/$1$cover/;
+    } elsif ($gffCol[8] =~ /;/) {
+        # Insert right after the first attribute.
+        $gffCol[8] =~ s/;/;coverage=$cover;/;
+    } else {
+        # Single attribute without ';': append instead of silently skipping.
+        $gffCol[8] .= ";coverage=$cover";
+    }
+    return join("\t", @gffCol) . $eol;
+}
+
+my ($fileName, $length, $outFileName, $help) = (undef, 38, undef, 0);
+my $usage = "Usage: $0 -i file.gff [-l readLength] [-h]\n";
+
+# command line check
+GetOptions(
+    'input|i=s'  => \$fileName,
+    'length|l=f' => \$length,
+    'output|o=s' => \$outFileName,    # accepted but ignored (see DESCRIPTION)
+    'help|h'     => \$help,
+) or die $usage;
+
+if ($help) {
+    # List form of exec: $0 is never interpreted by a shell.
+    exec('pod2text', $0) or die "Can't run pod2text: $!\n";
+}
+die "No gff input file given.\n$usage" unless defined $fileName;
+die "Read length must be a positive number, got \"$length\".\n" unless $length > 0;
+
+open(my $gffHandle, '<', $fileName) or die "Can't open gff file: \"$fileName\" ($!)\n";
+
+# treatment: the file is streamed line by line instead of being loaded whole
+while (my $line = <$gffHandle>) {
+    print addCoverage($line, $length);
+}
+close $gffHandle;
+exit(0);