diff SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.pl @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.pl	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,79 @@
+#!/usr/bin/perl -w 
+###
+# But : protocol permettant la detection d'RNA non codant potentiel
+# 
+# Entrees : fichier de mapping Smart gff3
+#			fichier gff des gènes
+#			fichier gff des clusters Cis regulateur potentiel
+#
+# Sortie : fichier gff des clusters ARN nc
+#
+###------------------------------------------------------
+
+use vars qw($USAGE);                      
+use strict; 
+
+=head1 NAME
+
+strictlyIncludeGff.pl
+
+=head1 SYNOPSIS
+
+% strictlyIncludeGff.pl -i toSelect.gff3 -t template.gff3 > result.gff3
+
+=head1 DESCRIPTION
+
+strictlyIncludeGff.pl - print the elements strictly included in the template (gff files)
+
+    -i|--input 		fileName  gff input file name
+    -t|--template	fileName  gff template file name
+    [-h|--help]		help mode then die                              
+
+=head1 AUTHOR - CTN - mar.11 
+(from RNA-Vibrio/protocol_NC_V2_CTN3.pl - Claire KUCHLY)
+
+=cut
+      
+#----------------------------------------------------------------------------
+# check command line :
+# Parse the command line, then print each input feature lying inside a template
+# feature of the same sequence and strand (printed once per enclosing feature).
+my ($inputName, $templateName);
+if (@ARGV < 2) {    # no usable option/value pair: show the POD help instead
+    exec('pod2text', $0) or die "Error: Can't run pod2text, $!";
+}
+for (my $num = 0; $num <= $#ARGV; $num++) {
+    my $arg = $ARGV[$num];
+    # Exact matches only; ++$num consumes the value (it may contain "-i"/"-t"/"-h").
+    if    ($arg =~ /^(?:--input|-i)$/)    { $inputName    = $ARGV[++$num] }
+    elsif ($arg =~ /^(?:--template|-t)$/) { $templateName = $ARGV[++$num] }
+    elsif ($arg =~ /^(?:--help|-h)$/)     { exec('pod2text', $0) or die "Error: Can't run pod2text, $!" }
+}
+die "Error: no input file given (-i|--input)\n"       unless defined $inputName;
+die "Error: no template file given (-t|--template)\n" unless defined $templateName;
+# Template features, each stored as [start, end, strand, sequence name].
+my @seed;
+open(my $seedFh, '<', $templateName) or die "Error : Can't open file \"$templateName\", $!";
+while (my $seedLine = <$seedFh>) {
+    next if $seedLine =~ /^\s*(?:#|\z)/;    # comment, directive or blank line
+    my @list = split(/\t/, $seedLine);
+    next if @list < 7;                      # no strand column: not a feature line
+    $list[6] =~ s/\s+\z//;                  # strand ends the line when there are 7 columns
+    push @seed, [ @list[3, 4, 6, 0] ];
+}
+close $seedFh;
+# Input features: echo the untouched line for every template feature enclosing it.
+open(my $arnFh, '<', $inputName) or die "Error: Can't open \"$inputName\", $!";
+while (my $ligne = <$arnFh>) {
+    next if $ligne =~ /^\s*(?:#|\z)/;       # comment, directive or blank line
+    my @list = split(/\t/, $ligne);
+    next if @list < 7;                      # no strand column: not a feature line
+    $list[6] =~ s/\s+\z//;                  # strand ends the line when there are 7 columns
+    foreach my $s (@seed) {
+        if ($s->[3] eq $list[0] and $s->[2] eq $list[6] and $s->[0] <= $list[3] and $s->[1] >= $list[4]) {
+            print $ligne;
+        }
+    }
+}
+close $arnFh;
+exit(0);