view SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.pl @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl -w 
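###
# Purpose : protocol for detecting potential non-coding RNAs
#
# Inputs : Smart gff3 mapping file
#          gff file of the genes
#          gff file of the potential cis-regulatory clusters
#
# Output : gff file of the ncRNA clusters
#
# NOTE(review): this header looks inherited from protocol_NC_V2_CTN3.pl;
# this script itself only takes an input gff (-i) and a template gff (-t)
# and writes the selected input lines to STDOUT -- confirm and update.
#
###------------------------------------------------------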

use vars qw($USAGE);                      
use strict; 

=head1 NAME

strictlyIncludeGff.pl - select the GFF elements strictly included in a template

=head1 SYNOPSIS

% strictlyIncludeGff.pl -i toSelect.gff3 -t template.gff3 > result.gff3

=head1 DESCRIPTION

strictlyIncludeGff.pl - print the elements of the input file that are strictly included in an element of the template file, on the same sequence and the same strand (gff files)

    -i|--input 		fileName  gff input file name
    -t|--template	fileName  gff template file name
    [-h|--help]		help mode then die                              

=head1 AUTHOR - CTN - mar.11 
(from RNA-Vibrio/protocol_NC_V2_CTN3.pl - Claire KUCHLY)

=cut
      
#----------------------------------------------------------------------------
# Main program.
#
# Loads the template GFF file (-t|--template), then prints to STDOUT every
# line of the input GFF file (-i|--input) whose [start, end] interval lies
# inside the interval of at least one template feature located on the same
# sequence and on the same strand. Each selected line is printed once.
#
# When -h|--help is given, or when the input or template file name is
# missing, the POD usage of this script is displayed through pod2text.
#
# Dies if one of the two files cannot be opened or if pod2text cannot be run.
#----------------------------------------------------------------------------

# check command line :
my ($inputFileName, $templateFileName);
my $helpWanted = 0;
my $argIndex   = 0;
while ($argIndex <= $#ARGV) {
    my $arg = $ARGV[$argIndex++];
    # Option names are matched as whole words, so that a file name which
    # merely contains "-i", "-t" or "-h" is never mistaken for an option.
    if ($arg =~ /\A(?:--input|-i)\z/) {
        # The value is consumed here, hence never re-read as an option.
        $inputFileName = $ARGV[$argIndex++];
    }
    elsif ($arg =~ /\A(?:--template|-t)\z/) {
        $templateFileName = $ARGV[$argIndex++];
    }
    elsif ($arg =~ /\A(?:--help|-h)\z/) {
        $helpWanted = 1;
    }
    # Any other argument is silently ignored, as it always was.
}

if ($helpWanted or not defined $inputFileName or not defined $templateFileName) {
    # List form of exec: the script path is never interpreted by a shell.
    exec('pod2text', $0) or die "Error: Can't run pod2text on \"$0\", $!";
}

# Three-argument open: the file names can not be read as an open mode.
open(my $inputHandle, '<', $inputFileName)
    or die "Error: Can't open \"$inputFileName\", $!";
open(my $templateHandle, '<', $templateFileName)
    or die "Error : Can't open file \"$templateFileName\", $!";

# Template line sample:
##NC_011753.2	RefSeq	gene	367	834	.	-	.	locus_tag=VS_0001;db_xref=GeneID:7162789

# Index the template intervals by "sequence name <TAB> strand", so that an
# input line is only compared with the seeds it could possibly match.
my %seedsFor;
while (my $seedLine = <$templateHandle>) {
    next if $seedLine =~ /^#/;          # GFF comment or directive
    chomp $seedLine;
    my @list = split(/\t/, $seedLine);
    next if @list < 7;                  # blank line or not a feature line
    # $list[0]: sequence name, $list[3]: start, $list[4]: end, $list[6]: strand
    push @{ $seedsFor{"$list[0]\t$list[6]"} }, [ $list[3], $list[4] ];
}
close $templateHandle;

LINE: while (my $ligne = <$inputHandle>) {
    next LINE if $ligne =~ /^#/;        # GFF comment or directive
    chomp(my $record = $ligne);         # $ligne itself is printed untouched
    my @list = split(/\t/, $record);
    next LINE if @list < 7;             # blank line or not a feature line
    my $seeds = $seedsFor{"$list[0]\t$list[6]"} or next LINE;
    foreach my $seed (@$seeds) {
        # Element included in the seed (sequence and strand already match).
        if ($seed->[0] <= $list[3] and $seed->[1] >= $list[4]) {
            print $ligne;
            next LINE;                  # one enclosing seed is enough
        }
    }
}
close $inputHandle;
exit(0);