Mercurial > repos > yufei-luo > s_mart
diff SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl Mon Apr 29 03:20:15 2013 -0400 @@ -0,0 +1,187 @@ +#!/usr/bin/perl -w +### +# But : protocol permettant la detection d'RNA non codant potentiel +# +# Entrees : fichier de mapping Smart gff3 +# fichier gff des gènes +# fichier gff des clusters Cis regulateur potentiel +# +# Sortie : fichier gff des clusters ARN nc +# +###------------------------------------------------------ +use vars qw($USAGE); +use strict; + +=head1 NAME + +interElementGff.pl - creation of a new Gff corresponding to the region of two successive Elements + +=head1 SYNOPSIS + +% interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 [-s 50] [-a 20] [-n seqName] [-h] + +=head1 DESCRIPTION +This script will determine cluster ok ncRNA. + + -i|--input fileName gff input file name + -o|--output fileName gff output file name + -n|--name seqName sequence name + -p|--print print parameters used + + -f5ff n number of nt to exclude from 5' seed when gene before is Forward, seed is Forward and next gene is Forward [default 0] + -ff3f n number... " ...[default 0] + + -f5fr n number... " ...[default 0] + -ff3r n number... " ...[default 0] + + -fr3f n number... " ...[default 0] + -fr5f n number... " ...[default 0] + + -f3rr n number... " ...[default 0] + -fr5r n number... " ...[default 0] + + -r5ff n number... " ...[default 0] + -rf3f n number... " ...[default 0] + + -r5fr n number... " ...[default 0] + -rf3r n number... " ...[default 0] + + -r3rf n number... " ...[default 0] + -rr5f n number... " ...[default 0] + + -r3rr n number... " ...[default 0] + -rr5r n number... " ...[default 0] + + [-h|--help] help mode then die + + +USAGE_CASE + +% interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 -ff 53 -rr 23 -n NC_011744 + +BUG + +Caution : input file needs to be sorted on positions + +Caution : for -f/r options add +3 bp to include stop codon if not in input file + +=head1 AUTHOR - CTN - apr.11 +(from RNA-Vibrio/protocol_NC_V2.pl - Claire KUCHLY) + +=cut +#---------------------------------------------------------------------------- +# check command line : +my ($IDfile, $OutputFileName, $f5ff, $ff3f, $f5fr, $ff3r, $f3rf, $fr5f, $f3rr,$fr5r, $r5ff, $rf3f, $r5fr, $rf3r, $r3rf, $rr5f, $r3rr, $rr5r, $seqName, $printParameters) = + (undef, undef , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", 0) ; +if ($#ARGV==0) { + die (exec("pod2text $0\n")); +} else { + foreach my $num (0 .. $#ARGV) { + SWITCH: for ($ARGV[$num]) { + /--input|-i/ && do { $IDfile=$ARGV[$num+1]; + open(F,"<$IDfile") or die "Error: Can't open \"$IDfile\", $!"; + last; }; + /-f5ff/ && do { $f5ff=$ARGV[$num+1]+1; last; }; # need +1 for intervall computations + /-ff3f/ && do { $ff3f=$ARGV[$num+1]+1; last; }; + + /-f5fr/ && do { $f5fr=$ARGV[$num+1]+1; last; }; + /-ff3r/ && do { $ff3r=$ARGV[$num+1]+1; last; }; + + /-f3rf/ && do { $f3rf=$ARGV[$num+1]+1; last; }; + /-fr5f/ && do { $fr5f=$ARGV[$num+1]+1; last; }; + + /-f3rr/ && do { $f3rr=$ARGV[$num+1]+1; last; }; + /-fr5r/ && do { $fr5r=$ARGV[$num+1]+1; last; }; + + /-r5ff/ && do { $r5ff=$ARGV[$num+1]+1; last; }; + /-rf3f/ && do { $rf3f=$ARGV[$num+1]+1; last; }; + + /-r5fr/ && do { $r5fr=$ARGV[$num+1]+1; last; }; + /-rf3r/ && do { $rf3r=$ARGV[$num+1]+1; last; }; + + /-r3rf/ && do { $r3rf=$ARGV[$num+1]+1; last; }; + /-rr5f/ && do { $rr5f=$ARGV[$num+1]+1; last; }; + + /-r3rr/ && do { $r3rr=$ARGV[$num+1]+1; last; }; + /-rr5r/ && do { $rr5r=$ARGV[$num+1]+1; last; }; + +# /--name|-n/ && do { $seqName=$ARGV[$num+1]; last; }; + /--print|-p/ && do { $printParameters=1; last; }; + /--output|-o/ && do { $OutputFileName=$ARGV[$num+1]; + open(S,">$OutputFileName") or die "Error : Can't open result file \"$OutputFileName\", $!"; + last; }; + /--help|-h/ && do { exec("pod2text $0\n") ; die }; + } + } + if ($printParameters) { + print " + --> f5ff ",$f5ff-1," --> ff3f ",$ff3f-1," --> ; + --> f5fr ",$f5fr-1," --> ff3r ",$ff3r-1," <-- ; + --> f3rf ",$f3rf-1," <-- fr5f ",$fr5f-1," --> ; + --> f3rr ",$f3rr-1," <-- fr5r ",$fr5r-1," <-- ; + <-- r5ff ",$r5ff-1," --> rf3f ",$rf3f-1," --> ; + <-- r5fr ",$r5fr-1," --> rf3r ",$rf3r-1," <-- ; + <-- r3rf ",$r3rf-1," <-- rr5f ",$rr5f-1," --> ; + <-- r3rr ",$r3rr-1," <-- rr5r ",$rr5r-1," <-- ;\n"; + } + ##NC_011753.2 RefSeq gene 367 834 . - . locus_tag=VS_0001;db_xref=GeneID:7162789 + my $finSeedSens; + my $finSeedAntisens; + my $debSeedSens; + my $debSeedAntisens; + my $info_gene=""; + my $sensGeneAvant = "+" ; # 1rst seed definition : geneAvant (gene[i-1]) doesn't exist + my @chromList; + while(my $ligne = <F>){ + chomp($ligne); + my @list = split(/\t/,$ligne); + if ((scalar(@chromList) == 0) or ($chromList[$#chromList] ne $list[0])){ + push(@chromList, $list[0]); + my $finSeedSens; + my $finSeedAntisens; + my $debSeedSens; + my $debSeedAntisens; + my $info_gene=""; + my $sensGeneAvant = "+" ; # 1rst seed definition : geneAvant (gene[i-1]) doesn't exist + } + if (($sensGeneAvant eq "+") and ($list[6] eq "+")) { #CTN ie geneavant == f, geneapres == f + $debSeedSens += $f5ff; + $finSeedSens = $list[3]- $ff3f; + $debSeedAntisens += $f3rf; + $finSeedAntisens = $list[3]- $fr5f; + } elsif (($sensGeneAvant eq "+") and ($list[6] eq "-")) { #CTN ie geneaavant == f, geneapres == r + $debSeedSens += $f5fr; + $finSeedSens = $list[3]- $ff3r; + $debSeedAntisens += $f3rr; + $finSeedAntisens = $list[3]- $fr5r; + } elsif (($sensGeneAvant eq "-") and ($list[6] eq "+")) { #CTN ie geneaavant == r, geneapres == f + $debSeedSens += $r5ff; + $finSeedSens = $list[3]- $rf3f; + $debSeedAntisens += $r3rf; + $finSeedAntisens = $list[3]- $rr5f; + } else { #CTN ie geneaavant == r, geneapres == r + $debSeedSens += $r5fr; + $finSeedSens = $list[3]- $rf3r; + $debSeedAntisens += $r3rr; + $finSeedAntisens = $list[3]- $rr5r; + } + if ($debSeedSens <= 0) { $debSeedSens=1 ; } # 1srt + if ($debSeedAntisens <= 0) { $debSeedAntisens=1 ; } + if($debSeedSens < $finSeedSens){ # only "real" seed + #print "$gene_avant\nNC_011753\tperso\tseed\t$deb_seed\t$fin_seed\t.\t+\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n$ligne\n\n"; + # + + print S "$list[0]\tperso\tseedIR\t$debSeedSens\t$finSeedSens\t.\t+\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n"; + } + if ($debSeedAntisens < $finSeedAntisens){ + print S "$list[0]\tperso\tseedIR\t$debSeedAntisens\t$finSeedAntisens\t.\t-\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n"; + } + $sensGeneAvant = $list[6] ; # GFF : column 6 gives strand + $debSeedSens = $list[4]; + $debSeedAntisens = $list[4]; + $info_gene = $list[@list-1]; + } + close F; + close S; + exit(0); +}