Mercurial > repos > yufei-luo > s_mart
comparison SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 #!/usr/bin/perl -w | |
2 ### | |
3 # But : protocol permettant la detection d'RNA non codant potentiel | |
4 # | |
5 # Entrees : fichier de mapping Smart gff3 | |
6 # fichier gff des gènes | |
7 # fichier gff des clusters Cis regulateur potentiel | |
8 # | |
9 # Sortie : fichier gff des clusters ARN nc | |
10 # | |
11 ###------------------------------------------------------ | |
12 use vars qw($USAGE); | |
13 use strict; | |
14 | |
15 =head1 NAME | |
16 | |
17 interElementGff.pl - creation of a new Gff corresponding to the region of two successive Elements | |
18 | |
19 =head1 SYNOPSIS | |
20 | |
21 % interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 [-s 50] [-a 20] [-n seqName] [-h] | |
22 | |
23 =head1 DESCRIPTION | |
24 This script will determine cluster ok ncRNA. | |
25 | |
26 -i|--input fileName gff input file name | |
27 -o|--output fileName gff output file name | |
28 -n|--name seqName sequence name | |
29 -p|--print print parameters used | |
30 | |
31 -f5ff n number of nt to exclude from 5' seed when gene before is Forward, seed is Forward and next gene is Forward [default 0] | |
32 -ff3f n number... " ...[default 0] | |
33 | |
34 -f5fr n number... " ...[default 0] | |
35 -ff3r n number... " ...[default 0] | |
36 | |
37 -fr3f n number... " ...[default 0] | |
38 -fr5f n number... " ...[default 0] | |
39 | |
40 -f3rr n number... " ...[default 0] | |
41 -fr5r n number... " ...[default 0] | |
42 | |
43 -r5ff n number... " ...[default 0] | |
44 -rf3f n number... " ...[default 0] | |
45 | |
46 -r5fr n number... " ...[default 0] | |
47 -rf3r n number... " ...[default 0] | |
48 | |
49 -r3rf n number... " ...[default 0] | |
50 -rr5f n number... " ...[default 0] | |
51 | |
52 -r3rr n number... " ...[default 0] | |
53 -rr5r n number... " ...[default 0] | |
54 | |
55 [-h|--help] help mode then die | |
56 | |
57 | |
58 USAGE_CASE | |
59 | |
60 % interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 -ff 53 -rr 23 -n NC_011744 | |
61 | |
62 BUG | |
63 | |
64 Caution : input file needs to be sorted on positions | |
65 | |
66 Caution : for -f/r options add +3 bp to include stop codon if not in input file | |
67 | |
68 =head1 AUTHOR - CTN - apr.11 | |
69 (from RNA-Vibrio/protocol_NC_V2.pl - Claire KUCHLY) | |
70 | |
71 =cut | |
72 #---------------------------------------------------------------------------- | |
73 # check command line : | |
74 my ($IDfile, $OutputFileName, $f5ff, $ff3f, $f5fr, $ff3r, $f3rf, $fr5f, $f3rr,$fr5r, $r5ff, $rf3f, $r5fr, $rf3r, $r3rf, $rr5f, $r3rr, $rr5r, $seqName, $printParameters) = | |
75 (undef, undef , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", 0) ; | |
76 if ($#ARGV==0) { | |
77 die (exec("pod2text $0\n")); | |
78 } else { | |
79 foreach my $num (0 .. $#ARGV) { | |
80 SWITCH: for ($ARGV[$num]) { | |
81 /--input|-i/ && do { $IDfile=$ARGV[$num+1]; | |
82 open(F,"<$IDfile") or die "Error: Can't open \"$IDfile\", $!"; | |
83 last; }; | |
84 /-f5ff/ && do { $f5ff=$ARGV[$num+1]+1; last; }; # need +1 for intervall computations | |
85 /-ff3f/ && do { $ff3f=$ARGV[$num+1]+1; last; }; | |
86 | |
87 /-f5fr/ && do { $f5fr=$ARGV[$num+1]+1; last; }; | |
88 /-ff3r/ && do { $ff3r=$ARGV[$num+1]+1; last; }; | |
89 | |
90 /-f3rf/ && do { $f3rf=$ARGV[$num+1]+1; last; }; | |
91 /-fr5f/ && do { $fr5f=$ARGV[$num+1]+1; last; }; | |
92 | |
93 /-f3rr/ && do { $f3rr=$ARGV[$num+1]+1; last; }; | |
94 /-fr5r/ && do { $fr5r=$ARGV[$num+1]+1; last; }; | |
95 | |
96 /-r5ff/ && do { $r5ff=$ARGV[$num+1]+1; last; }; | |
97 /-rf3f/ && do { $rf3f=$ARGV[$num+1]+1; last; }; | |
98 | |
99 /-r5fr/ && do { $r5fr=$ARGV[$num+1]+1; last; }; | |
100 /-rf3r/ && do { $rf3r=$ARGV[$num+1]+1; last; }; | |
101 | |
102 /-r3rf/ && do { $r3rf=$ARGV[$num+1]+1; last; }; | |
103 /-rr5f/ && do { $rr5f=$ARGV[$num+1]+1; last; }; | |
104 | |
105 /-r3rr/ && do { $r3rr=$ARGV[$num+1]+1; last; }; | |
106 /-rr5r/ && do { $rr5r=$ARGV[$num+1]+1; last; }; | |
107 | |
108 # /--name|-n/ && do { $seqName=$ARGV[$num+1]; last; }; | |
109 /--print|-p/ && do { $printParameters=1; last; }; | |
110 /--output|-o/ && do { $OutputFileName=$ARGV[$num+1]; | |
111 open(S,">$OutputFileName") or die "Error : Can't open result file \"$OutputFileName\", $!"; | |
112 last; }; | |
113 /--help|-h/ && do { exec("pod2text $0\n") ; die }; | |
114 } | |
115 } | |
116 if ($printParameters) { | |
117 print " | |
118 --> f5ff ",$f5ff-1," --> ff3f ",$ff3f-1," --> ; | |
119 --> f5fr ",$f5fr-1," --> ff3r ",$ff3r-1," <-- ; | |
120 --> f3rf ",$f3rf-1," <-- fr5f ",$fr5f-1," --> ; | |
121 --> f3rr ",$f3rr-1," <-- fr5r ",$fr5r-1," <-- ; | |
122 <-- r5ff ",$r5ff-1," --> rf3f ",$rf3f-1," --> ; | |
123 <-- r5fr ",$r5fr-1," --> rf3r ",$rf3r-1," <-- ; | |
124 <-- r3rf ",$r3rf-1," <-- rr5f ",$rr5f-1," --> ; | |
125 <-- r3rr ",$r3rr-1," <-- rr5r ",$rr5r-1," <-- ;\n"; | |
126 } | |
127 ##NC_011753.2 RefSeq gene 367 834 . - . locus_tag=VS_0001;db_xref=GeneID:7162789 | |
128 my $finSeedSens; | |
129 my $finSeedAntisens; | |
130 my $debSeedSens; | |
131 my $debSeedAntisens; | |
132 my $info_gene=""; | |
133 my $sensGeneAvant = "+" ; # 1rst seed definition : geneAvant (gene[i-1]) doesn't exist | |
134 my @chromList; | |
135 while(my $ligne = <F>){ | |
136 chomp($ligne); | |
137 my @list = split(/\t/,$ligne); | |
138 if ((scalar(@chromList) == 0) or ($chromList[$#chromList] ne $list[0])){ | |
139 push(@chromList, $list[0]); | |
140 my $finSeedSens; | |
141 my $finSeedAntisens; | |
142 my $debSeedSens; | |
143 my $debSeedAntisens; | |
144 my $info_gene=""; | |
145 my $sensGeneAvant = "+" ; # 1rst seed definition : geneAvant (gene[i-1]) doesn't exist | |
146 } | |
147 if (($sensGeneAvant eq "+") and ($list[6] eq "+")) { #CTN ie geneavant == f, geneapres == f | |
148 $debSeedSens += $f5ff; | |
149 $finSeedSens = $list[3]- $ff3f; | |
150 $debSeedAntisens += $f3rf; | |
151 $finSeedAntisens = $list[3]- $fr5f; | |
152 } elsif (($sensGeneAvant eq "+") and ($list[6] eq "-")) { #CTN ie geneaavant == f, geneapres == r | |
153 $debSeedSens += $f5fr; | |
154 $finSeedSens = $list[3]- $ff3r; | |
155 $debSeedAntisens += $f3rr; | |
156 $finSeedAntisens = $list[3]- $fr5r; | |
157 } elsif (($sensGeneAvant eq "-") and ($list[6] eq "+")) { #CTN ie geneaavant == r, geneapres == f | |
158 $debSeedSens += $r5ff; | |
159 $finSeedSens = $list[3]- $rf3f; | |
160 $debSeedAntisens += $r3rf; | |
161 $finSeedAntisens = $list[3]- $rr5f; | |
162 } else { #CTN ie geneaavant == r, geneapres == r | |
163 $debSeedSens += $r5fr; | |
164 $finSeedSens = $list[3]- $rf3r; | |
165 $debSeedAntisens += $r3rr; | |
166 $finSeedAntisens = $list[3]- $rr5r; | |
167 } | |
168 if ($debSeedSens <= 0) { $debSeedSens=1 ; } # 1srt | |
169 if ($debSeedAntisens <= 0) { $debSeedAntisens=1 ; } | |
170 if($debSeedSens < $finSeedSens){ # only "real" seed | |
171 #print "$gene_avant\nNC_011753\tperso\tseed\t$deb_seed\t$fin_seed\t.\t+\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n$ligne\n\n"; | |
172 # | |
173 | |
174 print S "$list[0]\tperso\tseedIR\t$debSeedSens\t$finSeedSens\t.\t+\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n"; | |
175 } | |
176 if ($debSeedAntisens < $finSeedAntisens){ | |
177 print S "$list[0]\tperso\tseedIR\t$debSeedAntisens\t$finSeedAntisens\t.\t-\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n"; | |
178 } | |
179 $sensGeneAvant = $list[6] ; # GFF : column 6 gives strand | |
180 $debSeedSens = $list[4]; | |
181 $debSeedAntisens = $list[4]; | |
182 $info_gene = $list[@list-1]; | |
183 } | |
184 close F; | |
185 close S; | |
186 exit(0); | |
187 } |