annotate SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/perl -w
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2 ###
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 # But : extension des UTR5 à partir des clusters de reads
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # Entrees : fichier gff annotation + cluster
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # Sortie : UTR5.gff
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 ###------------------------------------------------------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 use vars qw($USAGE);
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 use strict;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 use Getopt::Long;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 =head1 NAME
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 seedGff.pl
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 =head1 SYNOPSIS
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 % seedGff.pl -i annotation.gff -p BeginPosFromAtg [-l lgSeed | -e EndPosFromAtg ] [-o output.gff] [-h]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 =head1 DESCRIPTION
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 This script will parse input gff file and write information in gff3 format.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25 -i|--input fileName gff input file name of annotations
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 -p|--pos BeginPosFromAtg greater positive number, for the begin position of the seed from the ATG
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 [-l|--length seedLength] length of the seed to compute (documented default 4nt; currently not implemented, the script only prints a TODO notice)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 [-e|--end seedEnd] end of the seed to compute (smaller positive number)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 [-o|--output fileName] gff output file name (default SEED.gff)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30 [-h|--help] help mode then die
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32 =head1 AUTHOR - Claire Toffano-Nioche - mar.11
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33 from Claire Kuchly initial script
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 =cut
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36 #-----------------------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Main program.
#
# Reads a tab-separated GFF annotation file and, for every stranded feature,
# writes one "5UTR" record whose coordinates are a window ("seed") located
# upstream of the feature start:
#   + strand : [start - beginSeed, start - endSeed]
#   - strand : [end   + endSeed,   end   + beginSeed]
# All other columns (seqid, source, score, strand, phase, attributes) are
# copied unchanged from the input record.
#
# Options are parsed with Getopt::Long (imported at the top of the file), so
# an option VALUE that merely contains "-i", "-e", "-o", ... (for instance
# "my-input.gff") is no longer mistaken for a switch.
my ($inFileName, $beginSeed, $endSeed, $lgSeed, $outFileName) = ("", 0, 0, 0, "SEED.gff") ;
my $help = 0;

# Render this script's POD with pod2text, then exit with the given status.
sub show_help_and_exit {
    my ($status) = @_;
    # List form of system(): $0 is passed as-is and never parsed by a shell.
    system('pod2text', $0) == 0
        or warn "Can't run pod2text on $0\n";
    exit($status);
}

# command line check
GetOptions(
    'input|i=s'  => \$inFileName,
    'pos|p=i'    => \$beginSeed,
    'end|e=i'    => \$endSeed,
    'length|l=i' => \$lgSeed,
    'output|o=s' => \$outFileName,
    'help|h'     => \$help,
) or die "Error : invalid command line, try --help\n";

show_help_and_exit(0) if $help;

# Validate the options BEFORE touching any file, so that a bad invocation
# does not create or truncate the output file.
if (($endSeed > 0) and ($lgSeed > 0)) {
    print "Error : only -e or -l definition, not both\n";
    show_help_and_exit(1);
} elsif ($lgSeed > 0) {
    # -l is accepted but not implemented: processing continues with
    # $endSeed left at its current value (0 on this branch).
    print "ERROR : Lg Seed => TODO \n";
}

die "Error : no input gff file given (-i)\n" if $inFileName eq "";

open(my $inGff, '<', $inFileName)
    or die "Can't open gff file: \"$inFileName\": $!\n" ;
open(my $outGff, '>', $outFileName)
    or die "Error can't $outFileName open for output. $!\n";

# Seed for the 5' clusters: they have to be anchored around position -20
# relative to the ATG, hence a typical seed of -22/-18.
while (my $ligne = <$inGff>) {
    chomp($ligne);

    # Skip blank lines and GFF comments/directives ("#...", "##gff-version").
    next if $ligne =~ /^\s*(?:#|$)/;

    # Limit -1 keeps trailing empty columns instead of dropping them.
    my @list = split(/\t/, $ligne, -1) ;

    # The strand lives in column 7; shorter lines are not usable records.
    next if @list < 7;

    # Missing trailing columns are emitted as empty strings.
    push(@list, "") while @list < 9;

    my $strand = $list[6] ;
    my ($debUTR5, $finUTR5) ;
    if ($strand eq "+") {
        $finUTR5 = $list[3] - $endSeed;
        $debUTR5 = $list[3] - $beginSeed;
    } elsif ($strand eq "-") {
        $debUTR5 = $list[4] + $endSeed;
        $finUTR5 = $list[4] + $beginSeed;
    } else {
        # Unstranded features (".", "?") have no defined upstream side.
        next;
    }

    # A window running past the start of the sequence is clamped to 0.
    # NOTE(review): GFF coordinates are 1-based, so 1 may be the intended
    # lower bound; kept at 0 to preserve the existing output - confirm.
    $debUTR5 = 0 if $debUTR5 < 0;
    $finUTR5 = 0 if $finUTR5 < 0;

    print {$outGff} join("\t", @list[0, 1], "5UTR", $debUTR5, $finUTR5, @list[5 .. 8]), "\n";
}
close($inGff);
# Buffered write errors (disk full, ...) only surface when the handle closes.
close($outGff) or die "Error while closing $outFileName: $!\n";
exit(0);