18
|
1 #!/usr/bin/perl -w
|
|
2 ###
|
|
3 # But : extension des UTR5 à partir des clusters de reads
|
|
4 #
|
|
5 # Entrees : fichier gff annotation + cluster
|
|
6 #
|
|
7 # Sortie : UTR5.gff
|
|
8 #
|
|
9 ###------------------------------------------------------
|
|
10 use vars qw($USAGE);
|
|
11 use strict;
|
|
12 use Getopt::Long;
|
|
13
|
|
14 =head1 NAME
|
|
15
|
|
16 seedGff.pl
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 % seedGff.pl -i annotation.gff -p BeginPosFromAtg [-l lgSeed | -e EndPosFromAtg ] [-o output.gff] [-h]
|
|
21
|
|
22 =head1 DESCRIPTION
|
|
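23 This script parses the input gff file and, for each feature, writes a "5UTR" seed feature in gff format whose coordinates are computed upstream of the feature start (ATG) according to the feature strand.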
|
|
24
|
|
25 -i|--input fileName gff input file name of annotations
|
|
26 -p|--pos BeginPosFromAtg greater positive number for the begin position of the seed from Atg
|
|
27 [-l|--length seedLength] length of the seed to compute (not implemented yet: use -e instead)
|
|
28 [-e|--end seedEnd] end of the seed to compute (smaller positive number)
|
|
29 -o|--output fileName gff output file name (default SEED.gff)
|
|
30 [-h|--help] help mode then die
|
|
31
|
|
32 =head1 AUTHOR - Claire Toffano-Nioche - mar.11
|
|
33 from Claire Kuchly initial script
|
|
34
|
|
35 =cut
|
|
36 #-----------------------
|
|
# Option values and their defaults: input gff file name, begin/end distances
# of the seed from the ATG, seed length, and output gff file name.
my ($inFileName, $beginSeed, $endSeed, $lgSeed, $outFileName) = ("", 0, 0, 0, "SEED.gff") ;
my $helpWanted = 0 ;

# Command line check.
# Options are parsed with Getopt::Long so that only real option names are
# recognised. The former unanchored patterns (e.g. /--end|-e/) also fired on
# any argument that merely contained "-e", such as a file name "my-exp.gff",
# and re-examined option values as if they were options themselves.
my $optionsOk = GetOptions(
    "input|i=s"  => \$inFileName,
    "pos|p=i"    => \$beginSeed,
    "end|e=i"    => \$endSeed,
    "length|l=i" => \$lgSeed,
    "output|o=s" => \$outFileName,
    "help|h"     => \$helpWanted,
) ;

# Help requested (or options could not be parsed): display the POD.
# exec replaces the current process, so the die only runs when exec fails.
# The list form bypasses the shell, keeping a script path with spaces intact.
if ($helpWanted or not $optionsOk) {
    exec("pod2text", $0) or die "Can't run pod2text: $!\n" ;
}

# -e and -l are two alternative ways of giving the seed end: refuse both.
# Diagnostics go to STDERR (unbuffered) so they are not lost across exec.
if (($endSeed > 0) and ($lgSeed > 0)) {
    print STDERR "Error : only -e or -l definition, not both\n" ;
    exec("pod2text", $0) or die "Can't run pod2text: $!\n" ;
} elsif ($lgSeed > 0) {
    # Seed length is not implemented: the run continues with $endSeed as given.
    print STDERR "ERROR : Lg Seed => TODO \n" ;
}

# Without an input file the processing loop would read an unopened handle
# and silently produce an empty output file.
if ($inFileName eq "") {
    die "Error : no gff input file given (-i|--input)\n" ;
}

# Files are opened only once the arguments are known to be valid, so a bad
# command line no longer truncates an existing output file.
# Three-argument open: the file name is never interpreted as an open mode.
# The bareword handles INGFF and UTR5 are the ones used by the code below.
open(INGFF, "<", $inFileName) or die "Can't open gff file: \"$inFileName\": $!\n" ;
open(UTR5, ">", $outFileName) or die "Error can't $outFileName open for output. $!\n" ;
|
|
67
|
|
# Creation of the seed features (UTR sequences filter).
# For every annotation feature, write a "5UTR" feature located upstream of
# the feature start, between $beginSeed and $endSeed nucleotides from it.
# Seed for the 5' clusters: they have to be anchored on -20 relative to the
# ATG, hence a seed of -22/-18.
while (my $ligne = <INGFF>) {
    chomp($ligne) ;

    # A GFF3 file may end with an embedded FASTA section: nothing to read after it.
    last if $ligne =~ /^##FASTA/ ;
    # Blank lines, comments and "##" directives describe no feature.
    next if $ligne =~ /^\s*(?:#.*)?$/ ;

    my @list = split(/\t/, $ligne) ;
    # Columns up to the strand (7th) are required, with integer start and end.
    if (@list < 7 or $list[3] !~ /^\d+$/ or $list[4] !~ /^\d+$/) {
        warn "Line $.: not a gff feature line, skipped\n" ;
        next ;
    }
    # split drops trailing empty columns: restore them so that the output
    # always has nine columns.
    push(@list, "") while @list < 9 ;

    my $finUTR5 ;
    my $debUTR5 ;
    my $strand = $list[6] ;
    if ($strand eq "+") {
        # Forward strand: the ATG is at the feature start, the seed lies before it.
        $finUTR5 = $list[3] - $endSeed ;
        $debUTR5 = $list[3] - $beginSeed ;
    } elsif ($strand eq "-") {
        # Reverse strand: the ATG is at the feature end, the seed lies after it.
        $debUTR5 = $list[4] + $endSeed ;
        $finUTR5 = $list[4] + $beginSeed ;
    } else {
        # No strand, no 5' side: coordinates cannot be computed.
        warn "Line $.: strand \"$strand\" is neither + nor -, feature skipped\n" ;
        next ;
    }

    # Seeds running past the beginning of the sequence are clamped.
    # NOTE(review): gff coordinates are 1-based, so 1 may be the intended
    # lower bound; 0 is kept to preserve the existing output - confirm.
    if ($debUTR5 < 0) { $debUTR5 = 0 ; }
    if ($finUTR5 < 0) { $finUTR5 = 0 ; }

    print UTR5 "$list[0]\t$list[1]\t5UTR\t$debUTR5\t$finUTR5\t$list[5]\t$list[6]\t$list[7]\t$list[8]\n" ;
}
close INGFF ;
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close UTR5 or die "Error while closing output file: $!\n" ;
exit(0) ;
|