comparison SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 #!/usr/bin/perl -w
2 ###
3 # But : extension des UTR5 à partir des clusters de reads
4 #
5 # Entrees : fichier gff annotation + cluster
6 #
7 # Sortie : UTR5.gff
8 #
9 ###------------------------------------------------------
10 use vars qw($USAGE);
11 use strict;
12 use Getopt::Long;
13
14 =head1 NAME
15
16 seedGff.pl
17
18 =head1 SYNOPSIS
19
20 % seedGff.pl -i annotation.gff -p BeginPosFromAtg [-l lgSeed | -e EndPosFromAtg ] [-h]
21
22 =head1 DESCRIPTION
23 This script will parse input gff file and write information in gff3 format.
24
25 -i|--input fileName gff input file name of annotations
26 -p|--pos BeginPosFromAtg greater positive number for the begin position of the seed from Atg
27 [-l|--length seedLength] length of the seed to compute (default 4nt)
28 [-e|--end seedEnd] end of the seed to compute (smaller positive number)
29 -o|--output fileName gff output file name
30 [-h|--help] help mode then die
31
32 =head1 AUTHOR - Claire Toffano-Nioche - mar.11
33 from Claire Kuchly initial script
34
35 =cut
36 #-----------------------
# Option values and their defaults: input annotation file, begin and end
# offsets of the seed (both counted from the ATG), seed length, output file.
my ($inFileName, $beginSeed, $endSeed, $lgSeed, $outFileName) = ("", 0, 0, 0, "SEED.gff") ;
my $help = 0 ;

# command line check
# Getopt::Long matches option names exactly, so a value that merely contains
# "-i", "-p", "-o"... (e.g. the file name "my-input.gff") is no longer mistaken
# for a switch, and every option is guaranteed to come with its value.
GetOptions(
    'input|i=s'  => \$inFileName,
    'pos|p=i'    => \$beginSeed,
    'end|e=i'    => \$endSeed,
    'length|l=i' => \$lgSeed,
    'output|o=s' => \$outFileName,
    'help|h'     => \$help,
) or do {
    # Unknown option or malformed value: show the documentation and fail.
    system("pod2text", $0) ;
    exit(1) ;
};

# Help mode: the list form of exec avoids passing the script path to a shell.
if ($help) {
    exec("pod2text", $0) or die "Can't run pod2text: $!\n" ;
}

# Validate the options before touching any file, so that a wrong command line
# does not create or truncate the output file.
if (($endSeed > 0) and ($lgSeed > 0)) {
    print "Error : only -e or -l definition, not both\n";
    # system (not exec) so that the script itself can report the failure
    # through a non-zero exit status.
    system("pod2text", $0) ;
    exit(1) ;
}
if ($inFileName eq "") {
    die "Error : no gff input file given (option -i)\n" ;
}

# Three-argument open: the file names are never interpreted as open modes.
open (INGFF, "<", $inFileName) or die "Can't open gff file: \"$inFileName\": $!\n" ;
open (UTR5, ">", $outFileName) or die "Error can't $outFileName open for output. $!\n";

if ($lgSeed > 0) {
    # The -l option is not implemented: the notice is printed and the script
    # goes on with $endSeed left at its current value.
    print "ERROR : Lg Seed => TODO \n";
}
67
68 #Création des fichiers de filtres (séquences UTR) :
69 #print "Création des fichiers de séquences !\n";
70 ###Creer les fichiers des séquences en 5' et 3' des gènes.
71 ###Seed pour les clusters en 5' : il faut qu'ils soient ancrés sur le -20 par rapport à l'ATG. Donc seed de -22/-18.
# Read the annotation file line by line and write, for every stranded feature,
# one "5UTR" record covering the seed located upstream of the feature:
#   + strand : [start - beginSeed, start - endSeed]
#   - strand : [end   + endSeed,   end   + beginSeed]
# Columns 1, 2 and 6 to 9 of the input record are copied unchanged.
while(my $ligne = <INGFF>){
    chomp($ligne);

    # Blank lines and GFF comments/directives ("#...") carry no feature.
    next if $ligne =~ /^\s*(?:#|$)/ ;

    # Limit -1 keeps trailing empty columns instead of silently dropping them.
    my @list = split(/\t/, $ligne, -1) ;

    # A seed is only defined relative to a strand: records without a "+" or
    # "-" strand (including lines that are not tab-separated records) are
    # skipped instead of being printed with empty coordinates.
    my $strand = $list[6] ;
    next unless defined $strand and ($strand eq "+" or $strand eq "-") ;

    # The seed is anchored on the feature start (+ strand) or end (- strand);
    # skip the record when that coordinate is not a plain integer.
    my $anchor = ($strand eq "+") ? $list[3] : $list[4] ;
    next unless defined $anchor and $anchor =~ /^\d+$/ ;

    # Pad missing trailing columns so the output always has 9 columns.
    push(@list, "") while (@list < 9) ;

    my $finUTR5 ;
    my $debUTR5 ;
    if($strand eq "+"){
        $finUTR5 = $anchor-$endSeed;
        $debUTR5 = $anchor-$beginSeed;
    } else {
        $debUTR5 = $anchor+$endSeed;
        $finUTR5 = $anchor+$beginSeed;
    }

    # GFF coordinates are 1-based: clamp to the first position of the sequence.
    if($debUTR5 < 1){$debUTR5 = 1;}
    if($finUTR5 < 1){$finUTR5 = 1;}

    print UTR5 "$list[0]\t$list[1]\t5UTR\t$debUTR5\t$finUTR5\t$list[5]\t$list[6]\t$list[7]\t$list[8]\n";
}
close INGFF;
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close(UTR5) or die "Error while closing output file: $!\n";
exit(0);