Mercurial > repos > yufei-luo > s_mart
comparison SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
1 #!/usr/bin/perl -w | |
2 ### | |
3 # But : extension des UTR5 à partir des clusters de reads | |
4 # | |
5 # Entrees : fichier gff annotation + cluster | |
6 # | |
7 # Sortie : UTR5.gff | |
8 # | |
9 ###------------------------------------------------------ | |
10 use vars qw($USAGE); | |
11 use strict; | |
12 use Getopt::Long; | |
13 | |
14 =head1 NAME | |
15 | |
16 seedGff.pl | |
17 | |
18 =head1 SYNOPSIS | |
19 | |
20 % seedGff.pl -i annotation.gff -p BeginPosFromAtg [-l lgSeed | -e EndPosFromAtg ] [-o output.gff] [-h] | |
21 | |
22 =head1 DESCRIPTION | |
23 This script will parse input gff file and write information in gff3 format. | |
24 | |
25 -i|--input fileName gff input file name of annotations | |
26 -p|--pos BeginPosFromAtg greater positive number for the begin position of the seed from Atg | |
27 [-l|--length seedLength] length of the seed to compute (default 4nt) -- not implemented yet | |
28 [-e|--end seedEnd] end of the seed to compute (smaller positive number) | |
29 -o|--output fileName gff output file name | |
30 [-h|--help] help mode then die | |
31 | |
32 =head1 AUTHOR - Claire Toffano-Nioche - mar.11 | |
33 from Claire Kuchly initial script | |
34 | |
35 =cut | |
36 #----------------------- | |
# Default settings: no input file, seed offsets and seed length of 0,
# and "SEED.gff" as the output file name.
my ($inFileName, $beginSeed, $endSeed, $lgSeed, $outFileName) = ("", 0, 0, 0, "SEED.gff") ;
my $help = 0 ;

# Replace the current process by pod2text in order to display the POD of
# this script. The list form of exec bypasses the shell, so a script path
# holding spaces or shell metacharacters is handed over untouched.
sub showUsage {
    exec("pod2text", $0) or die "Can't run pod2text: $!\n" ;
}

# Command line check.
# Getopt::Long matches whole option names only, so an option value such as
# "gene-exp.gff" can no longer be mistaken for the -e option, which is what
# happened with unanchored regular expressions applied to every argument.
GetOptions(
    'input|i=s'  => \$inFileName,
    'pos|p=i'    => \$beginSeed,
    'end|e=i'    => \$endSeed,
    'length|l=i' => \$lgSeed,
    'output|o=s' => \$outFileName,
    'help|h'     => \$help,
) or showUsage() ;
showUsage() if $help ;

# Validate the options before touching any file, so that a usage error does
# not leave an empty output file behind.
if (($endSeed > 0) and ($lgSeed > 0)) {
    print "Error : only -e or -l definition, not both\n";
    showUsage() ;
} elsif ($lgSeed > 0) {
    # The seed length option is not implemented yet: the run goes on with
    # the -e value (0 when it was not given).
    print "ERROR : Lg Seed => TODO \n";
}

# Without an input file there is nothing to read; fail loudly rather than
# silently writing an empty output file.
if ($inFileName eq "") {
    die "Error : no gff input file given (-i option)\n" ;
}
open(my $inGff, '<', $inFileName) or die "Can't open gff file: \"$inFileName\": $!\n" ;
open(my $outGff, '>', $outFileName) or die "Error can't $outFileName open for output. $!\n";

# Build the seed of each annotated feature: the seed is the interval that
# lies between $beginSeed and $endSeed nucleotides upstream of the feature
# start (column 4 on the "+" strand, column 5 on the "-" strand).
# Example from the original author: to anchor 5' clusters around position
# -20 relative to the ATG, use a seed going from -22 to -18.
while (my $ligne = <$inGff>) {
    chomp($ligne);
    # Blank lines and comment/directive lines carry no feature.
    next if $ligne =~ /^\s*(?:#|$)/ ;
    my @list = split(/\t/, $ligne) ;
    # A feature needs at least the 8 first gff columns.
    next if @list < 8 ;
    my $strand = $list[6] ;
    my ($debUTR5, $finUTR5) ;
    if ($strand eq "+") {
        $debUTR5 = $list[3]-$beginSeed;
        $finUTR5 = $list[3]-$endSeed;
    } elsif ($strand eq "-") {
        $debUTR5 = $list[4]+$endSeed;
        $finUTR5 = $list[4]+$beginSeed;
    } else {
        # "Upstream" is undefined without a strand: skip the feature
        # instead of writing a record with empty coordinates.
        next ;
    }
    # Coordinates that would fall before the sequence start are clamped.
    # NOTE(review): gff coordinates are 1-based, so 1 may be the intended
    # lower bound; 0 is kept here to leave existing outputs unchanged.
    if ($debUTR5 < 0) { $debUTR5 = 0 ; }
    if ($finUTR5 < 0) { $finUTR5 = 0 ; }
    # The attribute column may be missing on 8-column lines.
    my $attributes = defined($list[8]) ? $list[8] : "" ;
    print $outGff join("\t", $list[0], $list[1], "5UTR", $debUTR5, $finUTR5,
                             $list[5], $list[6], $list[7], $attributes), "\n" ;
}
close($inGff) ;
# Buffered write errors (e.g. disk full) only show up when closing.
close($outGff) or die "Error while closing $outFileName: $!\n" ;
exit(0);