Mercurial > repos > yufei-luo > s_mart
diff SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/perl -w
###
# Purpose : extension of the 5' UTRs from the read clusters
#
# Inputs  : gff annotation file + cluster
#
# Output  : UTR5.gff (the default output file name is actually SEED.gff,
#           see the -o option below)
#
###------------------------------------------------------
use vars qw($USAGE);
use strict;
use warnings;
use Getopt::Long;

=head1 NAME

seedGff.pl

=head1 SYNOPSIS

% seedGff.pl -i annotation.gff -p BeginPosFromAtg [-l lgSeed | -e EndPosFromAtg ] [-o output.gff] [-h]

=head1 DESCRIPTION

This script will parse input gff file and write information in gff3 format.

    -i|--input fileName        gff input file name of annotations
    -p|--pos BeginPosFromAtg   greater positive number for the begin position of the seed from Atg
    [-l|--length seedLength]   length of the seed to compute (not implemented yet)
    [-e|--end seedEnd]         end of the seed to compute (smaller positive number)
    [-o|--output fileName]     gff output file name (default SEED.gff)
    [-h|--help]                help mode then die

=head1 AUTHOR - Claire Toffano-Nioche - mar.11

    from Claire Kuchly initial script

=cut
#-----------------------

# Print the POD of this script as plain text, then leave with $status.
sub show_usage {
    my ($status) = @_;

    # List form of system(): no shell is involved, so a script path that
    # contains spaces or metacharacters reaches pod2text untouched.
    system('pod2text', $0) == 0
        or warn "Can't run pod2text on \"$0\"\n";
    exit($status);
}

# Compute the seed interval of one feature.
#   $strand            : strand column of the feature
#   $start, $end       : start / end columns of the feature
#   $beginSeed         : distance of the seed begin from the ATG (-p)
#   $endSeed           : distance of the seed end from the ATG (-e)
# Returns ($debUTR5, $finUTR5), each clamped to 0 when negative, or an empty
# list when the strand is neither "+" nor "-" or when the coordinate used as
# anchor is not an unsigned integer.
sub seed_interval {
    my ($strand, $start, $end, $beginSeed, $endSeed) = @_;
    my ($debUTR5, $finUTR5);

    if ($strand eq "+") {
        # Forward strand: the seed lies before the feature start.
        return unless $start =~ /^\d+$/;
        $debUTR5 = $start - $beginSeed;
        $finUTR5 = $start - $endSeed;
    }
    elsif ($strand eq "-") {
        # Reverse strand: the seed lies after the feature end.
        return unless $end =~ /^\d+$/;
        $debUTR5 = $end + $endSeed;
        $finUTR5 = $end + $beginSeed;
    }
    else {
        return;
    }

    $debUTR5 = 0 if $debUTR5 < 0;
    $finUTR5 = 0 if $finUTR5 < 0;
    return ($debUTR5, $finUTR5);
}

# Entry point: parse the command line given in @_, read the gff annotation and
# write one "5UTR" seed line per stranded feature. Returns the exit status (0).
# Dies when the input or output file cannot be opened; leaves through
# show_usage() on --help or on an invalid command line.
sub main {
    local @ARGV = @_;
    my ($inFileName, $beginSeed, $endSeed, $lgSeed, $outFileName) = ("", 0, 0, 0, "SEED.gff");
    my $help = 0;

    # command line check
    # Getopt::Long only treats real switches as options, so a value such as
    # "my-input.gff" is no longer mistaken for "-i".
    GetOptions(
        'input|i=s'  => \$inFileName,
        'pos|p=i'    => \$beginSeed,
        'end|e=i'    => \$endSeed,
        'length|l=i' => \$lgSeed,
        'output|o=s' => \$outFileName,
        'help|h'     => \$help,
    ) or show_usage(1);
    show_usage(0) if $help;

    if ($inFileName eq "") {
        print STDERR "Error : no gff input file given (-i)\n";
        show_usage(1);
    }

    # Validate the options before touching the output file, so that an invalid
    # command line does not truncate an existing file.
    if (($endSeed > 0) and ($lgSeed > 0)) {
        print "Error : only -e or -l definition, not both\n";
        show_usage(1);
    }
    elsif ($lgSeed > 0) {
        # -l is accepted but not implemented: processing goes on with the
        # current value of $endSeed.
        print "ERROR : Lg Seed => TODO \n";
    }

    open(my $inGff, '<', $inFileName) or die "Can't open gff file: \"$inFileName\"\n";
    open(my $utr5, '>', $outFileName) or die "Error can't $outFileName open for output. $!\n";

    # Creation of the filter files (UTR sequences):
    ### Create the files of the sequences in 5' and 3' of the genes.
    ### Seed for the clusters in 5': they must be anchored on -20 relative to
    ### the ATG. Hence a seed of -22/-18.
    while (my $ligne = <$inGff>) {
        chomp($ligne);

        # Blank lines and comment/directive lines carry no feature.
        next if $ligne =~ /^\s*(?:#|$)/;

        my @list = split(/\t/, $ligne);

        # split() drops trailing empty columns: pad back to the 9 gff columns
        # so that the output line always has the same shape.
        push @list, '' while @list < 9;

        my ($debUTR5, $finUTR5) = seed_interval(@list[6, 3, 4], $beginSeed, $endSeed);

        # No interval: unstranded feature or unusable coordinate, skip it
        # instead of writing a line with empty coordinates.
        next unless defined $debUTR5;

        print {$utr5} join("\t", @list[0, 1], "5UTR", $debUTR5, $finUTR5, @list[5 .. 8]), "\n";
    }
    close($inGff);

    # Buffered write errors only show up when the handle is closed.
    close($utr5) or die "Error can't close $outFileName. $!\n";
    return 0;
}

# Run only when executed as a script, so the file can also be loaded for tests.
exit(main(@ARGV)) unless caller;
1;