#!/usr/bin/perl -w
#
# step2.pl
#
# Takes a GFF-format file (as produced by step1.pl) and, within each
# consecutive block of records belonging to the same gene, orders the
# records by the start position of the motif (4th whitespace-separated
# column).
#
# Usage:
#   step2.pl <input.gff> <output.gff>
#
# Details:
#   * The gene key is the first 21 characters of the first column.
#   * A gene block is a run of consecutive records with the same key; the
#     input is therefore expected to be grouped by gene already.
#   * Lines that begin with a space are copied to the output immediately,
#     without taking part in any gene block.
#   * The number of lines written is printed to STDOUT at the end.

use strict;
use warnings;

# Unbuffered STDOUT so the usage text / final count appear immediately.
$| = 1;

# Write one gene block to $fh, ordered numerically by start position.
#   $fh        - output filehandle
#   $positions - array ref of start positions, parallel to $lines
#   $lines     - array ref of the original (chomped) record lines
# Returns the number of lines written.
sub flush_group {
    my ($fh, $positions, $lines) = @_;

    # Sort indices rather than values so that records sharing the same
    # start position are all kept (a position-keyed hash would collapse
    # them into a single entry).
    my @order = sort { $positions->[$a] <=> $positions->[$b] }
                0 .. $#{$positions};

    for my $index (@order) {
        print {$fh} "$lines->[$index]\n";
    }

    return scalar @order;
}

if (@ARGV < 2) {
    print "\nUsage: step2.pl motif_search-position-sorted.gff motif_search-gene-&-position-sorted.gff e\n\n";
    exit(0);
}

my ($input_path, $output_path) = @ARGV;

# Three-argument open with lexical handles: the file names are taken
# literally, so they cannot inject an open mode.
open(my $in_fh, '<', $input_path)
    or die "Cannot open '$input_path' for reading: $!\n";
open(my $out_fh, '>', $output_path)
    or die "Cannot open '$output_path' for writing: $!\n";

my $counter = 0;    # total number of lines written to the output
my $current_gene;   # key of the gene block being accumulated (undef = none yet)
my @positions;      # start positions of the records in the current block
my @records;        # record lines of the current block, parallel to @positions

while (my $line = <$in_fh>) {
    chomp $line;

    # Lines starting with a space are passed straight through.
    if ($line =~ /^ /) {
        print {$out_fh} "$line\n";
        $counter++;
        next;
    }

    my @cols = split ' ', $line;

    # Tolerate short or blank lines: a missing column becomes an empty gene
    # key / position 0, which is how Perl would treat undef here anyway,
    # but without the uninitialized-value warnings.
    my $gene     = substr(defined $cols[0] ? $cols[0] : '', 0, 21);
    my $position = defined $cols[3] ? $cols[3] : 0;

    # A different gene key closes the current block: emit it sorted and
    # start accumulating the new one.
    if (!defined $current_gene or $gene ne $current_gene) {
        $counter += flush_group($out_fh, \@positions, \@records);
        @positions    = ();
        @records      = ();
        $current_gene = $gene;
    }

    push @positions, $position;
    push @records,   $line;
}

# Emit the last gene block using the same routine as every other block.
$counter += flush_group($out_fh, \@positions, \@records);

close($in_fh);

# Buffered write errors (e.g. disk full) only surface at close.
close($out_fh)
    or die "Error closing '$output_path': $!\n";

print $counter;