annotate Tools/Motif_search/sort_positions_galaxy.pl @ 0:229d36377838 draft

Uploaded
author amadeo
date Mon, 05 Sep 2016 05:53:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
229d36377838 Uploaded
amadeo
parents:
diff changeset
#!/usr/bin/perl -w
$|=1;
use warnings;
use strict;

# Script that takes a GFF-format file from step1.pl as input and orders
# each block of gene data by the start position of the motif.
#
# Usage: <script> INPUT.gff OUTPUT.gff
#
# Input handling:
#   * A line that starts with a space is copied to the output immediately,
#     unchanged (it is not buffered with the surrounding gene block).
#   * Every other line is split on whitespace.  The first 21 characters of
#     column 1 identify the gene (NOTE(review): the 21-character width is
#     presumably the length of the gene identifiers produced by step1.pl --
#     confirm), and column 4 is the numeric start position used as sort key.
#   * Consecutive lines with the same gene key form one block.  When the gene
#     key changes (and at end of input) the block is written out sorted by
#     ascending start position.
#
# On success the number of lines written to the output file is printed to
# STDOUT (without a trailing newline).

if (@ARGV < 2) {
    # Usage text and exit status are kept exactly as before so that existing
    # wrappers see the same behaviour.
    print "\nUsage: step2.pl motif_search-position-sorted.gff motif_search-gene-&-position-sorted.gff e\n\n";
    exit(0);
}

my ($in_path, $out_path) = @ARGV;

# Three-argument open with lexical handles: the file names are taken as-is
# and can never be interpreted as an open mode.
open(my $in, '<', $in_path)
    or die "Cannot open '$in_path' for reading: $!\n";
open(my $out, '>', $out_path)
    or die "Cannot open '$out_path' for writing: $!\n";

# Write one buffered gene block to $fh, ordered by ascending start position.
#   $fh        - output file handle
#   $positions - array ref of start positions (sort keys)
#   $records   - array ref of the corresponding input lines (same indices)
# Returns the number of lines written (0 for an empty block).
sub write_sorted_block {
    my ($fh, $positions, $records) = @_;

    # Sort indices rather than values so that every record is emitted exactly
    # once, even when several records share the same start position.  Ties
    # fall back to the index, i.e. to the original input order.
    my @order = sort { $positions->[$a] <=> $positions->[$b] or $a <=> $b }
                0 .. $#{$positions};

    for my $idx (@order) {
        print {$fh} $records->[$idx], "\n"
            or die "Cannot write to output file: $!\n";
    }
    return scalar @order;
}

my $counter = 0;      # number of lines written to the output file
my $current_gene;     # gene key of the block being buffered (undef = none yet)
my @positions;        # start positions of the buffered block
my @records;          # input lines of the buffered block, parallel to @positions

while (my $line = <$in>) {
    chomp $line;

    # Lines beginning with a space are passed straight through.
    if ($line =~ /^ /) {
        print {$out} $line, "\n"
            or die "Cannot write to '$out_path': $!\n";
        $counter++;
        next;
    }

    my @cols = split ' ', $line;

    # Blank or short lines have no gene/position columns; treat the missing
    # values as an empty gene key and position 0 instead of warning about
    # uninitialized values.
    my $gene = defined $cols[0] ? substr($cols[0], 0, 21) : '';
    my $pos  = defined $cols[3] ? $cols[3] : 0;

    # A change of gene key closes the current block: flush it and start anew.
    if (!defined $current_gene or $gene ne $current_gene) {
        $counter += write_sorted_block($out, \@positions, \@records);
        @positions    = ();
        @records      = ();
        $current_gene = $gene;
    }

    push @positions, $pos;
    push @records,   $line;
}

# Flush the last block with the same routine as every other block.
$counter += write_sorted_block($out, \@positions, \@records);

close($in);
# Buffered write errors (e.g. a full disk) only surface when closing.
close($out) or die "Cannot close '$out_path': $!\n";

print $counter;