annotate Tools/Motif_search/rules_galaxy.pl @ 0:229d36377838 draft

Uploaded
author amadeo
date Mon, 05 Sep 2016 05:53:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
229d36377838 Uploaded
amadeo
parents:
diff changeset
#!/usr/bin/perl -w

# Script that looks for genes that have motifs from a certain rule.
#
# Usage: rules_galaxy.pl <input_table> <output_file> <num_motifs>
#
#   input_table  Whitespace-separated table, one hit per line.  Lines that
#                start with a space are ignored.  The gene identifier is
#                taken from column 1 and the TF identifier from column 9.
#                NOTE(review): the original input handle was named FIMO, so
#                this is presumably FIMO output -- confirm the exact layout.
#   output_file  Receives, in input order, every input line whose gene was
#                selected.
#   num_motifs   A gene is selected once the number of distinct TF
#                identifiers seen for it reaches this value.
#
# The list of selected genes is also printed to standard output.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

$| = 1;

# Split one (already chomped) input line into its gene and TF identifiers.
# Returns the pair ($gene, $tf), or an empty list when the line must be
# skipped: lines starting with a space, and lines with fewer than nine
# whitespace-separated fields (blank lines in particular), which cannot
# provide both identifiers.
sub parse_record {
    my ($line) = @_;

    return if $line =~ /^ /;

    my @fields = split ' ', $line;
    return if @fields < 9;

    # The gene is the first 21 characters of column 1.
    my $gene = substr $fields[0], 0, 21;

    # The TF is up to 8 characters of column 9, starting at offset 5.
    # A column too short to reach that offset yields an empty identifier
    # instead of an undefined value plus a "substr outside of string" warning.
    my $tf = length($fields[8]) > 5 ? substr($fields[8], 5, 8) : '';

    return ($gene, $tf);
}

# First pass: count the distinct TF identifiers seen for each gene.
# Returns a reference to a hash whose keys are the genes whose count
# reached $num_motifs.  Dies if the input file cannot be opened.
sub select_genes {
    my ($input_path, $num_motifs) = @_;

    open my $in, '<', $input_path
        or die "Cannot open input file '$input_path': $!\n";

    my %seen_tf;     # $seen_tf{$gene}{$tf} is true once that pair was seen
    my %tf_count;    # number of distinct TFs seen so far for each gene
    my %selected;

    while (my $line = <$in>) {
        chomp $line;

        my ($gene, $tf) = parse_record($line);
        next unless defined $gene;

        # Only the first occurrence of a gene/TF pair changes the count.
        next if $seen_tf{$gene}{$tf}++;

        # The count grows by one at a time, so testing for equality at the
        # moment it changes marks the gene exactly once.
        $selected{$gene} = 1 if ++$tf_count{$gene} == $num_motifs;
    }

    close $in;
    return \%selected;
}

# Second pass: copy every input line that belongs to a selected gene to the
# output file, preserving the input order.  Dies if either file cannot be
# opened or if the output cannot be written completely.
sub write_matching_lines {
    my ($input_path, $output_path, $selected) = @_;

    open my $in, '<', $input_path
        or die "Cannot open input file '$input_path': $!\n";
    open my $out, '>', $output_path
        or die "Cannot open output file '$output_path' for writing: $!\n";

    while (my $line = <$in>) {
        chomp $line;

        my ($gene) = parse_record($line);
        next unless defined $gene && exists $selected->{$gene};

        print {$out} $line, "\n";
    }

    close $in;

    # Buffered write errors only surface when the handle is closed.
    close $out
        or die "Cannot finish writing output file '$output_path': $!\n";
    return;
}

die "Usage: $0 <input_table> <output_file> <num_motifs>\n"
    unless @ARGV >= 3;

my ($input_path, $output_path, $num_motifs) = @ARGV;

die "Number of motifs must be numeric, got '$num_motifs'\n"
    unless looks_like_number($num_motifs);

my $selected = select_genes($input_path, $num_motifs);
write_matching_lines($input_path, $output_path, $selected);

# Sorted so that the report does not depend on hash ordering.
print "Genes that have this rule:", "\n";
foreach my $gene_listed (sort keys %{$selected}) {
    print $gene_listed, "\n";
}