comparison tools/regVariation/categorize_elements_satisfying_criteria.pl @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
#!/usr/bin/perl -w

# Summarise, for every category, how many times its elements satisfy each
# criterion.
#
# Usage:
#   categorize_elements_satisfying_criteria.pl CATEGORIES.in ELEMENTS.in RESULT.out
#
# Inputs (both TABULAR, i.e. tab-separated):
#   1. Categories file: on each line the left column is the name of a
#      category and all other columns are the names of its elements.
#   2. Elements file: the first line holds the names of the criteria. On
#      every following line the left column is the name of an element and the
#      remaining columns are the number of times that element satisfies each
#      criterion, in the order given by the first line.
#
# Output (TABULAR): a header line made of an empty cell followed by the
# criteria names, then one line per category holding the category name
# followed by, for each criterion, the total of the positive values of the
# elements credited to that category.
#
# Rules applied while counting:
#   * Only values greater than zero are added; missing or empty cells count
#     as zero.
#   * Elements that do not belong to any category are ignored.
#   * An element listed under several categories is credited only to the
#     last category line that lists it.
#   * When several lines carry the same category name, their elements are
#     all accumulated on the row of the first such line.
#   * Blank lines in either input are skipped.

use strict;
use warnings;

# Split one line of a TABULAR file into its fields.
# The line terminator is removed first, including the "\r" left behind by
# CRLF files, so the last field of a line never carries a stray character.
# As with a plain split, trailing empty fields are dropped, so a blank line
# yields an empty list.
sub split_tabular_line {
    my ($line) = @_;

    $line =~ s/\r?\n?\z//;
    my @fields = split(/\t/, $line);

    return @fields;
}

# check to make sure having correct files
my $usage = "usage: categorize_elements_satisfying_criteria.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] \n";
die $usage unless @ARGV == 3;

my ($categories_inputFile, $elements_data_inputFile, $categorized_data_outputFile) = @ARGV;

# open the input and output files
open(my $categories_fh, "<", $categories_inputFile)
    or die("Could not open file $categories_inputFile: $!\n");
open(my $elements_fh, "<", $elements_data_inputFile)
    or die("Could not open file $elements_data_inputFile: $!\n");
open(my $output_fh, ">", $categorized_data_outputFile)
    or die("Could not open file $categorized_data_outputFile: $!\n");

# ---------------------------------------------------------------------------
# Read the categories: remember the row of every category and which row each
# element contributes to.
# ---------------------------------------------------------------------------
my @category_names  = ();    # one entry per output row, in input order
my %row_of_category = ();    # category name => first row carrying that name
my %row_of_element  = ();    # element name  => row its counts are added to

while (my $line = <$categories_fh>) {
    my ($category, @members) = split_tabular_line($line);

    # a blank line carries no category at all
    next unless defined $category;

    push @category_names, $category;

    # a repeated category name keeps pointing at the row of its first line
    $row_of_category{$category} = $#category_names
        unless exists $row_of_category{$category};

    # a later listing of an element overrides an earlier one
    for my $member (@members) {
        $row_of_element{$member} = $row_of_category{$category};
    }
}

close($categories_fh);

# ---------------------------------------------------------------------------
# Read the criteria names (first line) and accumulate the element values.
# ---------------------------------------------------------------------------
my $header_line = <$elements_fh>;
my @criteria    = defined $header_line ? split_tabular_line($header_line) : ();

# one zero-initialised counter per category and criterion
my @counts = map { [ (0) x scalar(@criteria) ] } @category_names;

while (my $line = <$elements_fh>) {
    my ($element, @values) = split_tabular_line($line);

    # skip blank lines
    next unless defined $element;

    # Elements outside every category contribute nothing. Without this guard
    # the row lookup yields undef, which Perl treats as index 0, so the values
    # would be added to the first category.
    next unless exists $row_of_element{$element};

    my $row = $row_of_element{$element};

    for my $criterion (0 .. $#criteria) {
        my $value = $values[$criterion];

        # short lines and empty cells count as zero
        next unless defined $value && length $value;

        $counts[$row][$criterion] += $value if $value > 0;
    }
}

close($elements_fh);

# ---------------------------------------------------------------------------
# Write the result table.
# ---------------------------------------------------------------------------

# header: an empty first cell above the category names, then the criteria
print {$output_fh} "\t" . join("\t", @criteria) . "\n"
    or die("Could not write to file $categorized_data_outputFile: $!\n");

# one line per category: its name followed by its total for each criterion
for my $row (0 .. $#category_names) {
    print {$output_fh} join("\t", $category_names[$row], @{ $counts[$row] }) . "\n"
        or die("Could not write to file $categorized_data_outputFile: $!\n");
}

# buffered write errors only surface when the handle is closed
close($output_fh)
    or die("Could not write to file $categorized_data_outputFile: $!\n");