comparison rDiff/src/perform_parametric_tests.m @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0f80a5141704
1 function []=perform_parametric_tests(CFG,genes,variance_function_parametric_1, variance_function_parametric_2)
2
3
4
5 %Get the gene expression
6 fprintf('Loading gene expression\n')
7 if isempty(CFG.Counts_gene_expression)
8 EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab'];
9 else
10 EXPR_TAB_FILENAME=CFG.Counts_gene_expression;
11 end
12
13 try
14 Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1);
15 Gene_expression=Gene_expression.data;
16 catch
17 error(['Could not open: ' EXPR_TAB_FILENAME])
18 end
19
20 %Get the counts
21 fprintf('Loading alternative region counts\n')
22 if isempty(CFG.Counts_rDiff_parametric)
23 IN_FILENAME=[CFG.out_base 'Alternative_region_counts.mat'];
24 load(IN_FILENAME,'Counts_rDiff_parametric')
25 else
26 IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_parametric];
27 load(IN_FILENAMEc,'Counts_rDiff_parametric')
28 end
29
30
31
32 if CFG.use_rproc
33 JB_NR=1;
34 JOB_INFO = rproc_empty();
35 end
36
37 PAR.variance_function_parametric_1=variance_function_parametric_1;
38 PAR.variance_function_parametric_2=variance_function_parametric_2;
39
40 %%% Perform the test
41 % configuration
42 if not(CFG.use_rproc)
43 fprintf('Performing parametric testing\n')
44 end
45
46 %define the splits of the genes for the jobs
47 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))'];
48 % submit jobs to cluster
49 for i = 1:CFG.rproc_num_jobs
50 PAR.genes = genes(idx(idx(:,2)==i,1));
51 PAR.Counts_rDiff_parametric=Counts_rDiff_parametric(idx(idx(:,2)==i,1),:);
52 PAR.Gene_expression=Gene_expression(idx(idx(:,2)==i,1),:);
53 CFG.rproc_memreq = 5000;
54 CFG.rproc_par.mem_req_resubmit = [5000 10000 32000];
55 CFG.rproc_par.identifier = sprintf('Pp.%i-',i);
56 CFG.outfile_prefix=[CFG.out_base_temp 'P_values_parametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)];
57 PAR.CFG=CFG;
58 if CFG.use_rproc
59 fprintf(1, 'Submitting job %i to cluster\n',i);
60 JOB_INFO(JB_NR) = rproc('get_parametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time);
61 JB_NR=JB_NR+1;
62 else
63 get_parametric_tests_caller(PAR);
64 end
65 end
66 if CFG.use_rproc
67 [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1);
68 end
69
70 % Get the test results
71
72
73
74
75 %%% Generate the output files
76 fprintf('Reading temporary results\n')
77 P_values_rDiff_parametric=ones(size(genes,2),1);
78 P_values_rDiff_poisson=ones(size(genes,2),1);
79
80
81 P_values_rDiff_parametric_error_flag=cell(size(genes,2),1);
82 P_values_rDiff_poisson_error_flag=cell(size(genes,2),1);
83 NAMES=cell(size(genes,2),1);
84 %Field containing the errors
85 ERRORS_NR=[];
86 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))'];
87 % Iterate over the result files to load the data from the count files
88 for j = 1:CFG.rproc_num_jobs
89 IN_FILENAME=[CFG.out_base_temp 'P_values_parametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)];
90
91 IDX=idx(idx(:,2)==j,1);
92 try
93 load(IN_FILENAME)
94 for k=1:length(IDX)
95 if isempty(P_VALS{k,1}) %Gene was not tested for
96 %some reason
97 P_values_rDiff_parametric_error_flag{IDX(k)}='NOT_TESTED';
98 P_values_rDiff_poisson_error_flag{IDX(k)}='NOT_TESTED';
99 else
100 if not(isempty(P_VALS{k,1}))
101 NAMES{IDX(k)}=P_VALS{k,1};
102 end
103 COUNTER=2;
104 %Get the results from rDiff.parametric
105 if CFG.perform_parametric
106 if not(isempty(P_VALS{k,COUNTER}))
107 P_values_rDiff_parametric(IDX(k))=P_VALS{k,COUNTER}{1};
108 if not(isempty(P_VALS{k,COUNTER}{2}))
109 P_values_rDiff_parametric_error_flag{IDX(k)}='NOT_TESTED';
110 else
111 P_values_rDiff_parametric_error_flag{IDX(k)}='OK';
112 end
113 COUNTER=COUNTER+1;
114 end
115 end
116
117 %Get the results from rDiff.poisson
118 if CFG.perform_poisson
119 if not(isempty(P_VALS{k,COUNTER}))
120 P_values_rDiff_poisson(IDX(k))=P_VALS{k,COUNTER}{1};
121 if not(isempty(P_VALS{k,COUNTER}{2}))
122 P_values_rDiff_poisson_error_flag{IDX(k)}='NOT_TESTED';
123 else
124 P_values_rDiff_poisson_error_flag{IDX(k)}='OK';
125 end
126 end
127 end
128 end
129 end
130 catch
131 for k=1:length(IDX)
132 P_values_rDiff_parametric_error_flag{IDX(k)}='NOT_TESTED';
133 P_values_rDiff_poisson_error_flag{IDX(k)}='NOT_TESTED';
134 end
135 warning(['There was a problem loading: ' IN_FILENAME ])
136 ERRORS_NR=[ERRORS_NR;j];
137 end
138 end
139 if not(isempty(ERRORS_NR))
140 warning('There have been problems loading some of the parametric test result files');
141 end
142
143 fprintf('Writing output files\n')
144 %Generate P-value table for rDiff.nonparametric
145 if CFG.perform_parametric
146 %Open file handler
147 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_parametric.tab'];
148 fid=fopen(P_TABLE_FNAME,'w');
149
150 %print header
151 fprintf(fid,'gene\tp-value\ttest-status\n');
152
153 %print lines
154 for j=1:size(genes,2)
155 fprintf(fid,'%s',NAMES{j});
156 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_parametric(j),P_values_rDiff_parametric_error_flag{j});
157 end
158 %close file handler
159 fclose(fid);
160 end
161
162
163
164 %Generate P-value table for rDiff.poisson
165 if CFG.perform_poisson
166 %Open file handler
167 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_poisson.tab'];
168 fid=fopen(P_TABLE_FNAME,'w');
169
170 %print header
171 fprintf(fid,'gene\tp-value\ttest-status\n');
172
173 %print lines
174 for j=1:size(genes,2)
175 fprintf(fid,'%s',NAMES{j});
176 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_poisson(j),P_values_rDiff_poisson_error_flag{j});
177 end
178 %close file handler
179 fclose(fid);
180 end
181
182