Mercurial > repos > vipints > rdiff
comparison rDiff/src/perform_nonparametric_tests.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0f80a5141704 |
---|---|
1 function []=perform_nonparametric_tests(CFG,genes,variance_function_nonparametric_1, variance_function_nonparametric_2) | |
2 | |
3 | |
4 %Get the gene expression | |
5 fprintf('Loading gene expression\n') | |
6 if isempty(CFG.Counts_gene_expression) | |
7 EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab']; | |
8 else | |
9 EXPR_TAB_FILENAME=CFG.Counts_gene_expression; | |
10 end | |
11 | |
12 try | |
13 Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1); | |
14 Gene_expression=Gene_expression.data; | |
15 catch | |
16 error(['Could not open: ' EXPR_TAB_FILENAME]) | |
17 end | |
18 | |
19 | |
20 %Get the counts | |
21 fprintf('Loading nonparametric region counts\n') | |
22 if isempty(CFG.Counts_rDiff_nonparametric) | |
23 IN_FILENAME=[CFG.out_base 'Nonparametric_region_counts.mat']; | |
24 load(IN_FILENAME,'Counts_rDiff_nonparametric') | |
25 else | |
26 IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_nonparametric]; | |
27 load(IN_FILENAMEc,'Counts_rDiff_nonparametric') | |
28 end | |
29 | |
30 | |
31 | |
32 if CFG.use_rproc | |
33 JB_NR=1; | |
34 JOB_INFO = rproc_empty(); | |
35 end | |
36 | |
37 PAR.variance_function_nonparametric_1=variance_function_nonparametric_1; | |
38 PAR.variance_function_nonparametric_2=variance_function_nonparametric_2; | |
39 if 1==1 | |
40 %%% Perform the test | |
41 % configuration | |
42 if not(CFG.use_rproc) | |
43 fprintf('Performing nonparametric testing\n') | |
44 end | |
45 %define the splits of the genes for the jobs | |
46 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; | |
47 % submit jobs to cluster | |
48 | |
49 for i = 1:CFG.rproc_num_jobs | |
50 PAR.genes = genes(idx(idx(:,2)==i,1)); | |
51 PAR.Counts_rDiff_nonparametric=Counts_rDiff_nonparametric(idx(idx(:,2)==i,1),:); | |
52 PAR.Gene_expression=Gene_expression(idx(idx(:,2)==i,1),:); | |
53 CFG.rproc_memreq = 5000; | |
54 CFG.rproc_par.mem_req_resubmit = [5000 10000 32000]; | |
55 CFG.rproc_par.identifier = sprintf('Pnp.%i-',i); | |
56 CFG.outfile_prefix=[CFG.out_base_temp 'P_values_nonparametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)]; | |
57 PAR.CFG=CFG; | |
58 if CFG.use_rproc | |
59 fprintf(1, 'Submitting job %i to cluster\n',i); | |
60 JOB_INFO(JB_NR) = rproc('get_nonparametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time); | |
61 JB_NR=JB_NR+1; | |
62 else | |
63 get_nonparametric_tests_caller(PAR); | |
64 end | |
65 end | |
66 if CFG.use_rproc | |
67 [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1); | |
68 end | |
69 end | |
70 % Get the test results | |
71 | |
72 %%% Generate the output files | |
73 fprintf('Reading temporary results\n') | |
74 P_values_rDiff_nonparametric=ones(size(genes,2),1); | |
75 P_values_rDiff_mmd=ones(size(genes,2),1); | |
76 | |
77 | |
78 P_values_rDiff_nonparametric_error_flag=cell(size(genes,2),1); | |
79 P_values_rDiff_mmd_error_flag=cell(size(genes,2),1); | |
80 NAMES=cell(size(genes,2),1); | |
81 %Field containing the errors | |
82 ERRORS_NR=[]; | |
83 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; | |
84 % Iterate over the result files to load the data from the count files | |
85 for j = 1:CFG.rproc_num_jobs | |
86 IN_FILENAME=[CFG.out_base_temp 'P_values_nonparametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)]; | |
87 IDX=idx(idx(:,2)==j,1); | |
88 try | |
89 load(IN_FILENAME) | |
90 for k=1:length(IDX) | |
91 if isempty(P_VALS{k,1}) %Gene was not tested for | |
92 %some reason | |
93 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED'; | |
94 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED'; | |
95 else | |
96 if not(isempty(P_VALS{k,1})) | |
97 NAMES{IDX(k)}=P_VALS{k,1}; | |
98 end | |
99 COUNTER=2; | |
100 %Get the results from rDiff.mmd | |
101 if CFG.perform_mmd | |
102 if not(isempty(P_VALS{k,COUNTER})) | |
103 P_values_rDiff_mmd(IDX(k))=P_VALS{k,COUNTER}{1}; | |
104 if (isempty(P_VALS{k,COUNTER}{2})) | |
105 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED'; | |
106 else | |
107 P_values_rDiff_mmd_error_flag{IDX(k)}='OK'; | |
108 end | |
109 end | |
110 COUNTER=COUNTER+1; | |
111 end | |
112 | |
113 %Get the results from rDiff.parametric | |
114 if CFG.perform_nonparametric | |
115 if not(isempty(P_VALS{k,COUNTER})) | |
116 P_values_rDiff_nonparametric(IDX(k))=P_VALS{k,COUNTER}{1}; | |
117 if length(P_VALS{k,COUNTER})>1 | |
118 if iscell(P_VALS{k,COUNTER}{2}) && length(P_VALS{k,COUNTER}{2}{3})>3 | |
119 P_values_rDiff_nonparametric(IDX(k))=min(10*min(P_VALS{k,COUNTER}{2}{3})+max(P_VALS{k,COUNTER}{2}{3})*(10/(CFG.bootstraps+1)),1); | |
120 end | |
121 end | |
122 if (isempty(P_VALS{k,COUNTER}{2})) | |
123 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED'; | |
124 else | |
125 P_values_rDiff_nonparametric_error_flag{IDX(k)}='OK'; | |
126 end | |
127 end; | |
128 end | |
129 | |
130 end | |
131 end | |
132 catch | |
133 for k=1:length(IDX) | |
134 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED'; | |
135 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED'; | |
136 end | |
137 warning(['There was a problem loading: ' IN_FILENAME ]) | |
138 ERRORS_NR=[ERRORS_NR;j]; | |
139 end | |
140 end | |
141 if not(isempty(ERRORS_NR)) | |
142 warning('There have been problems loading some of the parametric test result files'); | |
143 end | |
144 | |
145 fprintf('Writing output files\n') | |
146 %Generate P-value table for rDiff.nonparametric | |
147 if CFG.perform_nonparametric | |
148 %Open file handler | |
149 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_nonparametric.tab']; | |
150 fid=fopen(P_TABLE_FNAME,'w'); | |
151 | |
152 %print header | |
153 fprintf(fid,'gene\tp-value\ttest-status\n'); | |
154 | |
155 %print lines | |
156 for j=1:size(genes,2) | |
157 fprintf(fid,'%s',NAMES{j}); | |
158 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_nonparametric(j),P_values_rDiff_nonparametric_error_flag{j}); | |
159 end | |
160 %close file handler | |
161 fclose(fid) | |
162 end | |
163 | |
164 | |
165 | |
166 %Generate P-value table for rDiff.mmd | |
167 if CFG.perform_mmd | |
168 %Open file handler | |
169 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_mmd.tab']; | |
170 fid=fopen(P_TABLE_FNAME,'w'); | |
171 | |
172 %print header | |
173 fprintf(fid,'gene\tp-value\ttest-status\n'); | |
174 | |
175 %print lines | |
176 for j=1:size(genes,2) | |
177 fprintf(fid,'%s',NAMES{j}); | |
178 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_mmd(j),P_values_rDiff_mmd_error_flag{j}); | |
179 end | |
180 %close file handler | |
181 fclose(fid) | |
182 end | |
183 | |
184 |