annotate rDiff/src/perform_nonparametric_tests.m @ 2:233c30f91d66

updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 07:15:44 -0400
parents 0f80a5141704
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
1 function []=perform_nonparametric_tests(CFG,genes,variance_function_nonparametric_1, variance_function_nonparametric_2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
2
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
3
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
4 %Get the gene expression
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
5 fprintf('Loading gene expression\n')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
6 if isempty(CFG.Counts_gene_expression)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
7 EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab'];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
8 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
9 EXPR_TAB_FILENAME=CFG.Counts_gene_expression;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
10 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
11
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
12 try
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
13 Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
14 Gene_expression=Gene_expression.data;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
15 catch
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
16 error(['Could not open: ' EXPR_TAB_FILENAME])
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
17 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
18
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
19
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
20 %Get the counts
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
21 fprintf('Loading nonparametric region counts\n')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
22 if isempty(CFG.Counts_rDiff_nonparametric)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
23 IN_FILENAME=[CFG.out_base 'Nonparametric_region_counts.mat'];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
24 load(IN_FILENAME,'Counts_rDiff_nonparametric')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
25 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
26 IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_nonparametric];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
27 load(IN_FILENAMEc,'Counts_rDiff_nonparametric')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
28 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
29
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
30
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
31
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
32 if CFG.use_rproc
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
33 JB_NR=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
34 JOB_INFO = rproc_empty();
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
35 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
36
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
37 PAR.variance_function_nonparametric_1=variance_function_nonparametric_1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
38 PAR.variance_function_nonparametric_2=variance_function_nonparametric_2;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
39 if 1==1
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
40 %%% Perform the test
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
41 % configuration
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
42 if not(CFG.use_rproc)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
43 fprintf('Performing nonparametric testing\n')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
44 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
45 %define the splits of the genes for the jobs
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
46 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))'];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
47 % submit jobs to cluster
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
48
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
49 for i = 1:CFG.rproc_num_jobs
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
50 PAR.genes = genes(idx(idx(:,2)==i,1));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
51 PAR.Counts_rDiff_nonparametric=Counts_rDiff_nonparametric(idx(idx(:,2)==i,1),:);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
52 PAR.Gene_expression=Gene_expression(idx(idx(:,2)==i,1),:);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
53 CFG.rproc_memreq = 5000;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
54 CFG.rproc_par.mem_req_resubmit = [5000 10000 32000];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
55 CFG.rproc_par.identifier = sprintf('Pnp.%i-',i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
56 CFG.outfile_prefix=[CFG.out_base_temp 'P_values_nonparametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
57 PAR.CFG=CFG;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
58 if CFG.use_rproc
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
59 fprintf(1, 'Submitting job %i to cluster\n',i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
60 JOB_INFO(JB_NR) = rproc('get_nonparametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
61 JB_NR=JB_NR+1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
62 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
63 get_nonparametric_tests_caller(PAR);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
64 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
65 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
66 if CFG.use_rproc
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
67 [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
68 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
69 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
70 % Get the test results
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
71
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
72 %%% Generate the output files
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
73 fprintf('Reading temporary results\n')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
74 P_values_rDiff_nonparametric=ones(size(genes,2),1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
75 P_values_rDiff_mmd=ones(size(genes,2),1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
76
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
77
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
78 P_values_rDiff_nonparametric_error_flag=cell(size(genes,2),1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
79 P_values_rDiff_mmd_error_flag=cell(size(genes,2),1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
80 NAMES=cell(size(genes,2),1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
81 %Field containing the errors
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
82 ERRORS_NR=[];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
83 idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))'];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
84 % Iterate over the result files to load the data from the count files
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
85 for j = 1:CFG.rproc_num_jobs
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
86 IN_FILENAME=[CFG.out_base_temp 'P_values_nonparametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
87 IDX=idx(idx(:,2)==j,1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
88 try
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
89 load(IN_FILENAME)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
90 for k=1:length(IDX)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
91 if isempty(P_VALS{k,1}) %Gene was not tested for
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
92 %some reason
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
93 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
94 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
95 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
96 if not(isempty(P_VALS{k,1}))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
97 NAMES{IDX(k)}=P_VALS{k,1};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
98 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
99 COUNTER=2;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
100 %Get the results from rDiff.mmd
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
101 if CFG.perform_mmd
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
102 if not(isempty(P_VALS{k,COUNTER}))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
103 P_values_rDiff_mmd(IDX(k))=P_VALS{k,COUNTER}{1};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
104 if (isempty(P_VALS{k,COUNTER}{2}))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
105 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
106 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
107 P_values_rDiff_mmd_error_flag{IDX(k)}='OK';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
108 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
109 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
110 COUNTER=COUNTER+1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
111 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
112
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
113 %Get the results from rDiff.parametric
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
114 if CFG.perform_nonparametric
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
115 if not(isempty(P_VALS{k,COUNTER}))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
116 P_values_rDiff_nonparametric(IDX(k))=P_VALS{k,COUNTER}{1};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
117 if length(P_VALS{k,COUNTER})>1
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
118 if iscell(P_VALS{k,COUNTER}{2}) && length(P_VALS{k,COUNTER}{2}{3})>3
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
119 P_values_rDiff_nonparametric(IDX(k))=min(10*min(P_VALS{k,COUNTER}{2}{3})+max(P_VALS{k,COUNTER}{2}{3})*(10/(CFG.bootstraps+1)),1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
120 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
121 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
122 if (isempty(P_VALS{k,COUNTER}{2}))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
123 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
124 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
125 P_values_rDiff_nonparametric_error_flag{IDX(k)}='OK';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
126 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
127 end;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
128 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
129
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
130 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
131 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
132 catch
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
133 for k=1:length(IDX)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
134 P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
135 P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
136 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
137 warning(['There was a problem loading: ' IN_FILENAME ])
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
138 ERRORS_NR=[ERRORS_NR;j];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
139 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
140 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
141 if not(isempty(ERRORS_NR))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
142 warning('There have been problems loading some of the parametric test result files');
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
143 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
144
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
145 fprintf('Writing output files\n')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
146 %Generate P-value table for rDiff.nonparametric
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
147 if CFG.perform_nonparametric
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
148 %Open file handler
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
149 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_nonparametric.tab'];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
150 fid=fopen(P_TABLE_FNAME,'w');
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
151
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
152 %print header
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
153 fprintf(fid,'gene\tp-value\ttest-status\n');
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
154
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
155 %print lines
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
156 for j=1:size(genes,2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
157 fprintf(fid,'%s',NAMES{j});
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
158 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_nonparametric(j),P_values_rDiff_nonparametric_error_flag{j});
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
159 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
160 %close file handler
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
161 fclose(fid)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
162 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
163
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
164
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
165
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
166 %Generate P-value table for rDiff.mmd
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
167 if CFG.perform_mmd
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
168 %Open file handler
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
169 P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_mmd.tab'];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
170 fid=fopen(P_TABLE_FNAME,'w');
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
171
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
172 %print header
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
173 fprintf(fid,'gene\tp-value\ttest-status\n');
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
174
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
175 %print lines
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
176 for j=1:size(genes,2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
177 fprintf(fid,'%s',NAMES{j});
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
178 fprintf(fid,'\t%f\t%s\n',P_values_rDiff_mmd(j),P_values_rDiff_mmd_error_flag{j});
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
179 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
180 %close file handler
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
181 fclose(fid)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
182 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
183
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
184