Mercurial > repos > vipints > rdiff
diff rDiff/src/perform_nonparametric_tests.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rDiff/src/perform_nonparametric_tests.m Thu Feb 14 23:38:36 2013 -0500 @@ -0,0 +1,184 @@ +function []=perform_nonparametric_tests(CFG,genes,variance_function_nonparametric_1, variance_function_nonparametric_2) + + +%Get the gene expression +fprintf('Loading gene expression\n') +if isempty(CFG.Counts_gene_expression) + EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab']; +else + EXPR_TAB_FILENAME=CFG.Counts_gene_expression; +end + +try + Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1); + Gene_expression=Gene_expression.data; +catch + error(['Could not open: ' EXPR_TAB_FILENAME]) +end + + +%Get the counts +fprintf('Loading nonparametric region counts\n') +if isempty(CFG.Counts_rDiff_nonparametric) + IN_FILENAME=[CFG.out_base 'Nonparametric_region_counts.mat']; + load(IN_FILENAME,'Counts_rDiff_nonparametric') +else + IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_nonparametric]; + load(IN_FILENAMEc,'Counts_rDiff_nonparametric') +end + + + +if CFG.use_rproc + JB_NR=1; + JOB_INFO = rproc_empty(); +end + +PAR.variance_function_nonparametric_1=variance_function_nonparametric_1; +PAR.variance_function_nonparametric_2=variance_function_nonparametric_2; +if 1==1 +%%% Perform the test +% configuration +if not(CFG.use_rproc) + fprintf('Performing nonparametric testing\n') +end +%define the splits of the genes for the jobs +idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; +% submit jobs to cluster + +for i = 1:CFG.rproc_num_jobs + PAR.genes = genes(idx(idx(:,2)==i,1)); + PAR.Counts_rDiff_nonparametric=Counts_rDiff_nonparametric(idx(idx(:,2)==i,1),:); + PAR.Gene_expression=Gene_expression(idx(idx(:,2)==i,1),:); + CFG.rproc_memreq = 5000; + CFG.rproc_par.mem_req_resubmit = [5000 10000 32000]; + CFG.rproc_par.identifier = sprintf('Pnp.%i-',i); + CFG.outfile_prefix=[CFG.out_base_temp 'P_values_nonparametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)]; + PAR.CFG=CFG; + if CFG.use_rproc + fprintf(1, 'Submitting job %i to cluster\n',i); + JOB_INFO(JB_NR) = rproc('get_nonparametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time); + JB_NR=JB_NR+1; + else + get_nonparametric_tests_caller(PAR); + end +end +if CFG.use_rproc + [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1); +end +end +% Get the test results + +%%% Generate the output files +fprintf('Reading temporary results\n') +P_values_rDiff_nonparametric=ones(size(genes,2),1); +P_values_rDiff_mmd=ones(size(genes,2),1); + + +P_values_rDiff_nonparametric_error_flag=cell(size(genes,2),1); +P_values_rDiff_mmd_error_flag=cell(size(genes,2),1); +NAMES=cell(size(genes,2),1); +%Field containing the errors +ERRORS_NR=[]; +idx=[(1:size(genes,2))',ceil((1:size(genes,2))*CFG.rproc_num_jobs/size(genes,2))']; +% Iterate over the result files to load the data from the count files +for j = 1:CFG.rproc_num_jobs + IN_FILENAME=[CFG.out_base_temp 'P_values_nonparametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)]; + IDX=idx(idx(:,2)==j,1); + try + load(IN_FILENAME) + for k=1:length(IDX) + if isempty(P_VALS{k,1}) %Gene was not tested for + %some reason + P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED'; + P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED'; + else + if not(isempty(P_VALS{k,1})) + NAMES{IDX(k)}=P_VALS{k,1}; + end + COUNTER=2; + %Get the results from rDiff.mmd + if CFG.perform_mmd + if not(isempty(P_VALS{k,COUNTER})) + P_values_rDiff_mmd(IDX(k))=P_VALS{k,COUNTER}{1}; + if (isempty(P_VALS{k,COUNTER}{2})) + P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED'; + else + P_values_rDiff_mmd_error_flag{IDX(k)}='OK'; + end + end + COUNTER=COUNTER+1; + end + + %Get the results from rDiff.parametric + if CFG.perform_nonparametric + if not(isempty(P_VALS{k,COUNTER})) + P_values_rDiff_nonparametric(IDX(k))=P_VALS{k,COUNTER}{1}; + if length(P_VALS{k,COUNTER})>1 + if iscell(P_VALS{k,COUNTER}{2}) && length(P_VALS{k,COUNTER}{2}{3})>3 + P_values_rDiff_nonparametric(IDX(k))=min(10*min(P_VALS{k,COUNTER}{2}{3})+max(P_VALS{k,COUNTER}{2}{3})*(10/(CFG.bootstraps+1)),1); + end + end + if (isempty(P_VALS{k,COUNTER}{2})) + P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED'; + else + P_values_rDiff_nonparametric_error_flag{IDX(k)}='OK'; + end + end; + end + + end + end + catch + for k=1:length(IDX) + P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED'; + P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED'; + end + warning(['There was a problem loading: ' IN_FILENAME ]) + ERRORS_NR=[ERRORS_NR;j]; + end +end +if not(isempty(ERRORS_NR)) + warning('There have been problems loading some of the parametric test result files'); +end + +fprintf('Writing output files\n') +%Generate P-value table for rDiff.nonparametric +if CFG.perform_nonparametric +%Open file handler + P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_nonparametric.tab']; + fid=fopen(P_TABLE_FNAME,'w'); + + %print header + fprintf(fid,'gene\tp-value\ttest-status\n'); + + %print lines + for j=1:size(genes,2) + fprintf(fid,'%s',NAMES{j}); + fprintf(fid,'\t%f\t%s\n',P_values_rDiff_nonparametric(j),P_values_rDiff_nonparametric_error_flag{j}); + end + %close file handler + fclose(fid) +end + + + +%Generate P-value table for rDiff.mmd +if CFG.perform_mmd +%Open file handler + P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_mmd.tab']; + fid=fopen(P_TABLE_FNAME,'w'); + + %print header + fprintf(fid,'gene\tp-value\ttest-status\n'); + + %print lines + for j=1:size(genes,2) + fprintf(fid,'%s',NAMES{j}); + fprintf(fid,'\t%f\t%s\n',P_values_rDiff_mmd(j),P_values_rDiff_mmd_error_flag{j}); + end + %close file handler + fclose(fid) +end + +