Mercurial > repos > vipints > rdiff
view rDiff/src/perform_nonparametric_tests.m @ 2:233c30f91d66
updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
author | vipints <vipin@cbio.mskcc.org> |
---|---|
date | Tue, 08 Oct 2013 07:15:44 -0400 |
parents | 0f80a5141704 |
children |
line wrap: on
line source
function []=perform_nonparametric_tests(CFG,genes,variance_function_nonparametric_1, variance_function_nonparametric_2)
% PERFORM_NONPARAMETRIC_TESTS  Run the nonparametric tests and write P-value tables.
%
%   perform_nonparametric_tests(CFG, genes, vf1, vf2)
%
%   Steps performed:
%     1. Load the gene expression table (tab separated, one header line) from
%        CFG.Counts_gene_expression, or from [CFG.out_base 'Gene_expression.tab']
%        when that field is empty.
%     2. Load the variable 'Counts_rDiff_nonparametric' from a MAT-file.
%     3. Split the genes into CFG.rproc_num_jobs chunks and call
%        get_nonparametric_tests_caller on each chunk, either directly or
%        through rproc when CFG.use_rproc is set.
%     4. Read the per-chunk result files
%        [CFG.out_base_temp 'P_values_nonparametric_<i>_of_<n>'] and collect
%        P-values and status flags.
%     5. Write P_values_rDiff_nonparametric.tab and/or P_values_rDiff_mmd.tab
%        below CFG.out_base, depending on CFG.perform_nonparametric and
%        CFG.perform_mmd.
%
%   Inputs:
%     CFG    - configuration struct; the fields read here are out_base,
%              out_base_temp, Counts_gene_expression,
%              Counts_rDiff_nonparametric, use_rproc, rproc_num_jobs,
%              rproc_par, rproc_time, perform_mmd, perform_nonparametric and
%              bootstraps.
%     genes  - array of genes, indexed along its second dimension.
%     variance_function_nonparametric_1, variance_function_nonparametric_2
%            - passed through unchanged to the worker inside PAR.
%
%   Errors:
%     Raises an error if the gene expression table cannot be read or if an
%     output table cannot be opened for writing.

%Get the gene expression
fprintf('Loading gene expression\n')
if isempty(CFG.Counts_gene_expression)
    EXPR_TAB_FILENAME=[CFG.out_base 'Gene_expression.tab'];
else
    EXPR_TAB_FILENAME=CFG.Counts_gene_expression;
end

try
    Gene_expression=importdata(EXPR_TAB_FILENAME,'\t',1);
    Gene_expression=Gene_expression.data;
catch
    error(['Could not open: ' EXPR_TAB_FILENAME])
end

%Get the counts
fprintf('Loading nonparametric region counts\n')
if isempty(CFG.Counts_rDiff_nonparametric)
    IN_FILENAME=[CFG.out_base 'Nonparametric_region_counts.mat'];
else
    % NOTE(review): unlike CFG.Counts_gene_expression above, this path is
    % prefixed with CFG.out_base -- confirm that this is intended.
    IN_FILENAME=[CFG.out_base CFG.Counts_rDiff_nonparametric];
end
% Fix: the original called load(IN_FILENAMEc, ...) in the else-branch, which
% referenced an undefined variable.
load(IN_FILENAME,'Counts_rDiff_nonparametric')

if CFG.use_rproc
    JB_NR=1;
    JOB_INFO = rproc_empty();
end

PAR.variance_function_nonparametric_1=variance_function_nonparametric_1;
PAR.variance_function_nonparametric_2=variance_function_nonparametric_2;

%%% Perform the test
if not(CFG.use_rproc)
    fprintf('Performing nonparametric testing\n')
end

%define the splits of the genes for the jobs: JOB_OF_GENE(g) is the job
%number that gene g is assigned to. Computed once and reused when the
%results are read back.
NUM_GENES=size(genes,2);
JOB_OF_GENE=ceil((1:NUM_GENES)*CFG.rproc_num_jobs/NUM_GENES)';

% submit jobs to cluster (or run them locally)
for i = 1:CFG.rproc_num_jobs
    IDX=find(JOB_OF_GENE==i);
    PAR.genes = genes(IDX);
    PAR.Counts_rDiff_nonparametric=Counts_rDiff_nonparametric(IDX,:);
    PAR.Gene_expression=Gene_expression(IDX,:);
    CFG.rproc_memreq = 5000;
    CFG.rproc_par.mem_req_resubmit = [5000 10000 32000];
    CFG.rproc_par.identifier = sprintf('Pnp.%i-',i);
    CFG.outfile_prefix=[CFG.out_base_temp 'P_values_nonparametric_' num2str(i) '_of_' num2str(CFG.rproc_num_jobs)];
    PAR.CFG=CFG;
    if CFG.use_rproc
        fprintf(1, 'Submitting job %i to cluster\n',i);
        JOB_INFO(JB_NR) = rproc('get_nonparametric_tests_caller', PAR,CFG.rproc_memreq, CFG.rproc_par, CFG.rproc_time);
        JB_NR=JB_NR+1;
    else
        get_nonparametric_tests_caller(PAR);
    end
end
if CFG.use_rproc
    [JOB_INFO num_crashed] = rproc_wait(JOB_INFO, 60, 1, -1);
end

% Get the test results
%%% Generate the output files
fprintf('Reading temporary results\n')

% P-values default to 1; flags stay empty until a result is read.
P_values_rDiff_nonparametric=ones(NUM_GENES,1);
P_values_rDiff_mmd=ones(NUM_GENES,1);
P_values_rDiff_nonparametric_error_flag=cell(NUM_GENES,1);
P_values_rDiff_mmd_error_flag=cell(NUM_GENES,1);
NAMES=cell(NUM_GENES,1);

%Field containing the errors
ERRORS_NR=[];

% Iterate over the result files to load the data from the count files
for j = 1:CFG.rproc_num_jobs
    IN_FILENAME=[CFG.out_base_temp 'P_values_nonparametric_' num2str(j) '_of_' num2str(CFG.rproc_num_jobs)];
    IDX=find(JOB_OF_GENE==j);
    try
        % Load into a struct so that a result file lacking P_VALS raises
        % here instead of silently reusing P_VALS from the previous job.
        RESULT=load(IN_FILENAME);
        P_VALS=RESULT.P_VALS;
        for k=1:length(IDX)
            if isempty(P_VALS{k,1})
                %Gene was not tested for some reason
                P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
                P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
                continue
            end

            NAMES{IDX(k)}=P_VALS{k,1};
            % COUNTER is the column of P_VALS holding the next test result;
            % it advances for every test type that was enabled.
            COUNTER=2;

            %Get the results from rDiff.mmd
            if CFG.perform_mmd
                MMD_RES=P_VALS{k,COUNTER};
                if not(isempty(MMD_RES))
                    P_values_rDiff_mmd(IDX(k))=MMD_RES{1};
                    % Guard the second entry: a one-element result must not
                    % throw and invalidate the whole job.
                    if length(MMD_RES)<2 || isempty(MMD_RES{2})
                        P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
                    else
                        P_values_rDiff_mmd_error_flag{IDX(k)}='OK';
                    end
                end
                COUNTER=COUNTER+1;
            end

            %Get the results from rDiff.nonparametric
            if CFG.perform_nonparametric
                NP_RES=P_VALS{k,COUNTER};
                if not(isempty(NP_RES))
                    P_values_rDiff_nonparametric(IDX(k))=NP_RES{1};
                    if length(NP_RES)>1
                        % When the third entry of the extra result cell has
                        % more than three values, the P-value is replaced by
                        % a combination of their minimum and maximum,
                        % capped at 1.
                        if iscell(NP_RES{2}) && length(NP_RES{2}{3})>3
                            P_values_rDiff_nonparametric(IDX(k))=min(10*min(NP_RES{2}{3})+max(NP_RES{2}{3})*(10/(CFG.bootstraps+1)),1);
                        end
                    end
                    if length(NP_RES)<2 || isempty(NP_RES{2})
                        P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
                    else
                        P_values_rDiff_nonparametric_error_flag{IDX(k)}='OK';
                    end
                end
            end
        end
    catch
        % Any failure while reading this job's file marks all of its genes
        % as untested and records the job number.
        for k=1:length(IDX)
            P_values_rDiff_nonparametric_error_flag{IDX(k)}='NOT_TESTED';
            P_values_rDiff_mmd_error_flag{IDX(k)}='NOT_TESTED';
        end
        warning(['There was a problem loading: ' IN_FILENAME ])
        ERRORS_NR=[ERRORS_NR;j];
    end
end
if not(isempty(ERRORS_NR))
    warning('There have been problems loading some of the nonparametric test result files');
end

fprintf('Writing output files\n')

%Generate P-value table for rDiff.nonparametric
if CFG.perform_nonparametric
    %Open file handler
    P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_nonparametric.tab'];
    fid=fopen(P_TABLE_FNAME,'w');
    if fid==-1
        error(['Could not open: ' P_TABLE_FNAME])
    end

    %print header
    fprintf(fid,'gene\tp-value\ttest-status\n');

    %print lines
    for j=1:NUM_GENES
        fprintf(fid,'%s',NAMES{j});
        fprintf(fid,'\t%f\t%s\n',P_values_rDiff_nonparametric(j),P_values_rDiff_nonparametric_error_flag{j});
    end
    %close file handler
    fclose(fid);
end

%Generate P-value table for rDiff.mmd
if CFG.perform_mmd
    %Open file handler
    P_TABLE_FNAME=[CFG.out_base 'P_values_rDiff_mmd.tab'];
    fid=fopen(P_TABLE_FNAME,'w');
    if fid==-1
        error(['Could not open: ' P_TABLE_FNAME])
    end

    %print header
    fprintf(fid,'gene\tp-value\ttest-status\n');

    %print lines
    for j=1:NUM_GENES
        fprintf(fid,'%s',NAMES{j});
        fprintf(fid,'\t%f\t%s\n',P_values_rDiff_mmd(j),P_values_rDiff_mmd_error_flag{j});
    end
    %close file handler
    fclose(fid);
end