Mercurial > repos > vipints > rdiff
diff rDiff/src/variance/estimate_variance_helper.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rDiff/src/variance/estimate_variance_helper.m Thu Feb 14 23:38:36 2013 -0500 @@ -0,0 +1,77 @@ +function [VARIANCE]=estimate_variance_helper(CFG,SAMPLE_IX,COUNTS_PER_GENE,GENE_EXPRESSION_MATRIX) + + +%compute means and variances +%Get the number of of positions to be used in MEANS and VARS +LENGTH=0; +for i=1:size(COUNTS_PER_GENE,1) + try + TEMP_SUM=COUNTS_PER_GENE{i, SAMPLE_IX(1)}; + for j=2:length(SAMPLE_IX) + TEMP_SUM=TEMP_SUM+COUNTS_PER_GENE{i,SAMPLE_IX(j)}; + end + LENGTH=LENGTH+sum(TEMP_SUM>0); + catch + LENGTH=LENGTH; + end +end + +%Compute the means and variances, normalizing for gene expression +MEANS=zeros(1,LENGTH); +VARS=zeros(1,LENGTH); +COUNTER=1; +for i=1:size(COUNTS_PER_GENE,1) + try + TS=GENE_EXPRESSION_MATRIX(i, SAMPLE_IX); + S=length(TS)*TS/sum(TS); + + TEMP_SUM=sparse(zeros(length(SAMPLE_IX),size(COUNTS_PER_GENE{i, SAMPLE_IX(1)},2))); + %if S(1)==0 + % TEMP_SUM(1,:)=0; + %else + % TEMP_SUM(1,:)=COUNTS_PER_GENE{i, SAMPLE_IX(1)}/S(1); + %end + for j=1:length(SAMPLE_IX) + if S(j)==0 + TEMP_SUM(j,:)=0; + else + TEMP_SUM(j,:)=COUNTS_PER_GENE{i,SAMPLE_IX(j)}/S(j); + end + end + TEMP_SUM=TEMP_SUM(:,sum(TEMP_SUM,1)>0); + MEANS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=mean(TEMP_SUM(:,sum(TEMP_SUM,1)>0),1); + VARS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=var(TEMP_SUM(:,sum(TEMP_SUM,1)>0)); + COUNTER=COUNTER+size(TEMP_SUM,2); + catch + LENGTH=LENGTH; + end +end + +%filter those which have a zeros mean +NONZERO_IDX=MEANS>0; +MEANS=MEANS(NONZERO_IDX); +VARS=VARS(NONZERO_IDX); +% Subsample the mean variance pairs to reduce the number of +% samples which have a low mean +[MEANS,POS]=sort(MEANS); +MAX_VAL=max(MEANS); +MIN_VAL=min(MEANS); +BOUNDS=exp(linspace(log(MIN_VAL),log(MAX_VAL), CFG.variance_samplebins+1)); +SAMPLE=[]; +for i=1:CFG.variance_samplebins + NR_IN_BIN=length((find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last'))); + if NR_IN_BIN==0 + continue; + elseif NR_IN_BIN<= CFG.variance_samples_per_bin + SAMPLE=[SAMPLE find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')]; + else + CHUNK_SAMPLE=find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last'); + TEMP_SAMPLE=randperm(NR_IN_BIN); + SAMPLE=[SAMPLE CHUNK_SAMPLE(TEMP_SAMPLE(1:CFG.variance_samples_per_bin))]; + end +end +VARS=VARS(POS); + +%estimate variance function +[VARIANCE]=estimate_variance(full(MEANS(SAMPLE))',full(VARS(SAMPLE)')); +return