0
|
1 function [VARIANCE]=estimate_variance_helper(CFG,SAMPLE_IX,COUNTS_PER_GENE,GENE_EXPRESSION_MATRIX)
|
|
2
|
|
3
|
|
4 %compute means and variances
|
|
5 %Get the number of of positions to be used in MEANS and VARS
|
|
6 LENGTH=0;
|
|
7 for i=1:size(COUNTS_PER_GENE,1)
|
|
8 try
|
|
9 TEMP_SUM=COUNTS_PER_GENE{i, SAMPLE_IX(1)};
|
|
10 for j=2:length(SAMPLE_IX)
|
|
11 TEMP_SUM=TEMP_SUM+COUNTS_PER_GENE{i,SAMPLE_IX(j)};
|
|
12 end
|
|
13 LENGTH=LENGTH+sum(TEMP_SUM>0);
|
|
14 catch
|
|
15 LENGTH=LENGTH;
|
|
16 end
|
|
17 end
|
|
18
|
|
19 %Compute the means and variances, normalizing for gene expression
|
|
20 MEANS=zeros(1,LENGTH);
|
|
21 VARS=zeros(1,LENGTH);
|
|
22 COUNTER=1;
|
|
23 for i=1:size(COUNTS_PER_GENE,1)
|
|
24 try
|
|
25 TS=GENE_EXPRESSION_MATRIX(i, SAMPLE_IX);
|
|
26 S=length(TS)*TS/sum(TS);
|
|
27
|
|
28 TEMP_SUM=sparse(zeros(length(SAMPLE_IX),size(COUNTS_PER_GENE{i, SAMPLE_IX(1)},2)));
|
|
29 %if S(1)==0
|
|
30 % TEMP_SUM(1,:)=0;
|
|
31 %else
|
|
32 % TEMP_SUM(1,:)=COUNTS_PER_GENE{i, SAMPLE_IX(1)}/S(1);
|
|
33 %end
|
|
34 for j=1:length(SAMPLE_IX)
|
|
35 if S(j)==0
|
|
36 TEMP_SUM(j,:)=0;
|
|
37 else
|
|
38 TEMP_SUM(j,:)=COUNTS_PER_GENE{i,SAMPLE_IX(j)}/S(j);
|
|
39 end
|
|
40 end
|
|
41 TEMP_SUM=TEMP_SUM(:,sum(TEMP_SUM,1)>0);
|
|
42 MEANS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=mean(TEMP_SUM(:,sum(TEMP_SUM,1)>0),1);
|
|
43 VARS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=var(TEMP_SUM(:,sum(TEMP_SUM,1)>0));
|
|
44 COUNTER=COUNTER+size(TEMP_SUM,2);
|
|
45 catch
|
|
46 LENGTH=LENGTH;
|
|
47 end
|
|
48 end
|
|
49
|
|
50 %filter those which have a zeros mean
|
|
51 NONZERO_IDX=MEANS>0;
|
|
52 MEANS=MEANS(NONZERO_IDX);
|
|
53 VARS=VARS(NONZERO_IDX);
|
|
54 % Subsample the mean variance pairs to reduce the number of
|
|
55 % samples which have a low mean
|
|
56 [MEANS,POS]=sort(MEANS);
|
|
57 MAX_VAL=max(MEANS);
|
|
58 MIN_VAL=min(MEANS);
|
|
59 BOUNDS=exp(linspace(log(MIN_VAL),log(MAX_VAL), CFG.variance_samplebins+1));
|
|
60 SAMPLE=[];
|
|
61 for i=1:CFG.variance_samplebins
|
|
62 NR_IN_BIN=length((find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')));
|
|
63 if NR_IN_BIN==0
|
|
64 continue;
|
|
65 elseif NR_IN_BIN<= CFG.variance_samples_per_bin
|
|
66 SAMPLE=[SAMPLE find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')];
|
|
67 else
|
|
68 CHUNK_SAMPLE=find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last');
|
|
69 TEMP_SAMPLE=randperm(NR_IN_BIN);
|
|
70 SAMPLE=[SAMPLE CHUNK_SAMPLE(TEMP_SAMPLE(1:CFG.variance_samples_per_bin))];
|
|
71 end
|
|
72 end
|
|
73 VARS=VARS(POS);
|
|
74
|
|
75 %estimate variance function
|
|
76 [VARIANCE]=estimate_variance(full(MEANS(SAMPLE))',full(VARS(SAMPLE)'));
|
|
77 return
|