Mercurial > repos > vipints > rdiff
comparison rDiff/src/variance/estimate_variance_helper.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0f80a5141704 |
---|---|
1 function [VARIANCE]=estimate_variance_helper(CFG,SAMPLE_IX,COUNTS_PER_GENE,GENE_EXPRESSION_MATRIX) | |
2 | |
3 | |
4 %compute means and variances | |
5 %Get the number of of positions to be used in MEANS and VARS | |
6 LENGTH=0; | |
7 for i=1:size(COUNTS_PER_GENE,1) | |
8 try | |
9 TEMP_SUM=COUNTS_PER_GENE{i, SAMPLE_IX(1)}; | |
10 for j=2:length(SAMPLE_IX) | |
11 TEMP_SUM=TEMP_SUM+COUNTS_PER_GENE{i,SAMPLE_IX(j)}; | |
12 end | |
13 LENGTH=LENGTH+sum(TEMP_SUM>0); | |
14 catch | |
15 LENGTH=LENGTH; | |
16 end | |
17 end | |
18 | |
19 %Compute the means and variances, normalizing for gene expression | |
20 MEANS=zeros(1,LENGTH); | |
21 VARS=zeros(1,LENGTH); | |
22 COUNTER=1; | |
23 for i=1:size(COUNTS_PER_GENE,1) | |
24 try | |
25 TS=GENE_EXPRESSION_MATRIX(i, SAMPLE_IX); | |
26 S=length(TS)*TS/sum(TS); | |
27 | |
28 TEMP_SUM=sparse(zeros(length(SAMPLE_IX),size(COUNTS_PER_GENE{i, SAMPLE_IX(1)},2))); | |
29 %if S(1)==0 | |
30 % TEMP_SUM(1,:)=0; | |
31 %else | |
32 % TEMP_SUM(1,:)=COUNTS_PER_GENE{i, SAMPLE_IX(1)}/S(1); | |
33 %end | |
34 for j=1:length(SAMPLE_IX) | |
35 if S(j)==0 | |
36 TEMP_SUM(j,:)=0; | |
37 else | |
38 TEMP_SUM(j,:)=COUNTS_PER_GENE{i,SAMPLE_IX(j)}/S(j); | |
39 end | |
40 end | |
41 TEMP_SUM=TEMP_SUM(:,sum(TEMP_SUM,1)>0); | |
42 MEANS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=mean(TEMP_SUM(:,sum(TEMP_SUM,1)>0),1); | |
43 VARS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=var(TEMP_SUM(:,sum(TEMP_SUM,1)>0)); | |
44 COUNTER=COUNTER+size(TEMP_SUM,2); | |
45 catch | |
46 LENGTH=LENGTH; | |
47 end | |
48 end | |
49 | |
50 %filter those which have a zeros mean | |
51 NONZERO_IDX=MEANS>0; | |
52 MEANS=MEANS(NONZERO_IDX); | |
53 VARS=VARS(NONZERO_IDX); | |
54 % Subsample the mean variance pairs to reduce the number of | |
55 % samples which have a low mean | |
56 [MEANS,POS]=sort(MEANS); | |
57 MAX_VAL=max(MEANS); | |
58 MIN_VAL=min(MEANS); | |
59 BOUNDS=exp(linspace(log(MIN_VAL),log(MAX_VAL), CFG.variance_samplebins+1)); | |
60 SAMPLE=[]; | |
61 for i=1:CFG.variance_samplebins | |
62 NR_IN_BIN=length((find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last'))); | |
63 if NR_IN_BIN==0 | |
64 continue; | |
65 elseif NR_IN_BIN<= CFG.variance_samples_per_bin | |
66 SAMPLE=[SAMPLE find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')]; | |
67 else | |
68 CHUNK_SAMPLE=find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last'); | |
69 TEMP_SAMPLE=randperm(NR_IN_BIN); | |
70 SAMPLE=[SAMPLE CHUNK_SAMPLE(TEMP_SAMPLE(1:CFG.variance_samples_per_bin))]; | |
71 end | |
72 end | |
73 VARS=VARS(POS); | |
74 | |
75 %estimate variance function | |
76 [VARIANCE]=estimate_variance(full(MEANS(SAMPLE))',full(VARS(SAMPLE)')); | |
77 return |