annotate rDiff/src/variance/estimate_variance_helper.m @ 2:233c30f91d66

updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 07:15:44 -0400
parents 0f80a5141704
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
1 function [VARIANCE]=estimate_variance_helper(CFG,SAMPLE_IX,COUNTS_PER_GENE,GENE_EXPRESSION_MATRIX)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
2
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
3
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
4 %compute means and variances
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
5 %Get the number of of positions to be used in MEANS and VARS
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
6 LENGTH=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
7 for i=1:size(COUNTS_PER_GENE,1)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
8 try
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
9 TEMP_SUM=COUNTS_PER_GENE{i, SAMPLE_IX(1)};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
10 for j=2:length(SAMPLE_IX)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
11 TEMP_SUM=TEMP_SUM+COUNTS_PER_GENE{i,SAMPLE_IX(j)};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
12 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
13 LENGTH=LENGTH+sum(TEMP_SUM>0);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
14 catch
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
15 LENGTH=LENGTH;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
16 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
17 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
18
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
19 %Compute the means and variances, normalizing for gene expression
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
20 MEANS=zeros(1,LENGTH);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
21 VARS=zeros(1,LENGTH);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
22 COUNTER=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
23 for i=1:size(COUNTS_PER_GENE,1)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
24 try
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
25 TS=GENE_EXPRESSION_MATRIX(i, SAMPLE_IX);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
26 S=length(TS)*TS/sum(TS);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
27
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
28 TEMP_SUM=sparse(zeros(length(SAMPLE_IX),size(COUNTS_PER_GENE{i, SAMPLE_IX(1)},2)));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
29 %if S(1)==0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
30 % TEMP_SUM(1,:)=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
31 %else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
32 % TEMP_SUM(1,:)=COUNTS_PER_GENE{i, SAMPLE_IX(1)}/S(1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
33 %end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
34 for j=1:length(SAMPLE_IX)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
35 if S(j)==0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
36 TEMP_SUM(j,:)=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
37 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
38 TEMP_SUM(j,:)=COUNTS_PER_GENE{i,SAMPLE_IX(j)}/S(j);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
39 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
40 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
41 TEMP_SUM=TEMP_SUM(:,sum(TEMP_SUM,1)>0);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
42 MEANS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=mean(TEMP_SUM(:,sum(TEMP_SUM,1)>0),1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
43 VARS(COUNTER:(COUNTER+size(TEMP_SUM,2)-1))=var(TEMP_SUM(:,sum(TEMP_SUM,1)>0));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
44 COUNTER=COUNTER+size(TEMP_SUM,2);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
45 catch
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
46 LENGTH=LENGTH;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
47 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
48 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
49
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
50 %filter those which have a zeros mean
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
51 NONZERO_IDX=MEANS>0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
52 MEANS=MEANS(NONZERO_IDX);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
53 VARS=VARS(NONZERO_IDX);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
54 % Subsample the mean variance pairs to reduce the number of
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
55 % samples which have a low mean
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
56 [MEANS,POS]=sort(MEANS);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
57 MAX_VAL=max(MEANS);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
58 MIN_VAL=min(MEANS);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
59 BOUNDS=exp(linspace(log(MIN_VAL),log(MAX_VAL), CFG.variance_samplebins+1));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
60 SAMPLE=[];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
61 for i=1:CFG.variance_samplebins
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
62 NR_IN_BIN=length((find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
63 if NR_IN_BIN==0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
64 continue;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
65 elseif NR_IN_BIN<= CFG.variance_samples_per_bin
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
66 SAMPLE=[SAMPLE find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last')];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
67 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
68 CHUNK_SAMPLE=find(MEANS>=BOUNDS(i),1,'first'):find(MEANS<BOUNDS(i+1),1,'last');
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
69 TEMP_SAMPLE=randperm(NR_IN_BIN);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
70 SAMPLE=[SAMPLE CHUNK_SAMPLE(TEMP_SAMPLE(1:CFG.variance_samples_per_bin))];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
71 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
72 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
73 VARS=VARS(POS);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
74
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
75 %estimate variance function
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
76 [VARIANCE]=estimate_variance(full(MEANS(SAMPLE))',full(VARS(SAMPLE)'));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
77 return