annotate rDiff/src/tests/rDiff_poisson.m @ 2:233c30f91d66

updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 07:15:44 -0400
parents 0f80a5141704
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
1 function [P_VALUE, RET_STRUCT]= rDiff_poisson(CFG,gene,Counts_rDiff_parametric,Gene_expression)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
2
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
3 % Calculates the p-Values of a poisson test on each
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
4 % alternative regions and combines the p-values using Bonferroni's correction
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
5
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
6
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
7 %Initialize gene.name
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
8 NR_OF_TRANS=size(gene.transcripts,2);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
9 if NR_OF_TRANS<=1
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
10 RET_STRUCT='NR_OF_TRANS too small';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
11 P_VALUE=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
12 return
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
13 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
14
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
15
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
16 %get the samples that are expressed (have more than 10 reads)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
17 TEMP_SAMPLE1=and(CFG.SAMPLES==1,Gene_expression>=10);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
18 TEMP_SAMPLE2=and(CFG.SAMPLES==2,Gene_expression>=10);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
19 SAMPLE1=find(TEMP_SAMPLE1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
20 SAMPLE2=find(TEMP_SAMPLE2);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
21
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
22 %Check wether Counts_rDiff_parametric is nonempty
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
23 for j=1:length(TEMP_SAMPLE1)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
24 TEMP_SAMPLE1(j)=and(not(isempty(Counts_rDiff_parametric{j})), ...
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
25 TEMP_SAMPLE1(j));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
26 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
27 for j=1:length(TEMP_SAMPLE2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
28 TEMP_SAMPLE2(j)=and(not(isempty(Counts_rDiff_parametric{j})), ...
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
29 TEMP_SAMPLE2(j));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
30 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
31
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
32 SAMPLE1=find(TEMP_SAMPLE1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
33 SAMPLE2=find(TEMP_SAMPLE2);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
34
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
35
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
36
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
37 SAMPLE_LENGTH1=length(SAMPLE1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
38 SAMPLE_LENGTH2=length(SAMPLE2);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
39
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
40 if min(SAMPLE_LENGTH1,SAMPLE_LENGTH2)==0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
41 RET_STRUCT='SAMPLE_LENGTH too small';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
42 P_VALUE=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
43 return
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
44 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
45
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
46 % Get the region counts
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
47 region_counts_1=zeros(SAMPLE_LENGTH1,length(Counts_rDiff_parametric{1,1}));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
48 for j=1:SAMPLE_LENGTH1
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
49 region_counts_1(j,:)=Counts_rDiff_parametric{1,SAMPLE1(j)};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
50 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
51 region_counts_2=zeros(SAMPLE_LENGTH2,length(Counts_rDiff_parametric{1,1}));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
52 for j=1:SAMPLE_LENGTH2
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
53 region_counts_2(j,:)=Counts_rDiff_parametric{1,SAMPLE2(j)};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
54 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
55
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
56 % Get the gene expression
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
57 gene_expression_1=sum(Gene_expression(SAMPLE1));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
58 gene_expression_2=sum(Gene_expression(SAMPLE2));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
59
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
60 %compute the p-values
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
61 OBSERVED_COUNTS=[sum(region_counts_1,1);sum(region_counts_2,1)];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
62 TOTAL_COUNTS=sum(OBSERVED_COUNTS,1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
63
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
64 %calculate gene expression ratio
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
65 LAMBDA=gene_expression_1/(gene_expression_2+gene_expression_1);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
66
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
67
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
68 P_LIST=ones(1,size(TOTAL_COUNTS,2));
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
69 %Iterate over the regions
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
70 SKIPPED_TESTS=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
71 for i=1:length(P_LIST)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
72 if sum(TOTAL_COUNTS(i))==0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
73 SKIPPED_TESTS=SKIPPED_TESTS+1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
74 continue
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
75 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
76
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
77 MEAN=LAMBDA*TOTAL_COUNTS(i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
78 VARIANCE=(TOTAL_COUNTS(i)*LAMBDA*(1-LAMBDA)).^0.5;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
79 Y=sum(((MEAN-OBSERVED_COUNTS(1,i))./VARIANCE).^2).^0.5;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
80
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
81 %Calculate the p-value
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
82 P_VALUE=1-gammainc((Y^2)/2,1/2);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
83
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
84 P_LIST(i)=P_VALUE;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
85 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
86 if length(P_LIST)-SKIPPED_TESTS<=0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
87 P_VALUE=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
88 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
89 P_VALUE=min(P_LIST)*(length(P_LIST)-SKIPPED_TESTS);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
90 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
91
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
92 RET_STRUCT={};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
93 return
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
94
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
95
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
96