0
|
1 function [P_VALUE, RET_STRUCT]= rDiff_poisson(CFG,gene,Counts_rDiff_parametric,Gene_expression)
|
|
2
|
|
3 % Calculates the p-Values of a poisson test on each
|
|
4 % alternative regions and combines the p-values using Bonferroni's correction
|
|
5
|
|
6
|
|
7 %Initialize gene.name
|
|
8 NR_OF_TRANS=size(gene.transcripts,2);
|
|
9 if NR_OF_TRANS<=1
|
|
10 RET_STRUCT='NR_OF_TRANS too small';
|
|
11 P_VALUE=1;
|
|
12 return
|
|
13 end
|
|
14
|
|
15
|
|
16 %get the samples that are expressed (have more than 10 reads)
|
|
17 TEMP_SAMPLE1=and(CFG.SAMPLES==1,Gene_expression>=10);
|
|
18 TEMP_SAMPLE2=and(CFG.SAMPLES==2,Gene_expression>=10);
|
|
19 SAMPLE1=find(TEMP_SAMPLE1);
|
|
20 SAMPLE2=find(TEMP_SAMPLE2);
|
|
21
|
|
22 %Check wether Counts_rDiff_parametric is nonempty
|
|
23 for j=1:length(TEMP_SAMPLE1)
|
|
24 TEMP_SAMPLE1(j)=and(not(isempty(Counts_rDiff_parametric{j})), ...
|
|
25 TEMP_SAMPLE1(j));
|
|
26 end
|
|
27 for j=1:length(TEMP_SAMPLE2)
|
|
28 TEMP_SAMPLE2(j)=and(not(isempty(Counts_rDiff_parametric{j})), ...
|
|
29 TEMP_SAMPLE2(j));
|
|
30 end
|
|
31
|
|
32 SAMPLE1=find(TEMP_SAMPLE1);
|
|
33 SAMPLE2=find(TEMP_SAMPLE2);
|
|
34
|
|
35
|
|
36
|
|
37 SAMPLE_LENGTH1=length(SAMPLE1);
|
|
38 SAMPLE_LENGTH2=length(SAMPLE2);
|
|
39
|
|
40 if min(SAMPLE_LENGTH1,SAMPLE_LENGTH2)==0
|
|
41 RET_STRUCT='SAMPLE_LENGTH too small';
|
|
42 P_VALUE=1;
|
|
43 return
|
|
44 end
|
|
45
|
|
46 % Get the region counts
|
|
47 region_counts_1=zeros(SAMPLE_LENGTH1,length(Counts_rDiff_parametric{1,1}));
|
|
48 for j=1:SAMPLE_LENGTH1
|
|
49 region_counts_1(j,:)=Counts_rDiff_parametric{1,SAMPLE1(j)};
|
|
50 end
|
|
51 region_counts_2=zeros(SAMPLE_LENGTH2,length(Counts_rDiff_parametric{1,1}));
|
|
52 for j=1:SAMPLE_LENGTH2
|
|
53 region_counts_2(j,:)=Counts_rDiff_parametric{1,SAMPLE2(j)};
|
|
54 end
|
|
55
|
|
56 % Get the gene expression
|
|
57 gene_expression_1=sum(Gene_expression(SAMPLE1));
|
|
58 gene_expression_2=sum(Gene_expression(SAMPLE2));
|
|
59
|
|
60 %compute the p-values
|
|
61 OBSERVED_COUNTS=[sum(region_counts_1,1);sum(region_counts_2,1)];
|
|
62 TOTAL_COUNTS=sum(OBSERVED_COUNTS,1);
|
|
63
|
|
64 %calculate gene expression ratio
|
|
65 LAMBDA=gene_expression_1/(gene_expression_2+gene_expression_1);
|
|
66
|
|
67
|
|
68 P_LIST=ones(1,size(TOTAL_COUNTS,2));
|
|
69 %Iterate over the regions
|
|
70 SKIPPED_TESTS=0;
|
|
71 for i=1:length(P_LIST)
|
|
72 if sum(TOTAL_COUNTS(i))==0
|
|
73 SKIPPED_TESTS=SKIPPED_TESTS+1;
|
|
74 continue
|
|
75 end
|
|
76
|
|
77 MEAN=LAMBDA*TOTAL_COUNTS(i);
|
|
78 VARIANCE=(TOTAL_COUNTS(i)*LAMBDA*(1-LAMBDA)).^0.5;
|
|
79 Y=sum(((MEAN-OBSERVED_COUNTS(1,i))./VARIANCE).^2).^0.5;
|
|
80
|
|
81 %Calculate the p-value
|
|
82 P_VALUE=1-gammainc((Y^2)/2,1/2);
|
|
83
|
|
84 P_LIST(i)=P_VALUE;
|
|
85 end
|
|
86 if length(P_LIST)-SKIPPED_TESTS<=0
|
|
87 P_VALUE=1;
|
|
88 else
|
|
89 P_VALUE=min(P_LIST)*(length(P_LIST)-SKIPPED_TESTS);
|
|
90 end
|
|
91
|
|
92 RET_STRUCT={};
|
|
93 return
|
|
94
|
|
95
|
|
96
|