Mercurial > repos > vipints > rdiff
comparison rDiff/src/tools/transform_single_end_reads.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0f80a5141704 |
---|---|
1 function [UNIQUE_NEW_EXONS, GRAPHNODES, ORDER_OF_GRAPHNODE, EIRS_IN_SEQ] = transform_single_end_reads(gene, SEQUENCED_LENGTH) | |
2 % This function calculates all regions onto which a read may fall. | |
3 % SEQUENCED_LENGTH is the length of a read. | |
4 | |
5 SPLICINGEVENTS=gene.splicingevents; | |
6 SEQUENCE=gene.sequence; | |
7 EXONSEQUENCE=gene.exonsequence; | |
8 | |
9 NB_OF_TRANS = size(EXONSEQUENCE,1); | |
10 | |
11 NEWEXONS = 1:(length(SPLICINGEVENTS) - 1); | |
12 UNIQUE_NEW_EXONS = NEWEXONS(SEQUENCE>0); | |
13 | |
14 NB_OF_EXONS = length(UNIQUE_NEW_EXONS); | |
15 MAX_EXON_NB_TO_POS = cumsum((NEWEXONS .* SEQUENCE) > 0); | |
16 | |
17 % NB_EXONSEQUENCE is for each transcript the position in | |
18 % SPLICINGEVENTS where one of its exonic region starts and 0 if it | |
19 % is intronic | |
20 | |
21 NB_EXONSEQUENCE = EXONSEQUENCE .* repmat(NEWEXONS,NB_OF_TRANS,1); | |
22 | |
23 %%% This contains all exons | |
24 GRAPHNODES = []; | |
25 | |
26 %%% This is the array where a 1 is in column j if that EIR (Exons in | |
27 %a region covered by a read) is contained in the transcript | |
28 EIRS_IN_SEQ = []; | |
29 | |
30 CURRENT_NODE = 0;% To catch errors with non-initalized variable | |
31 | |
32 EIR_TRANSCRIPTS = cell(1,NB_OF_TRANS); | |
33 ORDER_OF_GRAPHNODE = cell(1,NB_OF_TRANS); | |
34 LENGTHS_OF_GRAPHNODE = cell(1,NB_OF_TRANS); | |
35 | |
36 SPLICING_LENGTHS = SPLICINGEVENTS(2:end) - SPLICINGEVENTS(1:end-1); | |
37 | |
38 for i = 1:NB_OF_TRANS | |
39 | |
40 %%% remove zero positions, adjust splicing positions | |
41 CURRENT_EXONS = NB_EXONSEQUENCE(i, EXONSEQUENCE(i,:) > 0); | |
42 | |
43 SPLICING_CORRECT = cumsum(SPLICING_LENGTHS .* (NB_EXONSEQUENCE(i,:) == 0)); | |
44 %SPLICING_CORRECT contains the position of SPLICINGSEQUENCE in | |
45 %the transcript when all introns are spliced out | |
46 SPLICING_CORRECT = [1, SPLICINGEVENTS(2:end) - SPLICING_CORRECT]; | |
47 | |
48 %This ensures that the end of the transcript is also in CURRENT_SPLICINGEVENTS | |
49 IDX = EXONSEQUENCE(i,:) == 1 ; | |
50 IDX(find(EXONSEQUENCE(i,:) == 1, 1, 'last') + 1) = true; | |
51 CURRENT_SPLICINGEVENTS = SPLICING_CORRECT(IDX); | |
52 if length(CURRENT_SPLICINGEVENTS) == 0 | |
53 continue | |
54 end | |
55 LASTPOS = CURRENT_SPLICINGEVENTS(end); | |
56 if LASTPOS <= SEQUENCED_LENGTH | |
57 warning('CURRENT_SPLICINGEVENTS(end) > SEQUENCED_LENGTH') | |
58 end | |
59 %assert(LASTPOS > SEQUENCED_LENGTH,'CURRENT_SPLICINGEVENTS(end) > SEQUENCED_LENGTH') | |
60 % Calculate the positions when the EIRS can change | |
61 | |
62 % Determine positions which start SEQUENCED_LENGTH positions before a splicing event | |
63 % defines a window of size SEQUENCED_LENGTH around the CURRENT_SPLICINGEVENTS | |
64 READEVENTS_START = max([CURRENT_SPLICINGEVENTS(1:end - 1) - SEQUENCED_LENGTH + 1; ones(1,length(CURRENT_SPLICINGEVENTS)-1)],[],1); | |
65 READEVENTS_END = min([CURRENT_SPLICINGEVENTS(2:end); repmat(LASTPOS - SEQUENCED_LENGTH,1,length(CURRENT_SPLICINGEVENTS(2:end)))],[],1); | |
66 | |
67 % Calculate EIRS | |
68 % CHANGE_POINTS are those points in a transcript where a EIR changes, namly the splicesites of that transcript plus and | |
69 % minus the SEQUENCED_LENGTH - the above descibed window | |
70 CHANGE_POINTS = unique([READEVENTS_START, READEVENTS_END]); | |
71 | |
72 for j = 1:(length(CHANGE_POINTS) - 1) | |
73 | |
74 POINTS_OF_INTEREST = ( READEVENTS_START(1,:) <= CHANGE_POINTS(j)) & (READEVENTS_END(1,:) > CHANGE_POINTS(j)); | |
75 | |
76 % MAX_EXON_NB_TO_POS is mapping back to the unspliced coordinates | |
77 CURRENT_EIRS = zeros(1,NB_OF_EXONS); | |
78 CURRENT_EIRS( MAX_EXON_NB_TO_POS(CURRENT_EXONS(POINTS_OF_INTEREST))) = 1; | |
79 | |
80 %%% Already seen such exon composition in sliding | |
81 %%% window? | |
82 [TEMP, CURRENT_NODE] = intersect(GRAPHNODES, CURRENT_EIRS, 'rows'); | |
83 if isempty(TEMP) | |
84 GRAPHNODES = [GRAPHNODES; CURRENT_EIRS]; %Add Key | |
85 EIRS_IN_SEQ = [EIRS_IN_SEQ, zeros(NB_OF_TRANS,1)]; | |
86 CURRENT_NODE = size(GRAPHNODES,1); | |
87 end | |
88 | |
89 EIRS_IN_SEQ(i,CURRENT_NODE) = 1; | |
90 ORDER_OF_GRAPHNODE{i} = [ORDER_OF_GRAPHNODE{i}, CURRENT_NODE]; | |
91 LENGTHS_OF_GRAPHNODE{i} = [LENGTHS_OF_GRAPHNODE{i}, [CHANGE_POINTS(j); CHANGE_POINTS(j+1)]]; | |
92 end | |
93 end | |
94 |