annotate all stuff/difference finder for 2 overlaps proper names 7-7_1-15-2019.R @ 6:8fa6b79a2f19 draft

Uploaded
author jfb
date Fri, 19 Apr 2019 16:41:00 -0400
parents 74ada21ceb70
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
# Difference finder for exactly two substrate sets.

# Motif-selection switches; write "YES" (all caps) to enable one of them.
# When neither is "YES" the script keeps ALL motifs, full and truncated.
# NOTE(review): if both are "YES", both filtering sections run and the
# truncated-only results overwrite the full-only ones -- confirm that only
# one switch is ever meant to be set.

# If you want ONLY FULL MOTIFS, put "YES" here.
FullMotifsOnly_questionmark <- "NO"
# If you want ONLY TRUNCATED MOTIFS, put "YES" here.
TruncatedMotifsOnly_questionmark <- "NO"

# First set: substrate table (has a header row) and its SBF table (no header).
FirstSubstrateSet <- read.csv(
  "Substrates 1A TiO2 and FeNTA no duplicates.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
Firstsubbackfreq <- read.csv(
  "SBF 1A TiO2 and FeNTA no duplicates.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

# Second set: same layout as the first.
SecondSubstrateSet <- read.csv(
  "Substrates 1B TiO2 and FeNTA no duplicates.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
Secondsubbackfreq <- read.csv(
  "SBF 1B TiO2 and FeNTA no duplicates.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

# File names for the unshared results of each set.
# NOTE(review): presumably output paths; they are only assigned here, so
# verify how they are used further down the script.
First_unshared_motifs_table <- "Substrates 1A no duplicates no negatives.csv"
First_unshared_subbackfreq <- "SBF 1A no duplicates no negatives.csv"

Second_unshared_motifs_table <- "Substrates 1B no duplicates no negatives.csv"
Second_unshared_subbackfreq <- "SBF 1B no duplicates no negatives.csv"

# Number of letters on each side of the central residue.
# NOTE(review): the motif-selection sections that follow hard-code a flank
# of 4 and do not read these two values -- confirm where they are used.
LeftOfYLetters <- 7
RightOfYLetters <- 7
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
23
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
if (FullMotifsOnly_questionmark == "YES") {
  # Full-motif mode: keep only the rows whose motif has at least four letters
  # on each side of the central residue. Rows that do not qualify keep NA in
  # both the motif and the accession vector, so every position still lines up
  # with a row of the input table.

  # Collect the full motifs of one substrate table.
  #
  # substrates: data frame whose columns 7:15 hold the motif pieces ("XXXXX"
  #   marks an unused cell) and whose column 3 holds the accession number.
  # min_flank: letters required on EACH side of the central residue.
  # Returns a list with `motifs` (named by accession) and `accessions`, each
  #   of length nrow(substrates).
  collect_full_motifs <- function(substrates, min_flank = 4) {
    row_count <- nrow(substrates)
    motifs <- rep(NA, times = row_count)
    accessions <- rep(NA, times = row_count)

    # seq_len() makes an empty table a no-op; 1:nrow() would visit rows 1, 0.
    for (row in seq_len(row_count)) {
      pieces <- substrates[row, 7:15]
      pieces <- pieces[pieces != "XXXXX"]
      residues <- unlist(strsplit(paste(pieces, collapse = ""), split = ""))

      # The lowercase "x" sits immediately before the central residue, so its
      # position minus one is the number of letters on the left.
      marker <- match("x", residues)
      if (is.na(marker)) {
        # Without a marker the flanks cannot be measured; leave the row as NA
        # rather than aborting the whole run on an NA condition.
        next
      }
      left_count <- marker - 1
      # Everything after the marker except the central residue itself.
      right_count <- length(residues) - marker - 1

      if (left_count >= min_flank && right_count >= min_flank) {
        motifs[row] <- paste(residues[residues != "x"], collapse = "")
        accessions[row] <- substrates[row, 3]
      }
    }

    names(motifs) <- accessions
    list(motifs = motifs, accessions = accessions)
  }

  first_full <- collect_full_motifs(FirstSubstrateSet)
  FirstMotifs <- first_full$motifs
  FirstAccessionNumbers <- first_full$accessions

  # Sized by the second table's own row count (the previous code allocated
  # these with nrow(FirstSubstrateSet)).
  second_full <- collect_full_motifs(SecondSubstrateSet)
  SecondMotifs <- second_full$motifs
  SecondAccessionNumbers <- second_full$accessions

  # A third set was previously stubbed out behind if(1==0); to enable it:
  # third_full <- collect_full_motifs(ThirdSubstrateSet)
  # ITDmotifs <- third_full$motifs
  # ITDAccessionNumbers <- third_full$accessions
}
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
136
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
137
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
138 ##############################################3
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
139 #Truncated only
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
if (TruncatedMotifsOnly_questionmark == "YES") {
  # Truncated-motif mode: keep only the rows whose motif has fewer than four
  # letters on at least one side of the central residue, padding the short
  # side(s) with blanks. Rows that do not qualify stay NA.

  # Collect the truncated motifs of one substrate table.
  #
  # substrates: data frame whose columns 7:15 hold the motif pieces ("XXXXX"
  #   marks an unused cell) and whose column 3 holds the accession number.
  # flank: a side with fewer letters than this counts as truncated and is
  #   padded with blanks up to this width.
  # Returns a list with `motifs` and `accessions`, both one-column matrices
  #   with nrow(substrates) rows; `motifs` also carries the accessions as
  #   its names.
  collect_truncated_motifs <- function(substrates, flank = 4) {
    row_count <- nrow(substrates)
    motifs <- matrix(NA, nrow = row_count, ncol = 1)
    accessions <- matrix(NA, nrow = row_count, ncol = 1)

    # seq_len() makes an empty table a no-op; 1:nrow() would visit rows 1, 0.
    for (row in seq_len(row_count)) {
      pieces <- substrates[row, 7:15]
      pieces <- pieces[pieces != "XXXXX"]
      residues <- unlist(strsplit(paste(pieces, collapse = ""), split = ""))

      # The lowercase "x" sits immediately before the central residue, so its
      # position minus one is the number of letters on the left.
      marker <- match("x", residues)
      if (is.na(marker)) {
        # Without a marker the flanks cannot be measured; leave the row as NA
        # rather than aborting the whole run on an NA condition.
        next
      }
      left_count <- marker - 1
      # Everything after the marker except the central residue itself.
      right_count <- length(residues) - marker - 1

      if (left_count < flank || right_count < flank) {
        # Only the short side needs padding. max(0, ...) keeps rep() from
        # receiving a negative count when the other side is longer than
        # `flank`, which previously raised "invalid 'times' argument".
        left_pad <- rep(" ", times = max(0, flank - left_count))
        right_pad <- rep(" ", times = max(0, flank - right_count))
        padded <- c(left_pad, residues, right_pad)

        motifs[row, 1] <- paste(padded[padded != "x"], collapse = "")
        accessions[row, 1] <- substrates[row, 3]
      }
    }

    names(motifs) <- accessions
    list(motifs = motifs, accessions = accessions)
  }

  first_truncated <- collect_truncated_motifs(FirstSubstrateSet)
  FirstMotifs <- first_truncated$motifs
  FirstAccessionNumbers <- first_truncated$accessions

  second_truncated <- collect_truncated_motifs(SecondSubstrateSet)
  SecondMotifs <- second_truncated$motifs
  SecondAccessionNumbers <- second_truncated$accessions

  # A third set was previously stubbed out behind if(1==0); to enable it:
  # third_truncated <- collect_truncated_motifs(ThirdSubstrateSet)
  # ITDmotifs <- third_truncated$motifs
  # ITDAccessionNumbers <- third_truncated$accessions
}
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
249
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
250 ###############################################
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
251 #ALL motifs, full and truncated
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
252
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
253 if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
254 #print("!")}
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
255 FirstMotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
256 FirstAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
257
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
258 for (i in 1:nrow(FirstSubstrateSet)){
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
259 FirstLetters<-FirstSubstrateSet[i,7:15]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
260 FirstLetters<-FirstLetters[FirstLetters !="XXXXX"]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
261 FirstLetters<-paste(FirstLetters, sep="", collapse="")
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
262 leftspaces<-c()
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
263 rightspaces<-c()
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
264
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
265 YYYmotif <- unlist(strsplit(FirstLetters, split = ""))
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
266 YYYposition <- match(x = "x", table = YYYmotif)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
267 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
268 #just 3 letters to the left of x
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
269
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
270 YYYLettersToTheLeft <- YYYposition - 1
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
271 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
272 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
273 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
274 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
275 #variable the user puts in is
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
276
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
277
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
278 if (YYYLettersToTheLeft < 4 | YYYLettersToTheRight < 4) {
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
279 leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft))
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
280 rightspaces<-rep(" ",times=4-(YYYLettersToTheRight))
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
281 #add blank spaces if the motif has less than 4 letters to the left/right
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
282 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
283 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
284 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
285 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
286 FirstLetters<-motif
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
287 FirstMotifs[i,1]<-FirstLetters
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
288 FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
289 }
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
290
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
291 if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
292 motif<-YYYmotif
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
293 #add blank spaces if the motif has less than 4 letters to the left/right
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
294 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
295 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
296 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
297 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
298 FirstLetters<-motif
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
299 FirstMotifs[i,1]<-FirstLetters
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
300 FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
301
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
302
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
303 }
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
304
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
305 }
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
306
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
307 SecondMotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
308 SecondAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
309
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
310 for (i in 1:nrow(SecondSubstrateSet)){
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
311 SecondLetters<-SecondSubstrateSet[i,7:15]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
312 SecondLetters<-SecondLetters[SecondLetters !="XXXXX"]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
313 SecondLetters<-paste(SecondLetters, sep="", collapse="")
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
314 leftspaces<-c()
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
315 rightspaces<-c()
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
316
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
317 YYYmotif <- unlist(strsplit(SecondLetters, split = ""))
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
318 YYYposition <- match(x = "x", table = YYYmotif)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
319 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
320 #just 3 letters to the left of x
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
321
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
322 YYYLettersToTheLeft <- YYYposition - 1
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
323 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
324 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
325 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
326 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
327 #variable the user puts in is
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
328 if (YYYLettersToTheLeft < 4 | YYYLettersToTheRight < 4) {
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
329 leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft))
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
330 rightspaces<-rep(" ",times=4-(YYYLettersToTheRight))
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
331 #add blank spaces if the motif has less than 4 letters to the left/right
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
332 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
333 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
334 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
335 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
336 SecondLetters<-motif
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
337 SecondMotifs[i,1]<-SecondLetters
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
338 SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
339 }
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
340
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
341 if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
342 motif<-YYYmotif
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
343 #full motif (more than 3 letters on each side): leftspaces/rightspaces are still empty here, so no blank spaces get added
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
344 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
345 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
346 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
347 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
348 SecondLetters<-motif
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
349 SecondMotifs[i,1]<-SecondLetters
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
350 SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
351 }
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
352 }
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
353
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
354
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
355 #ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
356 #ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet))
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
# Disabled (the condition is never TRUE): motif extraction for an optional third
# substrate set. Before re-enabling, ITDmotifs and ITDAccessionNumbers must be
# allocated; the lines that did so are commented out in this script.
if (1 == 0) {
  # seq_len() makes the loop a no-op when ThirdSubstrateSet has no rows
  for (i in seq_len(nrow(ThirdSubstrateSet))) {
    # Columns 7:15 are pasted together into one motif string; "XXXXX" entries are dropped
    ITDletters <- ThirdSubstrateSet[i, 7:15]
    ITDletters <- ITDletters[ITDletters != "XXXXX"]
    ITDletters <- paste(ITDletters, collapse = "")
    YYYmotif <- unlist(strsplit(ITDletters, split = ""))

    # "x" is the marker placed directly before the central letter: everything
    # before it is to the left of the site, everything after the letter that
    # follows it is to the right (5 long with x at 3 -> 2 left, 5 - 3 - 1 = 1 right)
    YYYposition <- match(x = "x", table = YYYmotif)
    if (is.na(YYYposition)) {
      stop("No 'x' marker found in the motif on row ", i,
           " of ThirdSubstrateSet", call. = FALSE)
    }
    YYYLettersToTheLeft <- YYYposition - 1
    YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1

    # Pad truncated motifs with blanks up to 4 letters on each side; max(0, ...)
    # adds nothing to a side that already has 4 or more letters
    leftspaces <- rep(" ", times = max(0, 4 - YYYLettersToTheLeft))
    rightspaces <- rep(" ", times = max(0, 4 - YYYLettersToTheRight))
    motif <- c(leftspaces, YYYmotif, rightspaces)

    # Remove the "x" marker and store the motif as a single string
    motif <- motif[!motif %in% "x"]
    motif <- paste(motif, collapse = "")
    ITDletters <- motif
    ITDmotifs[i, 1] <- ITDletters
    # The accession number comes from the same (third) substrate set as the motif
    ITDAccessionNumbers[i, 1] <- ThirdSubstrateSet[i, 3]
  }
}
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
401 names(FirstMotifs)<-FirstAccessionNumbers
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
402 names(SecondMotifs)<-SecondAccessionNumbers
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
403 #names(ITDmotifs)<-ITDAccessionNumbers
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
404 }
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
405
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
406
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
# Reduce each motif collection to the motifs that do not occur in the other
# collection, keeping only the first occurrence of each motif. names() of the
# results are used further down to look up accession numbers.

# First set: motifs with no match among the second set's motifs
FirstMotifsFINAL <- FirstMotifs[is.na(match(FirstMotifs, SecondMotifs))]
#FirstMotifsFINAL<-FirstMotifsFINAL[!FirstMotifsFINAL %in% ITDmotifs]
FirstMotifsFINAL <- FirstMotifsFINAL[!duplicated(FirstMotifsFINAL)]


# ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% SecondMotifs]
# ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FirstMotifs]
# ITDmotifsFINAL<-ITDmotifsFINAL[!duplicated(ITDmotifsFINAL)]


# Second set: motifs with no match among the first set's motifs
SecondMotifsFINAL <- SecondMotifs[is.na(match(SecondMotifs, FirstMotifs))]
#SecondMotifsFINAL<-SecondMotifsFINAL[!SecondMotifsFINAL %in% ITDmotifs]
SecondMotifsFINAL <- SecondMotifsFINAL[!duplicated(SecondMotifsFINAL)]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
420
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
421
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
# Build FirstFinalMatrix: for every motif unique to the first set, find the
# column(s) of Firstsubbackfreq whose accession number occurs in that motif's
# name and store each such column as one row.
# The matrix starts with one all-NA placeholder row.
# NOTE(review): the placeholder is hard-coded to 36 columns, so this presumably
# expects Firstsubbackfreq to have 36 rows - confirm.
columnalheader <- c(rep(NA, 36))
FirstFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# Matching rows are collected in a list and bound once after the loops instead
# of growing the matrix with rbind() on every hit.
FirstMatchedRows <- list()

# seq_along() makes the loop a no-op when no unique motifs remain
# (1:length() would iterate over 1 and 0 in that case).
for (k in seq_along(FirstMotifsFINAL)) {
  # NOTE(review): this loop starts at column 2 and reads accession numbers from
  # row 2, whereas the SecondFinalMatrix loop starts at column 1 and reads
  # row 1 - confirm which layout the SBF files actually use.
  # seq_len(...)[-1] is 2..ncol, and empty for a one-column table.
  for (m in seq_len(ncol(Firstsubbackfreq))[-1]) {
    AN <- as.character(Firstsubbackfreq[2, m])
    # Literal (fixed) substring search of the accession number in the motif's
    # name; isTRUE() treats a missing accession number as "no match" instead
    # of stopping the script.
    if (isTRUE(grepl(pattern = AN,
                     x = names(FirstMotifsFINAL[k]),
                     fixed = TRUE))) {
      # with that accession number, a match was found in the subbackfreq file:
      # save that column as a single row
      outputmatrix <- matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
      FirstMatchedRows[[length(FirstMatchedRows) + 1L]] <- outputmatrix
    }
  }
}
FirstFinalMatrix <- do.call(rbind, c(list(FirstFinalMatrix), FirstMatchedRows))

# Remove repeated rows; drop = FALSE keeps the result a matrix even when only
# the placeholder row is left.
FirstFinalMatrix <- FirstFinalMatrix[!duplicated(FirstFinalMatrix), , drop = FALSE]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
444
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
445 #columnalheader<-c(rep(NA,36))
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
446 #ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
447
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
# Disabled (the condition is never TRUE): same accession-number lookup for an
# optional third set. Before re-enabling, ITDFinalMatrix must be initialised;
# the lines that did so are commented out directly above this block.
if (1 == 0) {
  # Matching rows are collected here and bound once after the loops
  ITDMatchedRows <- list()

  # seq_along()/seq_len() make the loops no-ops on empty input
  for (k in seq_along(ITDmotifsFINAL)) {
    for (m in seq_len(ncol(Thirdsubbackfreq))) {
      AN <- as.character(Thirdsubbackfreq[1, m])
      # Literal substring search; isTRUE() treats a missing accession number
      # as "no match" instead of stopping the script
      if (isTRUE(grepl(pattern = AN,
                       x = names(ITDmotifsFINAL[k]),
                       fixed = TRUE))) {
        # with that accession number, a match was found in the subbackfreq
        # file: save that column as a single row
        outputmatrix <- matrix(as.character(Thirdsubbackfreq[, m]), nrow = 1)
        ITDMatchedRows[[length(ITDMatchedRows) + 1L]] <- outputmatrix
      }
    }
  }
  ITDFinalMatrix <- do.call(rbind, c(list(ITDFinalMatrix), ITDMatchedRows))

  # Remove repeated rows; drop = FALSE keeps the result a matrix even when a
  # single row is left
  ITDFinalMatrix <- ITDFinalMatrix[!duplicated(ITDFinalMatrix), , drop = FALSE]
}
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
467
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
# Build SecondFinalMatrix: for every motif unique to the second set, find the
# column(s) of Secondsubbackfreq whose accession number occurs in that motif's
# name and store each such column as one row.
# The matrix starts with one all-NA placeholder row.
# NOTE(review): the placeholder is hard-coded to 36 columns, so this presumably
# expects Secondsubbackfreq to have 36 rows - confirm.
columnalheader <- c(rep(NA, 36))
SecondFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# Matching rows are collected in a list and bound once after the loops instead
# of growing the matrix with rbind() on every hit.
SecondMatchedRows <- list()

# seq_along() makes the loop a no-op when no unique motifs remain
# (1:length() would iterate over 1 and 0 in that case).
for (k in seq_along(SecondMotifsFINAL)) {
  # NOTE(review): this loop starts at column 1 and reads accession numbers from
  # row 1, whereas the FirstFinalMatrix loop starts at column 2 and reads
  # row 2 - confirm which layout the SBF files actually use.
  for (m in seq_len(ncol(Secondsubbackfreq))) {
    AN <- as.character(Secondsubbackfreq[1, m])
    # Literal (fixed) substring search of the accession number in the motif's
    # name; isTRUE() treats a missing accession number as "no match" instead
    # of stopping the script.
    if (isTRUE(grepl(pattern = AN,
                     x = names(SecondMotifsFINAL[k]),
                     fixed = TRUE))) {
      # with that accession number, a match was found in the subbackfreq file:
      # save that column as a single row
      outputmatrix <- matrix(as.character(Secondsubbackfreq[, m]), nrow = 1)
      SecondMatchedRows[[length(SecondMatchedRows) + 1L]] <- outputmatrix
    }
  }
}
SecondFinalMatrix <- do.call(rbind, c(list(SecondFinalMatrix), SecondMatchedRows))

# Remove repeated rows; drop = FALSE keeps the result a matrix even when only
# the placeholder row is left.
SecondFinalMatrix <- SecondFinalMatrix[!duplicated(SecondFinalMatrix), , drop = FALSE]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
# Write the results for the first substrate set.
# Every write below uses append = TRUE, so rows are added to whatever the
# output files already contain.

# Two-column table: each unique motif next to the name attached to it
# (the accession number assigned via names() earlier in the script)
FTLoutputmatrix <- matrix(data = c(FirstMotifsFINAL, names(FirstMotifsFINAL)), ncol = 2)

write.table(x = FTLoutputmatrix,
            file = First_unshared_motifs_table,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Header row of the subbackfreq output: first column of Firstsubbackfreq,
# rows 1 to 36, laid out as a single row
columnalheader <- c(as.character(Firstsubbackfreq[1:36, 1]))
columnalheader <- matrix(columnalheader, nrow = 1)
write.table(x = columnalheader,
            file = First_unshared_subbackfreq,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Drop the all-NA placeholder row that FirstFinalMatrix was started with.
# Negative indexing with drop = FALSE keeps a matrix when exactly one data row
# remains (otherwise it would be written as a column) and yields an empty
# matrix, not an error, when there are no data rows.
FirstFinalMatrix <- FirstFinalMatrix[-1, , drop = FALSE]
write.table(x = FirstFinalMatrix,
            file = First_unshared_subbackfreq,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
506
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
507 ############################################################################################################
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
508
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
# Write the results for the second substrate set.
# Every write below uses append = TRUE, so rows are added to whatever the
# output files already contain.

# Two-column table: each unique motif next to the name attached to it
# (the accession number assigned via names() earlier in the script)
D835Youtputmatrix <- matrix(data = c(SecondMotifsFINAL, names(SecondMotifsFINAL)), ncol = 2)

write.table(x = D835Youtputmatrix,
            file = Second_unshared_motifs_table,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Header row of the subbackfreq output: first column of Secondsubbackfreq,
# rows 1 to 36, laid out as a single row. The header is taken from the second
# set's own table so it matches the data rows written underneath it.
columnalheader <- c(as.character(Secondsubbackfreq[1:36, 1]))
columnalheader <- matrix(columnalheader, nrow = 1)
write.table(x = columnalheader,
            file = Second_unshared_subbackfreq,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Drop the all-NA placeholder row that SecondFinalMatrix was started with.
# Negative indexing with drop = FALSE keeps a matrix when exactly one data row
# remains (otherwise it would be written as a column) and yields an empty
# matrix, not an error, when there are no data rows.
SecondFinalMatrix <- SecondFinalMatrix[-1, , drop = FALSE]
write.table(x = SecondFinalMatrix,
            file = Second_unshared_subbackfreq,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
527
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
528 ############################################################################################################
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
529
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
530 # ITDoutputmatrix<-matrix(data = c(ITDmotifsFINAL,names(ITDmotifsFINAL)),ncol = 2)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
531 #
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
532 # write.table(x=ITDoutputmatrix,
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
533 # file=Third_unshared_motifs_table,
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
534 # quote=FALSE, sep=",",
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
535 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
536 #
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
537 # columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1]))
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
538 # columnalheader<-matrix(columnalheader,nrow = 1)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
539 # write.table(x=columnalheader,
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
540 # file=Third_unshared_subbackfreq,
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
541 # quote=FALSE, sep=",",
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
542 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
543 # ITDFinalMatrix<-ITDFinalMatrix[2:nrow(ITDFinalMatrix),]
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
544 # write.table(x=ITDFinalMatrix,
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
545 # file=Third_unshared_subbackfreq,
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
546 # quote=FALSE, sep=",",
74ada21ceb70 Uploaded
jfb
parents:
diff changeset
547 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)