difference_finder: all stuff/difference finder for 2 overlaps proper names 7-7

annotate all stuff/difference finder for 2 overlaps proper names 7-7_1-15-2019.R @ 6:8fa6b79a2f19 draft

Uploaded

author	jfb
date	Fri, 19 Apr 2019 16:41:00 -0400
parents	74ada21ceb70
children

rev	line source
# Difference finder for exactly two substrate sets ----

# Motif filter switches ----
# Put "YES" (all caps) here to keep ONLY full motifs.
FullMotifsOnly_questionmark <- "NO"
# Put "YES" (all caps) here to keep ONLY truncated motifs.
TruncatedMotifsOnly_questionmark <- "NO"

# Input tables ----
# Each substrate table is paired with an "SBF" table; the SBF files are read
# without a header row.
FirstSubstrateSet <- read.csv(
  "Substrates 1A TiO2 and FeNTA no duplicates.csv",
  stringsAsFactors = FALSE
)
Firstsubbackfreq <- read.csv(
  "SBF 1A TiO2 and FeNTA no duplicates.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

SecondSubstrateSet <- read.csv(
  "Substrates 1B TiO2 and FeNTA no duplicates.csv",
  stringsAsFactors = FALSE
)
Secondsubbackfreq <- read.csv(
  "SBF 1B TiO2 and FeNTA no duplicates.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

# File names for the "unshared" tables ----
# NOTE(review): these are presumably output paths written later in the
# script; nothing in this section reads or writes them -- confirm.
First_unshared_motifs_table <- "Substrates 1A no duplicates no negatives.csv"
First_unshared_subbackfreq <- "SBF 1A no duplicates no negatives.csv"

Second_unshared_motifs_table <- "Substrates 1B no duplicates no negatives.csv"
Second_unshared_subbackfreq <- "SBF 1B no duplicates no negatives.csv"

# Window size ----
# Number of letters wanted to the left / right of the central Y.
# NOTE(review): the motif filters directly below hard-code a window of 4 and
# do not read these two values -- confirm where they are used.
LeftOfYLetters <- 7
RightOfYLetters <- 7
# Full motifs only ----
# Runs when FullMotifsOnly_questionmark is "YES". For each substrate table it
# produces:
#   FirstMotifs / SecondMotifs: one entry per input row, named by the value
#     in column 3 of that row; rows whose motif is not full stay NA.
#   FirstAccessionNumbers / SecondAccessionNumbers: column 3 of every row
#     that yielded a full motif, NA otherwise.
# A third (ITD) substrate set used to be handled here as well, but that code
# sat behind `if (1 == 0)` and referenced objects that are never created, so
# it has been dropped.
if (FullMotifsOnly_questionmark == "YES") {
  # Extract the full motifs of one substrate table.
  #
  # substrates: data frame whose columns 7:15 hold the motif pieces (cells
  #   equal to "XXXXX" are placeholders and are ignored) and whose column 3
  #   holds the accession number.
  # Returns a list with `motifs` (named by accession) and `accessions`, both
  # of length nrow(substrates).
  collect_full_motifs <- function(substrates) {
    n_rows <- nrow(substrates)
    motifs <- rep(NA, times = n_rows)
    accessions <- rep(NA, times = n_rows)

    # seq_len() so that an empty table yields zero iterations (1:0 would
    # visit rows 1 and 0).
    for (row in seq_len(n_rows)) {
      pieces <- substrates[row, 7:15]
      pieces <- pieces[pieces != "XXXXX"]
      residues <- unlist(strsplit(paste(pieces, collapse = ""), split = ""))

      # A lowercase "x" sits immediately before the central residue.
      marker <- match("x", residues)
      if (is.na(marker)) {
        # No marker: the side counts are undefined, and testing NA inside
        # if () would abort the whole script. Leave this row as NA.
        next
      }

      # Marker at position p means p - 1 letters to its left; the letters
      # to the right exclude the marker and the central residue after it.
      n_left <- marker - 1
      n_right <- length(residues) - marker - 1

      # "Full" means at least 4 letters on both sides.
      if (n_left > 3 && n_right > 3) {
        motifs[row] <- paste(residues[residues != "x"], collapse = "")
        accessions[row] <- substrates[row, 3]
      }
    }

    names(motifs) <- accessions
    list(motifs = motifs, accessions = accessions)
  }

  first_full <- collect_full_motifs(FirstSubstrateSet)
  FirstMotifs <- first_full$motifs
  FirstAccessionNumbers <- first_full$accessions

  # Sized from the second table itself (it was previously sized from the
  # first table, leaving stray trailing NAs when the second set was shorter).
  second_full <- collect_full_motifs(SecondSubstrateSet)
  SecondMotifs <- second_full$motifs
  SecondAccessionNumbers <- second_full$accessions
}
74ada21ceb70 Uploaded jfb parents: diff changeset	136
74ada21ceb70 Uploaded jfb parents: diff changeset	137
74ada21ceb70 Uploaded jfb parents: diff changeset	138 ##############################################3
74ada21ceb70 Uploaded jfb parents: diff changeset	139 #Truncated only
# Truncated motifs only ----
# Runs when TruncatedMotifsOnly_questionmark is "YES". For each substrate
# table it produces one-column matrices with one row per input row:
#   FirstMotifs / SecondMotifs: the space-padded truncated motif, NA for
#     rows whose motif is not truncated; carries a names attribute taken
#     from the matching accession matrix.
#   FirstAccessionNumbers / SecondAccessionNumbers: column 3 of every row
#     that yielded a truncated motif, NA otherwise.
# A third (ITD) substrate set used to be handled here as well, but that code
# sat behind `if (1 == 0)` and referenced objects that are never created, so
# it has been dropped.
if (TruncatedMotifsOnly_questionmark == "YES") {
  # Extract the truncated motifs of one substrate table.
  #
  # substrates: data frame whose columns 7:15 hold the motif pieces (cells
  #   equal to "XXXXX" are placeholders and are ignored) and whose column 3
  #   holds the accession number.
  # Returns a list with `motifs` and `accessions`, both nrow(substrates) x 1
  # matrices.
  collect_truncated_motifs <- function(substrates) {
    n_rows <- nrow(substrates)
    motifs <- matrix(NA, nrow = n_rows, ncol = 1)
    accessions <- matrix(NA, nrow = n_rows, ncol = 1)

    # seq_len() so that an empty table yields zero iterations.
    for (row in seq_len(n_rows)) {
      pieces <- substrates[row, 7:15]
      pieces <- pieces[pieces != "XXXXX"]
      residues <- unlist(strsplit(paste(pieces, collapse = ""), split = ""))

      # A lowercase "x" sits immediately before the central residue.
      marker <- match("x", residues)
      if (is.na(marker)) {
        # No marker: the side counts are undefined, and testing NA inside
        # if () would abort the whole script. Leave this row as NA.
        next
      }

      # Marker at position p means p - 1 letters to its left; the letters
      # to the right exclude the marker and the central residue after it.
      n_left <- marker - 1
      n_right <- length(residues) - marker - 1

      # "Truncated" means fewer than 4 letters on at least one side.
      if (n_left < 4 || n_right < 4) {
        # Pad only the short side(s). max(0, ...) matters when one side is
        # short and the other has more than 4 letters: a negative `times`
        # makes rep() fail.
        left_pad <- rep(" ", times = max(0, 4 - n_left))
        right_pad <- rep(" ", times = max(0, 4 - n_right))
        padded <- c(left_pad, residues, right_pad)

        motifs[row, 1] <- paste(padded[padded != "x"], collapse = "")
        accessions[row, 1] <- substrates[row, 3]
      }
    }

    list(motifs = motifs, accessions = accessions)
  }

  first_truncated <- collect_truncated_motifs(FirstSubstrateSet)
  FirstMotifs <- first_truncated$motifs
  FirstAccessionNumbers <- first_truncated$accessions

  second_truncated <- collect_truncated_motifs(SecondSubstrateSet)
  SecondMotifs <- second_truncated$motifs
  SecondAccessionNumbers <- second_truncated$accessions

  names(FirstMotifs) <- FirstAccessionNumbers
  names(SecondMotifs) <- SecondAccessionNumbers
}
74ada21ceb70 Uploaded jfb parents: diff changeset	249
74ada21ceb70 Uploaded jfb parents: diff changeset	250 ###############################################
74ada21ceb70 Uploaded jfb parents: diff changeset	251 #ALL motifs, full and truncated
74ada21ceb70 Uploaded jfb parents: diff changeset	252
74ada21ceb70 Uploaded jfb parents: diff changeset	253 if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){
74ada21ceb70 Uploaded jfb parents: diff changeset	254 #print("!")}
74ada21ceb70 Uploaded jfb parents: diff changeset	255 FirstMotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
74ada21ceb70 Uploaded jfb parents: diff changeset	256 FirstAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
74ada21ceb70 Uploaded jfb parents: diff changeset	257
74ada21ceb70 Uploaded jfb parents: diff changeset	258 for (i in 1:nrow(FirstSubstrateSet)){
74ada21ceb70 Uploaded jfb parents: diff changeset	259 FirstLetters<-FirstSubstrateSet[i,7:15]
74ada21ceb70 Uploaded jfb parents: diff changeset	260 FirstLetters<-FirstLetters[FirstLetters !="XXXXX"]
74ada21ceb70 Uploaded jfb parents: diff changeset	261 FirstLetters<-paste(FirstLetters, sep="", collapse="")
74ada21ceb70 Uploaded jfb parents: diff changeset	262 leftspaces<-c()
74ada21ceb70 Uploaded jfb parents: diff changeset	263 rightspaces<-c()
74ada21ceb70 Uploaded jfb parents: diff changeset	264
74ada21ceb70 Uploaded jfb parents: diff changeset	265 YYYmotif <- unlist(strsplit(FirstLetters, split = ""))
74ada21ceb70 Uploaded jfb parents: diff changeset	266 YYYposition <- match(x = "x", table = YYYmotif)
74ada21ceb70 Uploaded jfb parents: diff changeset	267 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
74ada21ceb70 Uploaded jfb parents: diff changeset	268 #just 3 letters to the left of x
74ada21ceb70 Uploaded jfb parents: diff changeset	269
74ada21ceb70 Uploaded jfb parents: diff changeset	270 YYYLettersToTheLeft <- YYYposition - 1
74ada21ceb70 Uploaded jfb parents: diff changeset	271 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
74ada21ceb70 Uploaded jfb parents: diff changeset	272 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
74ada21ceb70 Uploaded jfb parents: diff changeset	273 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
74ada21ceb70 Uploaded jfb parents: diff changeset	274 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
74ada21ceb70 Uploaded jfb parents: diff changeset	275 #variable the user puts in is
74ada21ceb70 Uploaded jfb parents: diff changeset	276
74ada21ceb70 Uploaded jfb parents: diff changeset	277
74ada21ceb70 Uploaded jfb parents: diff changeset	278 if (YYYLettersToTheLeft < 4 \| YYYLettersToTheRight < 4) {
74ada21ceb70 Uploaded jfb parents: diff changeset	279 leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft))
74ada21ceb70 Uploaded jfb parents: diff changeset	280 rightspaces<-rep(" ",times=4-(YYYLettersToTheRight))
74ada21ceb70 Uploaded jfb parents: diff changeset	281 #add blank spaces if the motif has less than 4 letters to the left/right
74ada21ceb70 Uploaded jfb parents: diff changeset	282 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded jfb parents: diff changeset	283 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded jfb parents: diff changeset	284 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded jfb parents: diff changeset	285 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded jfb parents: diff changeset	286 FirstLetters<-motif
74ada21ceb70 Uploaded jfb parents: diff changeset	287 FirstMotifs[i,1]<-FirstLetters
74ada21ceb70 Uploaded jfb parents: diff changeset	288 FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
74ada21ceb70 Uploaded jfb parents: diff changeset	289 }
74ada21ceb70 Uploaded jfb parents: diff changeset	290
74ada21ceb70 Uploaded jfb parents: diff changeset	291 if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){
74ada21ceb70 Uploaded jfb parents: diff changeset	292 motif<-YYYmotif
74ada21ceb70 Uploaded jfb parents: diff changeset	293 #add blank spaces if the motif has less than 4 letters to the left/right
74ada21ceb70 Uploaded jfb parents: diff changeset	294 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded jfb parents: diff changeset	295 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded jfb parents: diff changeset	296 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded jfb parents: diff changeset	297 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded jfb parents: diff changeset	298 FirstLetters<-motif
74ada21ceb70 Uploaded jfb parents: diff changeset	299 FirstMotifs[i,1]<-FirstLetters
74ada21ceb70 Uploaded jfb parents: diff changeset	300 FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
74ada21ceb70 Uploaded jfb parents: diff changeset	301
74ada21ceb70 Uploaded jfb parents: diff changeset	302
74ada21ceb70 Uploaded jfb parents: diff changeset	303 }
74ada21ceb70 Uploaded jfb parents: diff changeset	304
74ada21ceb70 Uploaded jfb parents: diff changeset	305 }
74ada21ceb70 Uploaded jfb parents: diff changeset	306
74ada21ceb70 Uploaded jfb parents: diff changeset	307 SecondMotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1)
74ada21ceb70 Uploaded jfb parents: diff changeset	308 SecondAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1)
74ada21ceb70 Uploaded jfb parents: diff changeset	309
74ada21ceb70 Uploaded jfb parents: diff changeset	310 for (i in 1:nrow(SecondSubstrateSet)){
74ada21ceb70 Uploaded jfb parents: diff changeset	311 SecondLetters<-SecondSubstrateSet[i,7:15]
74ada21ceb70 Uploaded jfb parents: diff changeset	312 SecondLetters<-SecondLetters[SecondLetters !="XXXXX"]
74ada21ceb70 Uploaded jfb parents: diff changeset	313 SecondLetters<-paste(SecondLetters, sep="", collapse="")
74ada21ceb70 Uploaded jfb parents: diff changeset	314 leftspaces<-c()
74ada21ceb70 Uploaded jfb parents: diff changeset	315 rightspaces<-c()
74ada21ceb70 Uploaded jfb parents: diff changeset	316
74ada21ceb70 Uploaded jfb parents: diff changeset	317 YYYmotif <- unlist(strsplit(SecondLetters, split = ""))
74ada21ceb70 Uploaded jfb parents: diff changeset	318 YYYposition <- match(x = "x", table = YYYmotif)
74ada21ceb70 Uploaded jfb parents: diff changeset	319 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
74ada21ceb70 Uploaded jfb parents: diff changeset	320 #just 3 letters to the left of x
74ada21ceb70 Uploaded jfb parents: diff changeset	321
74ada21ceb70 Uploaded jfb parents: diff changeset	322 YYYLettersToTheLeft <- YYYposition - 1
74ada21ceb70 Uploaded jfb parents: diff changeset	323 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
74ada21ceb70 Uploaded jfb parents: diff changeset	324 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
74ada21ceb70 Uploaded jfb parents: diff changeset	325 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
74ada21ceb70 Uploaded jfb parents: diff changeset	326 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
74ada21ceb70 Uploaded jfb parents: diff changeset	327 #variable the user puts in is
74ada21ceb70 Uploaded jfb parents: diff changeset	328 if (YYYLettersToTheLeft < 4 \| YYYLettersToTheRight < 4) {
74ada21ceb70 Uploaded jfb parents: diff changeset	329 leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft))
74ada21ceb70 Uploaded jfb parents: diff changeset	330 rightspaces<-rep(" ",times=4-(YYYLettersToTheRight))
74ada21ceb70 Uploaded jfb parents: diff changeset	331 #add blank spaces if the motif has less than 4 letters to the left/right
74ada21ceb70 Uploaded jfb parents: diff changeset	332 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded jfb parents: diff changeset	333 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded jfb parents: diff changeset	334 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded jfb parents: diff changeset	335 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded jfb parents: diff changeset	336 SecondLetters<-motif
74ada21ceb70 Uploaded jfb parents: diff changeset	337 SecondMotifs[i,1]<-SecondLetters
74ada21ceb70 Uploaded jfb parents: diff changeset	338 SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3]
74ada21ceb70 Uploaded jfb parents: diff changeset	339 }
74ada21ceb70 Uploaded jfb parents: diff changeset	340
74ada21ceb70 Uploaded jfb parents: diff changeset	341 if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){
74ada21ceb70 Uploaded jfb parents: diff changeset	342 motif<-YYYmotif
74ada21ceb70 Uploaded jfb parents: diff changeset	343 #add blank spaces if the motif has less than 4 letters to the left/right
74ada21ceb70 Uploaded jfb parents: diff changeset	344 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded jfb parents: diff changeset	345 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded jfb parents: diff changeset	346 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded jfb parents: diff changeset	347 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded jfb parents: diff changeset	348 SecondLetters<-motif
74ada21ceb70 Uploaded jfb parents: diff changeset	349 SecondMotifs[i,1]<-SecondLetters
74ada21ceb70 Uploaded jfb parents: diff changeset	350 SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3]
74ada21ceb70 Uploaded jfb parents: diff changeset	351 }
74ada21ceb70 Uploaded jfb parents: diff changeset	352 }
74ada21ceb70 Uploaded jfb parents: diff changeset	353
74ada21ceb70 Uploaded jfb parents: diff changeset	354
74ada21ceb70 Uploaded jfb parents: diff changeset	355 #ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1)
74ada21ceb70 Uploaded jfb parents: diff changeset	356 #ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet))
74ada21ceb70 Uploaded jfb parents: diff changeset	357 if(1==0){
74ada21ceb70 Uploaded jfb parents: diff changeset	358 for (i in 1:nrow(ThirdSubstrateSet)){
74ada21ceb70 Uploaded jfb parents: diff changeset	359 ITDletters<-ThirdSubstrateSet[i,7:15]
74ada21ceb70 Uploaded jfb parents: diff changeset	360 ITDletters<-ITDletters[ITDletters !="XXXXX"]
74ada21ceb70 Uploaded jfb parents: diff changeset	361 ITDletters<-paste(ITDletters, sep="", collapse="")
74ada21ceb70 Uploaded jfb parents: diff changeset	362 YYYmotif <- unlist(strsplit(ITDletters, split = ""))
74ada21ceb70 Uploaded jfb parents: diff changeset	363 leftspaces<-c()
74ada21ceb70 Uploaded jfb parents: diff changeset	364 rightspaces<-c()
74ada21ceb70 Uploaded jfb parents: diff changeset	365 YYYposition <- match(x = "x", table = YYYmotif)
74ada21ceb70 Uploaded jfb parents: diff changeset	366 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
74ada21ceb70 Uploaded jfb parents: diff changeset	367 #just 3 letters to the left of x
74ada21ceb70 Uploaded jfb parents: diff changeset	368
74ada21ceb70 Uploaded jfb parents: diff changeset	369 YYYLettersToTheLeft <- YYYposition - 1
74ada21ceb70 Uploaded jfb parents: diff changeset	370 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
74ada21ceb70 Uploaded jfb parents: diff changeset	371 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
74ada21ceb70 Uploaded jfb parents: diff changeset	372 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
74ada21ceb70 Uploaded jfb parents: diff changeset	373 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
74ada21ceb70 Uploaded jfb parents: diff changeset	374 #variable the user puts in is
74ada21ceb70 Uploaded jfb parents: diff changeset	375 if (YYYLettersToTheLeft < 4 \| YYYLettersToTheRight < 4) {
74ada21ceb70 Uploaded jfb parents: diff changeset	376 leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft))
74ada21ceb70 Uploaded jfb parents: diff changeset	377 rightspaces<-rep(" ",times=4-(YYYLettersToTheRight))
74ada21ceb70 Uploaded jfb parents: diff changeset	378 #add blank spaces if the motif has less than 4 letters to the left/right
74ada21ceb70 Uploaded jfb parents: diff changeset	379 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded jfb parents: diff changeset	380 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded jfb parents: diff changeset	381 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded jfb parents: diff changeset	382 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded jfb parents: diff changeset	383 ITDletters<-motif
74ada21ceb70 Uploaded jfb parents: diff changeset	384 ITDmotifs[i,1]<-ITDletters
74ada21ceb70 Uploaded jfb parents: diff changeset	385 ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
74ada21ceb70 Uploaded jfb parents: diff changeset	386 }
74ada21ceb70 Uploaded jfb parents: diff changeset	387
74ada21ceb70 Uploaded jfb parents: diff changeset	388 if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){
74ada21ceb70 Uploaded jfb parents: diff changeset	389 motif<-YYYmotif
74ada21ceb70 Uploaded jfb parents: diff changeset	390 #add blank spaces if the motif has less than 4 letters to the left/right
74ada21ceb70 Uploaded jfb parents: diff changeset	391 motif<-c(leftspaces,YYYmotif,rightspaces)
74ada21ceb70 Uploaded jfb parents: diff changeset	392 #save that motif, which is the Y and +/- 4 amino acids, including truncation
74ada21ceb70 Uploaded jfb parents: diff changeset	393 motif<-motif[!motif %in% "x"]
74ada21ceb70 Uploaded jfb parents: diff changeset	394 motif<-paste(motif, sep="", collapse="")
74ada21ceb70 Uploaded jfb parents: diff changeset	395 ITDletters<-motif
74ada21ceb70 Uploaded jfb parents: diff changeset	396 ITDmotifs[i,1]<-ITDletters
74ada21ceb70 Uploaded jfb parents: diff changeset	397 ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
74ada21ceb70 Uploaded jfb parents: diff changeset	398 }
74ada21ceb70 Uploaded jfb parents: diff changeset	399 }
74ada21ceb70 Uploaded jfb parents: diff changeset	400 }
74ada21ceb70 Uploaded jfb parents: diff changeset	401 names(FirstMotifs)<-FirstAccessionNumbers
74ada21ceb70 Uploaded jfb parents: diff changeset	402 names(SecondMotifs)<-SecondAccessionNumbers
74ada21ceb70 Uploaded jfb parents: diff changeset	403 #names(ITDmotifs)<-ITDAccessionNumbers
74ada21ceb70 Uploaded jfb parents: diff changeset	404 }
74ada21ceb70 Uploaded jfb parents: diff changeset	405
74ada21ceb70 Uploaded jfb parents: diff changeset	406
# Motifs found in only one of the two substrate sets.
#
# For each set: discard every motif that also occurs in the other set, then keep
# only the first occurrence of each remaining motif. The motifs are filtered by
# subsetting (not setdiff()/unique()) because names() of the results are read
# later, when the output tables are assembled.
FirstMotifsFINAL <- local({
  only_in_first <- FirstMotifs[is.na(match(FirstMotifs, SecondMotifs))]
  #only_in_first<-only_in_first[!only_in_first %in% ITDmotifs]
  only_in_first[!duplicated(only_in_first)]
})


# ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% SecondMotifs]
# ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FirstMotifs]
# ITDmotifsFINAL<-ITDmotifsFINAL[!duplicated(ITDmotifsFINAL)]


SecondMotifsFINAL <- local({
  only_in_second <- SecondMotifs[is.na(match(SecondMotifs, FirstMotifs))]
  #only_in_second<-only_in_second[!only_in_second %in% ITDmotifs]
  only_in_second[!duplicated(only_in_second)]
})
74ada21ceb70 Uploaded jfb parents: diff changeset	420
74ada21ceb70 Uploaded jfb parents: diff changeset	421
# Build FirstFinalMatrix: one row for every column of Firstsubbackfreq whose
# accession number occurs (as a fixed substring) in the name of a motif kept in
# FirstMotifsFINAL.
#
# The matrix starts as a single all-NA placeholder row with 36 columns so that
# rbind() has something to append to; the placeholder is stripped again right
# before the matrix is written to disk.
columnalheader <- rep(NA, 36)
FirstFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# seq_along()/seq_len() replace 1:length()/2:ncol(): the colon forms count
# backwards (1, 0 or 2, 1) when there is nothing to iterate over and then index
# elements that do not exist.
for (k in seq_along(FirstMotifsFINAL)) {
  # Name carried by the k-th motif; it is matched against accession numbers.
  motif_name <- names(FirstMotifsFINAL[k])
  # NOTE(review): accession numbers are read from row 2 and column 1 is skipped
  # here, whereas the SecondFinalMatrix loop reads row 1 starting at column 1.
  # Confirm which layout the input tables really have.
  for (m in seq_len(ncol(Firstsubbackfreq))[-1]) {
    AN <- as.character(Firstsubbackfreq[2, m])
    # isTRUE() guards the condition: grepl() gives NA for an NA pattern and
    # logical(0) for an unnamed motif, and either one aborts a bare if().
    if (isTRUE(grepl(pattern = AN, x = motif_name, fixed = TRUE))) {
      # Store the whole matching column as one row of the result.
      outputmatrix <- matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
      FirstFinalMatrix <- rbind(FirstFinalMatrix, outputmatrix)
    }
  }
}

# Several motifs can point at the same column, so remove repeated rows.
# drop = FALSE keeps the result a matrix even when a single row remains.
FirstFinalMatrix <- FirstFinalMatrix[!duplicated(FirstFinalMatrix), ,
                                     drop = FALSE]
74ada21ceb70 Uploaded jfb parents: diff changeset	444
#columnalheader<-c(rep(NA,36))
#ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1)

# Lookup for a third ("ITD") substrate set. Deliberately disabled: the condition
# is always FALSE, so nothing below is ever evaluated. The initialisation of
# ITDFinalMatrix above and the computation of ITDmotifsFINAL are commented out,
# so they would have to be restored before this block could be switched on.
if (FALSE) {
  for (k in 1:length(ITDmotifsFINAL)) {
    AN <- 00000
    itd_name <- names(ITDmotifsFINAL[k])
    for (m in 1:ncol(Thirdsubbackfreq)) {
      AN <- as.character(Thirdsubbackfreq[1, m])
      if (grepl(pattern = AN, x = itd_name, fixed = TRUE)) {
        # the whole matching column becomes one row of the result
        outputmatrix <- matrix(as.character(Thirdsubbackfreq[, m]), nrow = 1)
        ITDFinalMatrix <- rbind(ITDFinalMatrix, outputmatrix)
      }
    }
  }
  ITDFinalMatrix <- ITDFinalMatrix[!duplicated(ITDFinalMatrix), ]
}
74ada21ceb70 Uploaded jfb parents: diff changeset	467
# Build SecondFinalMatrix: one row for every column of Secondsubbackfreq whose
# accession number (row 1 of that column) occurs as a fixed substring in the
# name of a motif kept in SecondMotifsFINAL.
#
# The matrix starts as a single all-NA placeholder row with 36 columns so that
# rbind() has something to append to; the placeholder is stripped again right
# before the matrix is written to disk.
columnalheader <- rep(NA, 36)
SecondFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# seq_along()/seq_len() replace 1:length()/1:ncol(): the colon forms yield
# c(1, 0) when there is nothing to iterate over and then index elements that do
# not exist.
for (k in seq_along(SecondMotifsFINAL)) {
  # Name carried by the k-th motif; it is matched against accession numbers.
  motif_name <- names(SecondMotifsFINAL[k])
  for (m in seq_len(ncol(Secondsubbackfreq))) {
    AN <- as.character(Secondsubbackfreq[1, m])
    # isTRUE() guards the condition: grepl() gives NA for an NA pattern and
    # logical(0) for an unnamed motif, and either one aborts a bare if().
    if (isTRUE(grepl(pattern = AN, x = motif_name, fixed = TRUE))) {
      # Store the whole matching column as one row of the result.
      outputmatrix <- matrix(as.character(Secondsubbackfreq[, m]), nrow = 1)
      SecondFinalMatrix <- rbind(SecondFinalMatrix, outputmatrix)
    }
  }
}

# Several motifs can point at the same column, so remove repeated rows.
# drop = FALSE keeps the result a matrix even when a single row remains.
SecondFinalMatrix <- SecondFinalMatrix[!duplicated(SecondFinalMatrix), ,
                                       drop = FALSE]
# ---- Output for the first substrate set ----
# All files are written with append = TRUE: the header and the data rows of the
# background-frequency table go into the same file, and running the script
# again adds to an existing file instead of replacing it.

# Two-column table: unshared motif, then the name attached to it.
FTLoutputmatrix <- matrix(
  data = c(FirstMotifsFINAL, names(FirstMotifsFINAL)),
  ncol = 2
)

write.table(
  x = FTLoutputmatrix,
  file = First_unshared_motifs_table,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

# Header line of the background-frequency file: the first 36 entries of column 1
# of Firstsubbackfreq, laid out as a single row.
columnalheader <- matrix(as.character(Firstsubbackfreq[1:36, 1]), nrow = 1)
write.table(
  x = columnalheader,
  file = First_unshared_subbackfreq,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

# Strip the all-NA placeholder row that FirstFinalMatrix was seeded with.
# If an earlier subset already collapsed it to a bare vector (only the
# placeholder was left), turn it back into a one-row matrix first.
if (!is.matrix(FirstFinalMatrix)) {
  FirstFinalMatrix <- matrix(FirstFinalMatrix, nrow = 1)
}
# Negative index + drop = FALSE instead of 2:nrow(): with exactly one matched
# row the old form returned a plain vector, which write.table() writes as one
# column of 36 lines, and with no matched row it failed outright.
FirstFinalMatrix <- FirstFinalMatrix[-1, , drop = FALSE]

# Nothing to write when no column matched.
if (nrow(FirstFinalMatrix) > 0) {
  write.table(
    x = FirstFinalMatrix,
    file = First_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )
}
74ada21ceb70 Uploaded jfb parents: diff changeset	506
74ada21ceb70 Uploaded jfb parents: diff changeset	507 ############################################################################################################
74ada21ceb70 Uploaded jfb parents: diff changeset	508
# ---- Output for the second substrate set ----
# All files are written with append = TRUE: the header and the data rows of the
# background-frequency table go into the same file, and running the script
# again adds to an existing file instead of replacing it.

# Two-column table: unshared motif, then the name attached to it.
D835Youtputmatrix <- matrix(
  data = c(SecondMotifsFINAL, names(SecondMotifsFINAL)),
  ncol = 2
)

write.table(
  x = D835Youtputmatrix,
  file = Second_unshared_motifs_table,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

# Header line of the background-frequency file, laid out as a single row.
# It is now read from Secondsubbackfreq; it used to be copied from
# Firstsubbackfreq, i.e. from the other data set's table.
# NOTE(review): assumes column 1 of Secondsubbackfreq holds the row labels, as
# column 1 of Firstsubbackfreq is assumed to -- confirm against the input file.
columnalheader <- matrix(as.character(Secondsubbackfreq[1:36, 1]), nrow = 1)
write.table(
  x = columnalheader,
  file = Second_unshared_subbackfreq,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

# Strip the all-NA placeholder row that SecondFinalMatrix was seeded with.
# If an earlier subset already collapsed it to a bare vector (only the
# placeholder was left), turn it back into a one-row matrix first.
if (!is.matrix(SecondFinalMatrix)) {
  SecondFinalMatrix <- matrix(SecondFinalMatrix, nrow = 1)
}
# Negative index + drop = FALSE instead of 2:nrow(): with exactly one matched
# row the old form returned a plain vector, which write.table() writes as one
# column of 36 lines, and with no matched row it failed outright.
SecondFinalMatrix <- SecondFinalMatrix[-1, , drop = FALSE]

# Nothing to write when no column matched.
if (nrow(SecondFinalMatrix) > 0) {
  write.table(
    x = SecondFinalMatrix,
    file = Second_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )
}
74ada21ceb70 Uploaded jfb parents: diff changeset	527
74ada21ceb70 Uploaded jfb parents: diff changeset	528 ############################################################################################################
74ada21ceb70 Uploaded jfb parents: diff changeset	529
74ada21ceb70 Uploaded jfb parents: diff changeset	530 # ITDoutputmatrix<-matrix(data = c(ITDmotifsFINAL,names(ITDmotifsFINAL)),ncol = 2)
74ada21ceb70 Uploaded jfb parents: diff changeset	531 #
74ada21ceb70 Uploaded jfb parents: diff changeset	532 # write.table(x=ITDoutputmatrix,
74ada21ceb70 Uploaded jfb parents: diff changeset	533 # file=Third_unshared_motifs_table,
74ada21ceb70 Uploaded jfb parents: diff changeset	534 # quote=FALSE, sep=",",
74ada21ceb70 Uploaded jfb parents: diff changeset	535 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
74ada21ceb70 Uploaded jfb parents: diff changeset	536 #
74ada21ceb70 Uploaded jfb parents: diff changeset	537 # columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1]))
74ada21ceb70 Uploaded jfb parents: diff changeset	538 # columnalheader<-matrix(columnalheader,nrow = 1)
74ada21ceb70 Uploaded jfb parents: diff changeset	539 # write.table(x=columnalheader,
74ada21ceb70 Uploaded jfb parents: diff changeset	540 # file=Third_unshared_subbackfreq,
74ada21ceb70 Uploaded jfb parents: diff changeset	541 # quote=FALSE, sep=",",
74ada21ceb70 Uploaded jfb parents: diff changeset	542 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
74ada21ceb70 Uploaded jfb parents: diff changeset	543 # ITDFinalMatrix<-ITDFinalMatrix[2:nrow(ITDFinalMatrix),]
74ada21ceb70 Uploaded jfb parents: diff changeset	544 # write.table(x=ITDFinalMatrix,
74ada21ceb70 Uploaded jfb parents: diff changeset	545 # file=Third_unshared_subbackfreq,
74ada21ceb70 Uploaded jfb parents: diff changeset	546 # quote=FALSE, sep=",",
74ada21ceb70 Uploaded jfb parents: diff changeset	547 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)

Mercurial > repos > jfb > difference_finder

annotate all stuff/difference finder for 2 overlaps proper names 7-7_1-15-2019.R @ 6:8fa6b79a2f19 draft