comparison report_clonality/RScript.r @ 40:106275b54470 draft

Uploaded
author davidvanzessen
date Tue, 30 May 2017 07:26:33 -0400
parents b6936fb52ab9
children a70dbcfa5d8a
comparison
equal deleted inserted replaced
39:bad6a9a53ae7 40:106275b54470
120 clonalityFrame$clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(clonaltype, ","))], sep = ":")) 120 clonalityFrame$clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(clonaltype, ","))], sep = ":"))
121 clonalityFrame$clonality_clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(paste(clonaltype, ",Replicate", sep=""), ","))], sep = ":")) 121 clonalityFrame$clonality_clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(paste(clonaltype, ",Replicate", sep=""), ","))], sep = ":"))
122 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$clonality_clonaltype), ] 122 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$clonality_clonaltype), ]
123 } 123 }
124 124
125 if(nrow(PRODF) == 0){
126 stop("No sequences left after filtering")
127 }
128
125 prod.unique.sample.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample")]) 129 prod.unique.sample.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample")])
126 prod.unique.rep.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample", "Replicate")]) 130 prod.unique.rep.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample", "Replicate")])
127 131
128 unprod.unique.sample.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample")]) 132 unprod.unique.sample.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample")])
129 unprod.unique.rep.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample", "Replicate")]) 133 unprod.unique.rep.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample", "Replicate")])
134
130 135
131 PRODF$freq = 1 136 PRODF$freq = 1
132 137
133 if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*" 138 if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*"
134 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID) 139 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID)
174 179
175 print("Report Clonality - counting productive/unproductive/unique") 180 print("Report Clonality - counting productive/unproductive/unique")
176 181
177 #create the table on the overview page with the productive/unique counts per sample/replicate 182 #create the table on the overview page with the productive/unique counts per sample/replicate
178 #first for sample 183 #first for sample
184
179 sample.count = merge(input.sample.count, prod.sample.count, by="Sample", all.x=T) 185 sample.count = merge(input.sample.count, prod.sample.count, by="Sample", all.x=T)
180 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100) 186 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100)
181 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T) 187 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T)
182 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100) 188 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100)
183 189
186 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T) 192 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T)
187 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100) 193 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100)
188 194
189 #then sample/replicate 195 #then sample/replicate
190 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T) 196 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T)
197
198 print(rep.count)
199
200 fltr = is.na(rep.count$Productive)
201 if(any(fltr)){
202 rep.count[fltr,"Productive"] = 0
203 }
204
205 print(rep.count)
206
191 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100) 207 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100)
192 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T) 208 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T)
193 rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100) 209 rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100)
194 210
195 rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate"), all.x=T) 211 rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate"), all.x=T)