# HG changeset patch # User davidvanzessen # Date 1482242595 18000 # Node ID 05c62efdc3931ca72699927cf1910044cd22c5e0 # Parent 767dd932700990fd16b0c6c4e5df064593370cfb Uploaded diff -r 767dd9327009 -r 05c62efdc393 merge_and_filter.r --- a/merge_and_filter.r Fri Dec 16 10:17:16 2016 -0500 +++ b/merge_and_filter.r Tue Dec 20 09:03:15 2016 -0500 @@ -47,6 +47,9 @@ filtering.steps[,2] = as.character(filtering.steps[,2]) #filtering.steps[,3] = as.numeric(filtering.steps[,3]) +print("summary files columns") +print(names(summ)) + summ = merge(summ, gene_identification, by="Sequence.ID") summ = summ[summ$Functionality != "No results",] @@ -67,14 +70,23 @@ filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ))) +#print("mutation analysis files columns") +#print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])])) + result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID") print(paste("Number of sequences after merging with mutation analysis file:", nrow(result))) +#print("mutation stats files columns") +#print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])])) + result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID") print(paste("Number of sequences after merging with mutation stats file:", nrow(result))) +#print("hotspots files columns") +#print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])])) + result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID") print(paste("Number of sequences after merging with hotspots file:", nrow(result))) diff -r 767dd9327009 -r 05c62efdc393 pattern_plots.r --- a/pattern_plots.r Fri Dec 16 10:17:16 2016 -0500 +++ b/pattern_plots.r Tue Dec 20 09:03:15 2016 -0500 @@ -47,6 +47,11 @@ names(data1) = c("Class", "Type", "value") +chk = is.na(data1$value) +if(any(chk)){ + data1[chk, "value"] = 0 +} + data1 = data1[order(data1$Type),] 
write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) @@ -79,6 +84,11 @@ names(data2) = c("Class", "Type", "value") +chk = is.na(data2$value) +if(any(chk)){ + data2[chk, "value"] = 0 +} + data2 = data2[order(data2$Type),] write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) @@ -94,9 +104,6 @@ data3 = data3[,names(data3)[grepl("\\.x", names(data3))]] names(data3) = gsub(".x", "", names(data3)) -data3[is.na(data3)] = 0 -#data3[is.infinite(data3)] = 0 - data3["G/C transitions",] = round(data3["Transitions at G C (%)",] / (data3["C",] + data3["G",]) * 100, 1) data3["G/C transversions",] = round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / (data3["C",] + data3["G",]) * 100, 1) @@ -115,6 +122,11 @@ data3 = melt(t(data3[8:10,])) names(data3) = c("Class", "Type", "value") +chk = is.na(data3$value) +if(any(chk)){ + data3[chk, "value"] = 0 +} + data3 = data3[order(data3$Type),] write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)