comparison shm_csr.r @ 93:8fcf31272f6e draft

planemo upload commit a43893724cc769bed8a1f19a5b19ec1ba20cb63c
author rhpvorderman
date Mon, 06 Mar 2023 11:36:32 +0000
parents b6f9a640e098
children
comparison
equal deleted inserted replaced
92:cf8ad181628f 93:8fcf31272f6e
437 437
438 dat.clss$best_match = substr(dat.clss$best_match, 0, 3) 438 dat.clss$best_match = substr(dat.clss$best_match, 0, 3)
439 439
440 dat.clss = rbind(dat, dat.clss) 440 dat.clss = rbind(dat, dat.clss)
441 441
442 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
443
444 if (nrow(dat) > 0) {
442 p = ggplot(dat.clss, aes(best_match, percentage_mutations)) 445 p = ggplot(dat.clss, aes(best_match, percentage_mutations))
443 p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) 446 p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA)
444 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) 447 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
445 p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) 448 p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
446 p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) 449 p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
447
448 png(filename="scatter.png") 450 png(filename="scatter.png")
449 print(p) 451 print(p)
450 dev.off() 452 dev.off()
451 453
452 pdfplots[["scatter.pdf"]] <- p 454 pdfplots[["scatter.pdf"]] <- p
453 455 }
454 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
455 456
456 print("Plotting frequency ranges plot") 457 print("Plotting frequency ranges plot")
457 458
458 dat$best_match_class = substr(dat$best_match, 0, 3) 459 dat$best_match_class = substr(dat$best_match, 0, 3)
459 freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20") 460 freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20")
465 466
466 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class") 467 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class")
467 468
468 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) 469 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
469 470
471 if (nrow(frequency_bins_data) > 0) {
470 p = ggplot(frequency_bins_data, aes(frequency_bins, frequency)) 472 p = ggplot(frequency_bins_data, aes(frequency_bins, frequency))
471 p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) 473 p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
472 p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) 474 p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
473 475
474 png(filename="frequency_ranges.png") 476 png(filename="frequency_ranges.png")
475 print(p) 477 print(p)
476 dev.off() 478 dev.off()
477 479
478 pdfplots[["frequency_ranges.pdf"]] <- p 480 pdfplots[["frequency_ranges.pdf"]] <- p
481 }
479 482
480 save(pdfplots, file="pdfplots.RData") 483 save(pdfplots, file="pdfplots.RData")
481 484
482 frequency_bins_data_by_class = frequency_bins_data 485 frequency_bins_data_by_class = frequency_bins_data
483 486
484 frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),] 487 frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),]
485 488
489
486 frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins) 490 frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins)
487 frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher" 491 if (nrow(frequency_bins_data_by_class) > 0) {
488 frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower" 492 frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
489 493 frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
494 }
490 write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T) 495 write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T)
491 496
492 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) 497 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")])
493 498
494 frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")]) 499 frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")])
497 502
498 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) 503 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
499 504
500 frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),] 505 frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),]
501 frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins) 506 frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins)
502 frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher" 507 if (nrow(frequency_bins_data) > 0) {
503 frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower" 508 frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
504 509 frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
510 }
505 write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T) 511 write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T)
506 512
507 513
508 514
509 515