Mercurial > repos > davidvanzessen > shm_csr
comparison shm_csr.r @ 93:8fcf31272f6e draft
planemo upload commit a43893724cc769bed8a1f19a5b19ec1ba20cb63c
author | rhpvorderman |
---|---|
date | Mon, 06 Mar 2023 11:36:32 +0000 |
parents | b6f9a640e098 |
children |
comparison
equal
deleted
inserted
replaced
92:cf8ad181628f | 93:8fcf31272f6e |
---|---|
437 | 437 |
438 dat.clss$best_match = substr(dat.clss$best_match, 0, 3) | 438 dat.clss$best_match = substr(dat.clss$best_match, 0, 3) |
439 | 439 |
440 dat.clss = rbind(dat, dat.clss) | 440 dat.clss = rbind(dat, dat.clss) |
441 | 441 |
442 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) | |
443 | |
444 if (nrow(dat) > 0) { | |
442 p = ggplot(dat.clss, aes(best_match, percentage_mutations)) | 445 p = ggplot(dat.clss, aes(best_match, percentage_mutations)) |
443 p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) | 446 p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) |
444 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) | 447 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) |
445 p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) | 448 p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) |
446 p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) | 449 p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) |
447 | |
448 png(filename="scatter.png") | 450 png(filename="scatter.png") |
449 print(p) | 451 print(p) |
450 dev.off() | 452 dev.off() |
451 | 453 |
452 pdfplots[["scatter.pdf"]] <- p | 454 pdfplots[["scatter.pdf"]] <- p |
453 | 455 } |
454 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) | |
455 | 456 |
456 print("Plotting frequency ranges plot") | 457 print("Plotting frequency ranges plot") |
457 | 458 |
458 dat$best_match_class = substr(dat$best_match, 0, 3) | 459 dat$best_match_class = substr(dat$best_match, 0, 3) |
459 freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20") | 460 freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20") |
465 | 466 |
466 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class") | 467 frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class") |
467 | 468 |
468 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) | 469 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) |
469 | 470 |
471 if (nrow(frequency_bins_data) > 0) { | |
470 p = ggplot(frequency_bins_data, aes(frequency_bins, frequency)) | 472 p = ggplot(frequency_bins_data, aes(frequency_bins, frequency)) |
471 p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) | 473 p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) |
472 p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) | 474 p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) |
473 | 475 |
474 png(filename="frequency_ranges.png") | 476 png(filename="frequency_ranges.png") |
475 print(p) | 477 print(p) |
476 dev.off() | 478 dev.off() |
477 | 479 |
478 pdfplots[["frequency_ranges.pdf"]] <- p | 480 pdfplots[["frequency_ranges.pdf"]] <- p |
481 } | |
479 | 482 |
480 save(pdfplots, file="pdfplots.RData") | 483 save(pdfplots, file="pdfplots.RData") |
481 | 484 |
482 frequency_bins_data_by_class = frequency_bins_data | 485 frequency_bins_data_by_class = frequency_bins_data |
483 | 486 |
484 frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),] | 487 frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),] |
485 | 488 |
489 | |
486 frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins) | 490 frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins) |
487 frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher" | 491 if (nrow(frequency_bins_data_by_class) > 0) { |
488 frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower" | 492 frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher" |
489 | 493 frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower" |
494 } | |
490 write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T) | 495 write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T) |
491 | 496 |
492 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) | 497 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) |
493 | 498 |
494 frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")]) | 499 frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")]) |
497 | 502 |
498 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) | 503 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) |
499 | 504 |
500 frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),] | 505 frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),] |
501 frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins) | 506 frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins) |
502 frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher" | 507 if (nrow(frequency_bins_data) > 0) { |
503 frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower" | 508 frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher" |
504 | 509 frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower" |
510 } | |
505 write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T) | 511 write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T) |
506 | 512 |
507 | 513 |
508 | 514 |
509 | 515 |