comparison graph_homogeneity_normality.r @ 0:fb7b2cbd80bb draft default tip

"planemo upload for repository https://github.com/Marie59/Data_explo_tools commit 60627aba07951226c8fd6bb3115be4bd118edd4e"
author ecology
date Fri, 13 Aug 2021 18:17:38 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:fb7b2cbd80bb
1 #Rscript
2
3 #######################################
4 ## Homogeneity and normality ##
5 #######################################
6
7 #####Packages : car
8 # ggplot2
9 # ggpubr
10 # Cowplot
11
12 #####Load arguments
13
# Read the positional command-line arguments:
#   1. path of the tab-separated input table
#   2. header flag: the string "false" means the table has no header line,
#      any other value means it has one
#   3. index of the column stored in `date`
#   4. index of the column stored in `spe`
#   5. index of the column stored in `var`
args <- commandArgs(trailingOnly = TRUE)

# All five arguments are read below, so reject anything shorter up front
# instead of failing later with an obscure missing-value error.
if (length(args) < 5) {
  stop("This tool needs 5 arguments: table, header flag, date column index, ",
       "species column index, variable column index", call. = FALSE)
}

table <- args[1]
hr <- args[2] != "false"
date <- as.numeric(args[3])
spe <- as.numeric(args[4])
var <- as.numeric(args[5])

# as.numeric() turns a non-numeric string into NA; stop here rather than
# letting an NA column index propagate into the analysis.
if (anyNA(c(date, spe, var))) {
  stop("Column arguments (3 to 5) must be numeric column indexes",
       call. = FALSE)
}
31
32 #####Import data
# Load the tab-separated table and drop every row that contains a missing
# value.
data <- na.omit(
  read.table(
    file = table,
    header = hr,
    sep = "\t",
    dec = ".",
    fill = TRUE,
    encoding = "UTF-8"
  )
)

# Translate the column positions given on the command line into column names.
coldate <- names(data)[date]
colspe <- names(data)[spe]
colvar <- names(data)[var]
38
39 #####Your analysis
40
41 ####Homogeneity of the variance####
42
43 ##Test of Levene##
# Run Levene's test (car::leveneTest) on one variable across groups.
#
# data: data frame holding both columns.
# col1: name of the response column; it is coerced with as.numeric().
# col2: name of the grouping column; it is coerced with as.factor().
#
# Returns the object produced by car::leveneTest().
testlevene <- function(data, col1, col2) {
  response <- as.numeric(data[, col1])
  grouping <- as.factor(data[, col2])

  car::leveneTest(y = response, group = grouping)
}
# Capture the printed Levene table as text lines, one element per line.
levene <- capture.output(
  testlevene(data = data, col1 = colvar, col2 = colspe)
)

# Append the captured lines to levene.txt; each line is closed by a quote
# and a newline.
cat(
  "\nwrite table with levene test. \n--> \"",
  paste0(levene, "\"\n"),
  file = "levene.txt",
  sep = "",
  append = TRUE
)
54
55 ##Two boxplots to visualize it##
56
# Draw boxplots of `col3` for each level of `col1` and save them as PNG.
#
# data: data frame holding the three columns.
# col1: name of the column converted to a factor; used for the x axis and
#       for the box colour.
# col2: name of the grouping column.
# col3: name of the column plotted on the y axis.
# mult: when TRUE, one file per distinct value of `col2` is written;
#       otherwise a single figure "Homogeneity.png" with one facet row per
#       value of `col2` is written.
homog_var <- function(data, col1, col2, col3, mult) {
  data[, col1] <- as.factor(data[, col1])

  # Pieces shared by both output modes.
  box_mapping <- ggplot2::aes_string(x = col1, y = col3, color = col1)
  axis_theme <- ggplot2::theme(
    legend.position = "none",
    axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
  panel_theme <- ggplot2::theme(
    panel.background = ggplot2::element_rect(fill = "#d9d4c5", colour = "#d9d4c5", linetype = "solid"),
    panel.grid.major = ggplot2::element_line(linetype = "solid", colour = "white"),
    panel.grid.minor = ggplot2::element_line(linetype = "solid", colour = "white")
  )

  if (mult) {
    for (level in unique(data[, col2])) {
      level_rows <- data[data[, col2] == level, ]
      level_plot <- ggplot2::ggplot(level_rows, box_mapping) +
        ggplot2::geom_boxplot() +
        axis_theme +
        panel_theme

      # NOTE(review): paste() uses its default separator " ", so the file
      # name contains spaces around the level value. Kept as is because
      # something outside this file may rely on the name - confirm.
      ggplot2::ggsave(paste("Homogeneity_of_", level, ".png"), level_plot, width = 16, height = 9, units = "cm")
    }
  } else {
    # One facet row per value of `col2`, each with its own free scales.
    facet_plot <- ggplot2::ggplot(data, box_mapping) +
      ggplot2::geom_boxplot() +
      axis_theme +
      ggplot2::facet_grid(rows = ggplot2::vars(data[, col2]), scales = "free") +
      panel_theme

    ggplot2::ggsave("Homogeneity.png", facet_plot, width = 16, height = 9, units = "cm")
  }
}
84
85 ####Normality of the distribution####
86 # Kolmogorov-Smirnov test
87
# One-sample Kolmogorov-Smirnov test of the variable against a normal
# distribution. The sample mean and standard deviation are passed to pnorm:
# without them ks.test() compares against the standard normal N(0, 1), which
# rejects almost any variable that is not already centred and scaled.
# NOTE(review): the parameters are estimated from the same sample, so the
# reported p-value is only approximate (conservative).
ks <- capture.output(
  ks.test(
    x = data[, var],
    y = "pnorm",
    mean = mean(data[, var]),
    sd = sd(data[, var]),
    alternative = "two.sided"
  )
)

# Append the captured lines to ks.txt; each line is closed by a quote and a
# newline.
cat("\nwrite table with Kolmogorov-Smirnov test. \n--> \"", paste0(ks, "\"\n"), file = "ks.txt", sep = "", append = TRUE)
91
92 #Histogramm with distribution line
# Build a histogram of one variable with a density layer on top.
#
# data:     data frame holding the variable.
# var1:     name of the column to plot.
# binwidth: width of the histogram bins (default 2, the value previously
#           hard-coded).
#
# Returns the ggplot object titled "Distribution histogram".
#
# NOTE(review): the histogram layer uses its default y (counts) while the
# density layer uses its default y (density), so the two layers may not
# share a comparable scale - confirm this is the intended rendering.
graph_hist <- function(data, var1, binwidth = 2) {
  ggplot2::ggplot(data) +
    ggplot2::geom_histogram(ggplot2::aes_string(x = var1), binwidth = binwidth, color = "black", fill = "white") +
    ggplot2::geom_density(ggplot2::aes_string(var1), alpha = 0.12, fill = "red") +
    ggplot2::ggtitle("Distribution histogram")
}
101
102 #Add the mean dashed line
# Add a dashed vertical line at the mean of one variable to a plot.
#
# graph: ggplot object to extend.
# var1:  name of the column whose mean is drawn.
# data:  data frame holding `var1`. Defaults to the data attached to
#        `graph`, so the function no longer depends on a global `data`
#        object being present.
#
# Returns `graph` with the extra geom_vline layer.
add_mean <- function(graph, var1, data = graph$data) {
  graph + ggplot2::geom_vline(xintercept = mean(data[[var1]]),
                              color = "midnightblue", linetype = "dashed", size = 1)
}
109
110 #Adding a QQplot
# Build a Q-Q plot of one variable with ggpubr::ggqqplot().
#
# data: data frame holding the variable.
# var1: name of the column to plot.
#
# Returns the plot object titled "Q-Q plot".
graph_qqplot <- function(data, var1) {
  qq_plot <- ggpubr::ggqqplot(data, var1, color = "midnightblue")

  qq_plot + ggplot2::ggtitle("Q-Q plot")
}
116
#The data are assumed to be normally distributed when the points approximately follow the 45-degree reference line.
118
# Place two plots side by side (one row, two columns) and save the result
# as "Normal_distribution.png" (10 cm x 7 cm).
#
# graph1: plot shown in the left cell.
# graph2: plot shown in the right cell.
graph_fin <- function(graph1, graph2) {
  side_by_side <- cowplot::plot_grid(graph1, graph2, nrow = 1, ncol = 2)

  ggplot2::ggsave(
    "Normal_distribution.png",
    side_by_side,
    width = 10,
    height = 7,
    units = "cm"
  )
}
124
# One file per group is produced unless the grouping column has exactly two
# distinct values, in which case a single faceted figure is drawn.
# A plain comparison replaces the scalar ifelse(cond, FALSE, TRUE).
mult <- length(unique(data[, colspe])) != 2
homog_var(data, col1 = coldate, col2 = colspe, col3 = colvar, mult = mult)

# Normality figure: histogram with a mean line next to a Q-Q plot.
graph_hist1 <- graph_hist(data, var1 = colvar)
graph_mean <- add_mean(graph = graph_hist1, var1 = colvar)
graph_fin(graph1 = graph_mean, graph2 = graph_qqplot(data, var1 = colvar))