Mercurial > repos > workflow4metabolomics > w4mconcatenate
comparison concatenation.R @ 0:c577f13705f2 draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit eba1150dad55ad9eca5e93358d9f75e6b6dba2ec
| author | workflow4metabolomics |
|---|---|
| date | Wed, 10 Jul 2024 15:20:11 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c577f13705f2 |
|---|---|
| 1 if (FALSE) { | |
| 2 rm(list = ls()) | |
| 3 | |
| 4 DM1 <- data.frame(data = c("5d_-kkcùf", "npèt", "5PY4(*3"), | |
| 5 `j 785` = c(0.356426723610756, 0.801750949101246, 0.875199970420953), | |
| 6 `y54j 68y4j6` = c(0.380152310071702, 0.535593104115636, 0.0825428101366147), | |
| 7 `5-6 4` = c(0.0306944207412024, 0.258351312473067, 0.253659010703906), | |
| 8 hrrrrh = c(0.334137638848017, 0.599475573145688, 0.507762246807195), | |
| 9 `5h -` = c(0.298147485608469, 0.0763319665667417, 0.856444177031262)) | |
| 10 | |
| 11 DM2 <- data.frame(data = c("5d_-kkcùf", "npèt", "5PY4(*3"), | |
| 12 `j 785` = c(0.356426723610756, 0.801750949101246, 0.875199970420953), | |
| 13 `y54j 68y4j6` = c(0.380152310071702, 0.535593104115636, 0.0825428101366147), | |
| 14 `5-6 4` = c(0.0306944207412024, 0.258351312473067, 0.253659010703906), | |
| 15 hrrrrh = c(0.334137638848017, 0.599475573145688, 0.507762246807195), | |
| 16 `5h -` = c(0.298147485608469, 0.0763319665667417, 0.856444177031262)) | |
| 17 | |
| 18 M1 <- data.frame(samplename = c("j 785", "y54j 68y4j6", "5-6 4", "hrrrrh", "5h -"), | |
| 19 ABD = c(19, 24, 2, 3, "y"), E = c(9, "p0", 45, 24, 29), | |
| 20 AAA = c("r", "bg", "il", "d", "b"), | |
| 21 fp = c("pj", "z", "e", "r", "t"), | |
| 22 uv = c("s", "d", "f", "s", "d")) | |
| 23 | |
| 24 M2 <- data.frame(samplename = c("j 785", "y54j 68y4j6", "5-6 4", "hrrrrh", "5h -"), | |
| 25 ABD = c(19, 24, 2, 3, "y"), E = c(9, "ici", 45, 24, 29), | |
| 26 AAA = c("r", "bg", "il", "d", "b"), | |
| 27 fp = c("pj", "z", "e", "r", "t"), | |
| 28 uv = c("s", "d", "f", "s", "d")) | |
| 29 type <- "sample" | |
| 30 concatenation <- "unique" | |
| 31 tab1 <- "tab1" | |
| 32 tab2 <- "tab2" | |
| 33 choice_keep <- "oui" | |
| 34 keep <- 0 | |
| 35 concat(DM1, M1, DM2, M2, type, tab1, tab2, concatenation, choice_keep, keep) | |
| 36 } | |
| 37 | |
| 38 ################################################################################################################# | |
| 39 | |
| 40 | |
| 41 concat <- function(DM1, M1, DM2, M2, type, tab1, tab2, concatenation, choice_keep, keep) { | |
| 42 | |
| 43 | |
| 44 | |
| 45 #DM1/DM2 = data.frame containing data Matrix | |
| 46 #M1/M2 = data.frame containing sample Metadata or variable Metadata | |
| 47 #type = "sample" or "variable" depending on Metadata content | |
| 48 #tab1/tab2 = Suffix for Metadata 1/2 | |
| 49 #concatenation = type of concatenation | |
| 50 #choice_keep = choice of keeping columns with the same no or keeping just one | |
| 51 #keep = keep the column in M1 or M2 | |
| 52 #returns the concatenated metadata and the two Data Matrix | |
| 53 | |
| 54 identifiers_1 <- colnames(M1)[1] | |
| 55 identifiers_2 <- colnames(M2)[1] | |
| 56 | |
| 57 err.stock <- NULL | |
| 58 | |
| 59 #Concatenation------------------------------------------------------------------ | |
| 60 | |
| 61 #If Metadatas is Sample_Metadata we transpose | |
| 62 if (type == "sample") { | |
| 63 | |
| 64 rownames(DM1) <- DM1[, 1] | |
| 65 corner_DM1 <- colnames(DM1)[1] | |
| 66 DM1 <- DM1[, -1, drop = FALSE] | |
| 67 DM1 <- t(DM1) | |
| 68 DM1 <- data.frame(sample = row.names(DM1), DM1, check.names = FALSE) | |
| 69 rownames(DM1) <- NULL | |
| 70 | |
| 71 rownames(DM2) <- DM2[, 1] | |
| 72 corner_DM2 <- colnames(DM2)[1] | |
| 73 DM2 <- DM2[, -1, drop = FALSE] | |
| 74 DM2 <- t(DM2) | |
| 75 DM2 <- data.frame(sample = row.names(DM2), DM2, check.names = FALSE) | |
| 76 rownames(DM2) <- NULL | |
| 77 } | |
| 78 | |
| 79 #Add order of sample and Sort by order | |
| 80 | |
| 81 | |
| 82 M1$order1 <- seq(1, nrow(M1)) | |
| 83 M2$order2 <- seq(nrow(M1) + 1, nrow(M2) + nrow(M1)) | |
| 84 | |
| 85 M1_bf <- M1[order(M1[, 1]), ] | |
| 86 M2_bf <- M2[order(M2[, 1]), ] | |
| 87 | |
| 88 | |
| 89 #Check the variables in common and extract them. | |
| 90 | |
| 91 | |
| 92 same <- check_features(M1_bf, M2_bf) | |
| 93 same <- same[- which(same == identifiers_1)] | |
| 94 | |
| 95 #Check that shared variables have the same values. | |
| 96 #If not, they are renamed or deleted according to the parameters chosen by the user. | |
| 97 result2 <- compare_same_columns(M1_bf, M2_bf, same, choice_keep, keep, tab1, tab2) | |
| 98 M1 <- result2$M1 | |
| 99 M2 <- result2$M2 | |
| 100 | |
| 101 #Unique-------------------------------------------------------------------------- | |
| 102 if (concatenation == "unique") { | |
| 103 #Table match check | |
| 104 #We verify that the individuals are all the same | |
| 105 err.stock <- match2_bis(M1, M2, type) | |
| 106 check_err(err.stock) | |
| 107 M_merge <- merge(M1, M2, by = 1) | |
| 108 } | |
| 109 | |
| 110 | |
| 111 #Intersection-------------------------------------------------------------------- | |
| 112 | |
| 113 if (concatenation == "intersection") { | |
| 114 | |
| 115 #select individuals in common | |
| 116 sample_common <- intersect(M1[, 1], M2[, 1]) | |
| 117 | |
| 118 #if the list of individuals in common is null, an error message is sent | |
| 119 if (length(sample_common) == 0) { | |
| 120 err.stock <- c(err.stock, "\nThere are no individuals in common \n") | |
| 121 check_err(err.stock) | |
| 122 } | |
| 123 #if the list of individuals in common is less than 5, then a Warning message is sent | |
| 124 if (length(sample_common) < 5) { | |
| 125 cat("\nWarning: Less than 5 individuals in common\n") | |
| 126 } | |
| 127 M_merge <- merge(M1, M2, by = 1) | |
| 128 } | |
| 129 | |
| 130 #Union -------------------------------------------------------------------------- | |
| 131 if (concatenation == "union") { | |
| 132 | |
| 133 #select common ids | |
| 134 id_common <- intersect(M1[, 1], M2[, 1]) | |
| 135 | |
| 136 if (is.null(id_common)) { | |
| 137 cat("\nT Warning : there are no individuals in common\n") | |
| 138 } | |
| 139 | |
| 140 M2_common <- M2[M2[, 1] %in% id_common, ] | |
| 141 #Store rows with individuals belonging only to M2 | |
| 142 M2_specifique <- M2[! M2[, 1] %in% id_common, ] | |
| 143 #Merge the two tables only with the samples not in common | |
| 144 M_merge <- bind_rows(M1, M2_specifique) | |
| 145 col_names <- colnames(M2_common) | |
| 146 col_names <- col_names[- which(col_names == identifiers_2)] | |
| 147 feature_common <- check_features(M_merge, M2_bf) | |
| 148 #Check if M_merge and M2_bf have columns in common. If so, complete the table with the values not taken. | |
| 149 if (!is.null(feature_common)) { | |
| 150 | |
| 151 identifiers_3 <- M2_specifique[, 1] | |
| 152 #We select the value in M2_bf, the M2 table before undergoing any changes, then insert it in the M_merge table. | |
| 153 for (feature in feature_common) { | |
| 154 for (id in identifiers_3) { | |
| 155 | |
| 156 index_row <- which(M2_bf[, 1] == id) | |
| 157 index_col <- which(colnames(M2_bf) == feature) | |
| 158 new_value <- M2_bf[index_row, index_col] | |
| 159 index_row <- which(M_merge[, 1] == id) | |
| 160 index_col <- which(colnames(M_merge) == feature) | |
| 161 M_merge[index_row, index_col] <- new_value | |
| 162 | |
| 163 } | |
| 164 } | |
| 165 } | |
| 166 #Fill in the table with common values | |
| 167 for (col in col_names) { | |
| 168 for (id in id_common) { | |
| 169 index_row <- which(M2_common[, 1] == id) | |
| 170 index_col <- which(colnames(M2_common) == col) | |
| 171 new_value <- M2_common[index_row, index_col] | |
| 172 index_row <- which(M_merge[, 1] == id) | |
| 173 index_col <- which(colnames(M_merge) == col) | |
| 174 M_merge[index_row, index_col] <- new_value | |
| 175 | |
| 176 } | |
| 177 } | |
| 178 } | |
| 179 M_merge_sort <- M_merge[order(M_merge$order1, M_merge$order2), ] | |
| 180 M_merge_sort <- M_merge_sort[, - which(colnames(M_merge_sort) == "order1")] | |
| 181 M_merge_sort <- M_merge_sort[, - which(colnames(M_merge_sort) == "order2")] | |
| 182 #DataMatrix --------------------------------------------------------------------- | |
| 183 | |
| 184 colnames_1 <- colnames(DM1) | |
| 185 colnames_2 <- colnames(DM2) | |
| 186 #Unique ------------------------------------------------------------------------- | |
| 187 | |
| 188 if (concatenation == "unique") { | |
| 189 | |
| 190 if (type == "sample") { | |
| 191 | |
| 192 rownames(DM1) <- DM1[, 1] | |
| 193 DM1 <- DM1[, -1] | |
| 194 DM1 <- t(DM1) | |
| 195 DM1 <- data.frame(sample = row.names(DM1), DM1, check.names = FALSE) | |
| 196 colnames(DM1)[1] <- corner_DM1 | |
| 197 rownames(DM1) <- NULL | |
| 198 | |
| 199 rownames(DM2) <- DM2[, 1] | |
| 200 DM2 <- DM2[, -1, drop = FALSE] | |
| 201 DM2 <- t(DM2) | |
| 202 DM2 <- data.frame(sample = row.names(DM2), DM2, check.names = FALSE) | |
| 203 colnames(DM2)[1] <- corner_DM2 | |
| 204 rownames(DM2) <- NULL | |
| 205 } | |
| 206 result <- list(M_merge_sort = M_merge_sort, DM1 = DM1, DM2 = DM2) | |
| 207 return(result) | |
| 208 } | |
| 209 | |
| 210 #Intersection-------------------------------------------------------------------- | |
| 211 | |
| 212 if (concatenation == "intersection") { | |
| 213 | |
| 214 id_in_common <- intersect(DM1[, 1], DM2[, 1]) | |
| 215 | |
| 216 DM1_filter <- subset(DM1, DM1[, 1] %in% id_in_common) | |
| 217 DM2_filter <- subset(DM2, DM2[, 1] %in% id_in_common) | |
| 218 | |
| 219 if (type == "sample") { | |
| 220 | |
| 221 rownames(DM1_filter) <- DM1_filter[, 1] | |
| 222 DM1_filter <- DM1_filter[, -1] | |
| 223 DM1_filter <- t(DM1_filter) | |
| 224 DM1_filter <- data.frame(sample = row.names(DM1_filter), DM1_filter, check.names = FALSE) | |
| 225 colnames(DM1_filter)[1] <- corner_DM1 | |
| 226 rownames(DM1_filter) <- NULL | |
| 227 | |
| 228 rownames(DM2_filter) <- DM2_filter[, 1] | |
| 229 DM2_filter <- DM2_filter[, -1, drop = FALSE] | |
| 230 DM2_filter <- t(DM2_filter) | |
| 231 DM2_filter <- data.frame(sample = row.names(DM2_filter), DM2_filter, check.names = FALSE) | |
| 232 colnames(DM2_filter)[1] <- corner_DM2 | |
| 233 rownames(DM2_filter) <- NULL | |
| 234 } | |
| 235 result <- list(M_merge_sort = M_merge_sort, DM1 = DM1_filter, DM2 = DM2_filter) | |
| 236 return(result) | |
| 237 } | |
| 238 | |
| 239 #Union -------------------------------------------------------------------------- | |
| 240 | |
| 241 if (concatenation == "union") { | |
| 242 | |
| 243 common_individuals <- intersect(DM1[, 1], DM2[, 1]) | |
| 244 common_columns <- intersect(colnames_1, colnames_2) | |
| 245 #check whether there are individuals or variables in common | |
| 246 if (is.null(common_individuals) || is.null(common_columns)) { | |
| 247 | |
| 248 comparison_result <- FALSE | |
| 249 #If the individuals in common take the same values for all variables, then comparison_result=TRUE | |
| 250 } else { | |
| 251 | |
| 252 DM1_common <- subset(DM1, DM1[, 1] %in% common_individuals) | |
| 253 DM2_common <- subset(DM2, DM2[, 1] %in% common_individuals) | |
| 254 DM1_common <- DM1_common[, common_columns, drop = FALSE] | |
| 255 DM2_common <- DM2_common[, common_columns, drop = FALSE] | |
| 256 | |
| 257 for (col in common_columns) { | |
| 258 comparison_result <- identical(DM1_common$col, DM2_common$col) | |
| 259 } | |
| 260 | |
| 261 } | |
| 262 | |
| 263 if (comparison_result) { | |
| 264 | |
| 265 | |
| 266 DM1$order1 <- seq(1, nrow(DM1)) | |
| 267 DM2$order2 <- seq(nrow(DM1) + 1, nrow(DM2) + nrow(DM1)) | |
| 268 DM1_sort <- DM1[order(DM1[, 1]), ] | |
| 269 DM2_sort <- DM2[order(DM2[, 1]), ] | |
| 270 id_in_common <- intersect(DM1[, 1], DM2[, 1]) | |
| 271 DM1_filter <- subset(DM1, DM1[, 1] %in% id_in_common) | |
| 272 DM2_filter <- subset(DM2, DM2[, 1] %in% id_in_common) | |
| 273 different_DM2 <- colnames_2[! colnames_2 %in% colnames_1] | |
| 274 DM2_specifique <- DM2[! DM2[, 1] %in% id_in_common, ] | |
| 275 #Merge the two tables only with the samples not in common | |
| 276 DM1_merge <- bind_rows(DM1, DM2_specifique) | |
| 277 | |
| 278 | |
| 279 #Deletion of columns present only in DM2 | |
| 280 DM1_merge <- DM1_merge[, ! names(DM1_merge) %in% different_DM2] | |
| 281 different_DM1 <- colnames_1[! colnames_1 %in% colnames_2] | |
| 282 DM1_specifique <- DM1[! DM1[, 1] %in% id_in_common, ] | |
| 283 #Merge the two tables only with the samples not in common | |
| 284 DM2_merge <- bind_rows(DM2, DM1_specifique) | |
| 285 #Deletion of columns present only in DM2 | |
| 286 DM2_merge <- DM2_merge[, ! names(DM2_merge) %in% different_DM1] | |
| 287 #DM2_merge | |
| 288 | |
| 289 | |
| 290 DM1_merge_sort <- DM1_merge[order(DM1_merge$order1, DM1_merge$order2), ] | |
| 291 DM1_merge_sort <- DM1_merge_sort[, - which(colnames(DM1_merge_sort) == "order1")] | |
| 292 DM1_merge_sort <- DM1_merge_sort[, - which(colnames(DM1_merge_sort) == "order2")] | |
| 293 | |
| 294 DM2_merge_sort <- DM2_merge[order(DM2_merge$order1, DM2_merge$order2), ] | |
| 295 DM2_merge_sort <- DM2_merge_sort[, - which(colnames(DM2_merge_sort) == "order1")] | |
| 296 DM2_merge_sort <- DM2_merge_sort[, - which(colnames(DM2_merge_sort) == "order2")] | |
| 297 | |
| 298 | |
| 299 if (type == "sample") { | |
| 300 | |
| 301 rownames(DM1_merge_sort) <- DM1_merge_sort[, 1] | |
| 302 DM1_merge_sort <- DM1_merge_sort[, -1] | |
| 303 DM1_merge_sort <- t(DM1_merge_sort) | |
| 304 DM1_merge_sort <- data.frame(sample = row.names(DM1_merge_sort), DM1_merge_sort, check.names = FALSE) | |
| 305 colnames(DM1_merge_sort)[1] <- corner_DM1 | |
| 306 rownames(DM1_merge_sort) <- NULL | |
| 307 | |
| 308 rownames(DM2_merge_sort) <- DM2_merge_sort[, 1] | |
| 309 DM2_merge_sort <- DM2_merge_sort[, -1, drop = FALSE] | |
| 310 DM2_merge_sort <- t(DM2_merge_sort) | |
| 311 DM2_merge_sort <- data.frame(sample = row.names(DM2_merge_sort), DM2_merge_sort, check.names = FALSE) | |
| 312 colnames(DM2_merge_sort)[1] <- corner_DM2 | |
| 313 rownames(DM2_merge_sort) <- NULL | |
| 314 } | |
| 315 | |
| 316 result <- list(M_merge_sort = M_merge_sort, DM1 = DM1_merge_sort, DM2 = DM2_merge_sort) | |
| 317 return(result) | |
| 318 | |
| 319 } else { | |
| 320 #selects line ids that are in DM2 and not in DM1 | |
| 321 id_diff_1 <- setdiff(DM2[, 1], DM1[, 1]) | |
| 322 #we store them in a dataframe | |
| 323 row_add_1 <- data.frame(id = id_diff_1) | |
| 324 #renames columns with their names in DM1 | |
| 325 colnames(row_add_1)[1] <- colnames(DM1)[1] | |
| 326 #Merge | |
| 327 DM1_add <- bind_rows(DM1, row_add_1) | |
| 328 id_diff_2 <- setdiff(DM1[, 1], DM2[, 1]) | |
| 329 row_add_2 <- data.frame(id = id_diff_2) | |
| 330 colnames(row_add_2)[1] <- colnames(DM2)[1] | |
| 331 DM2_add <- bind_rows(DM2, row_add_2) | |
| 332 | |
| 333 if (type == "sample") { | |
| 334 rownames(DM1_add) <- DM1_add[, 1] | |
| 335 DM1_add <- DM1_add[, -1] | |
| 336 DM1_add <- t(DM1_add) | |
| 337 DM1_add <- data.frame(sample = row.names(DM1_add), DM1_add, check.names = FALSE) | |
| 338 colnames(DM1_add)[1] <- corner_DM1 | |
| 339 rownames(DM1_add) <- NULL | |
| 340 rownames(DM2_add) <- DM2_add[, 1] | |
| 341 DM2_add <- DM2_add[, -1, drop = FALSE] | |
| 342 DM2_add <- t(DM2_add) | |
| 343 DM2_add <- data.frame(sample = row.names(DM2_add), DM2_add, check.names = FALSE) | |
| 344 colnames(DM2_add)[1] <- corner_DM2 | |
| 345 rownames(DM2_add) <- NULL | |
| 346 } | |
| 347 result <- list(M_merge_sort = M_merge_sort, DM1 = DM1_add, DM2 = DM2_add) | |
| 348 return(result) | |
| 349 | |
| 350 } | |
| 351 } | |
| 352 } |
