Mercurial > repos > workflow4metabolomics > w4mconcatenate
diff fonctions_auxiliaires.R @ 0:c577f13705f2 draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit eba1150dad55ad9eca5e93358d9f75e6b6dba2ec
author | workflow4metabolomics |
---|---|
date | Wed, 10 Jul 2024 15:20:11 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fonctions_auxiliaires.R Wed Jul 10 15:20:11 2024 +0000 @@ -0,0 +1,158 @@ +#-------------------------------------------------------------------------------------------------------------------------------------------------------------- +check_features <- function(M1, M2) { + #M1/M2 = data.frame containing sampleMetadata or variableMetadata + #check the variables in the 2 matrices . + #returns the names of the columns in the two metadata + + colnames_1 <- colnames(M1) + colnames_2 <- colnames(M2) + samecolumns <- intersect(colnames_1, colnames_2) + + if (is.null(samecolumns)) { + cat("\nWarning: There are no features in common \n") + } + + return(samecolumns) +} + +#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +compare_same_columns <- function(M1, M2, same, choice_keep, Keep, tab1, tab2) { + + #M1/M2 = data.frame containing sampleMetadata or variableMetadata + #same = list of column names with the same name in M1 and M2 + #choice_keep = choice of keeping columns with the same no or keeping just one + #keep = keep the column in M1 or M2 + #tab1/tab2 = Suffix for Metadata 1/2 + + #Check that the variables in the 2 matrices have the same values and + #If not, they are renamed or deleted according to the parameters chosen by the user. + #returns the two modified metadata + + compare_results <- list() + non_identical_columns_v <- c() + + + #Creation of 2 sub-tables with shared individuals and variables + common_individuals <- intersect(M1[, 1], M2[, 1]) + common_columns <- intersect(colnames(M1), colnames(M2)) + + M1_common <- subset(M1, M1[, 1] %in% common_individuals) + M2_common <- subset(M2, M2[, 1] %in% common_individuals) + + + M1_common <- M1_common[, common_columns] + M2_common <- M2_common[, common_columns] + + + + common_columns <- common_columns[-1]# delete id column + + + for (col_name in common_columns) { + #Check that the columns are identical, then delete them from M2 + if (!identical(M1_common[[col_name]], M2_common[[col_name]])) { + + non_identical_columns_v <- c(non_identical_columns_v, col_name) + + #otherwise store the columns where the values are not the same in non_identical_columns + } else { + M2 <- M2[, -which(colnames(M2) == col_name)] + } + + } + + #if the list of columns that do not take the same values is null, we return M1/M2 + if (is.null(non_identical_columns_v)) { + + result <- list(M1 = M1, M2 = M2) + + return(result) + + + } else { + + for (non_identical_columns in non_identical_columns_v) { + + #If we decide to keep the 2 columns and they do not take the same values, we change their names by adding a suffix. + + if (choice_keep == "yes") {#keep both columns and give them a new name + + new_name <- paste(tab1, non_identical_columns, sep = "_") + colnames(M1)[colnames(M1) == non_identical_columns] <- new_name + + new_name <- paste(tab2, non_identical_columns, sep = "_") + colnames(M2)[colnames(M2) == non_identical_columns] <- new_name + + } + + if (choice_keep == "no") {#Keep only one and delete the other + if (Keep == 1) { + M2 <- M2[, -which(colnames(M2) == non_identical_columns)] + new_name <- paste(tab1, non_identical_columns, sep = "_") + colnames(M1)[colnames(M1) == non_identical_columns] <- new_name + + } + if (Keep == 2) { + M1 <- M1[, -which(colnames(M1) == non_identical_columns)] + new_name <- paste(tab2, non_identical_columns, sep = "_") + colnames(M2)[colnames(M2) == non_identical_columns] <- new_name + } + + } + } + } + + + result <- list(M1 = M1, M2 = M2) + return(result) + +} +#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +match2_bis <- function(Metadata_1, Metadata_2, Mtype) { + + + #Metadata1/Metadata2 = data.frame containing sampleMetadata or variableMetadata + #Mtype = "sample" or "variable" depending on Metadata content + #To check if metadata1 and metadata2 match regarding identifiers + #returns a vector containing an error message if the identifiers are not all the same in the two metadatas + err.stock <- NULL#error vector + + + id2 <- Metadata_1[, 1] + id1 <- Metadata_2[, 1] + + if (length(which(id1 %in% id2)) != length(id1) || length(which(id2 %in% id1)) != length(id2)) { + err.stock <- c("\n", Mtype, "Metadata_1 and ", Mtype, "Metadata_2 do not match regarding Metadata_2 identifiers.") + if (length(which(id1 %in% id2)) != length(id1)) { + if (length(which(! (id1 %in% id2))) < 4) { + err.stock <- c(err.stock, "\n The ") + } else { + err.stock <- c(err.stock, "\n For example, the ") + } + err.stock <- c(err.stock, "following identifiers found in the ", Mtype, "Metadata_1 file\n", + " do not appear in the ", Mtype, " Metadata_2 file:\n") + identif <- id1[which(! (id1 %in% id2))][seq_len(min(3, length(which(! (id1 %in% id2)))))] + err.stock <- c(err.stock, " ", paste(identif, collapse = "\n "), "\n") + } + if (length(which(id2 %in% id1)) != length(id2)) { + if (length(which(! (id2 %in% id1))) < 4) { + err.stock <- c(err.stock, "\n The ") + } else { + err.stock <- c(err.stock, "\n For example, the ") + } + err.stock <- c(err.stock, "following identifiers found in the ", Mtype, " Metadata_2 file\n", + " do not appear in the", Mtype, " Metadata_1 file:\n") + identif <- id2[which(! (id2 %in% id1))][seq_len(min(3, length(which(! (id2 %in% id1)))))] + err.stock <- c(err.stock, " ", paste(identif, collapse = "\n "), "\n") + } + err.stock <- c(err.stock, "\nPlease check your data.\n") + } + + return(err.stock) + +}