Mercurial > repos > davidvanzessen > shm_csr
diff check_unique_id.r @ 81:b6f9a640e098 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 19 Feb 2021 15:10:54 +0000 |
parents | |
children |
line wrap: on
line diff
# check_unique_id.r
#
# Makes the 'Sequence ID' column unique across a set of tab-separated files.
#
# Usage: Rscript check_unique_id.r <summary_file> [<other_file> ...]
#
# The first argument must be the summary file: its 'Sequence ID' column is the
# one inspected for duplicates. When any ID occurs more than once, every file
# given on the command line (the first one included) is rewritten in place
# with "_<Sequence number>" appended to each 'Sequence ID'. When all IDs are
# already unique, no file is touched.

id_col <- "Sequence ID"
number_col <- "Sequence number"

# Read a tab-separated file with a header row. Column names are kept exactly
# as written (check.names = FALSE), quote characters are treated as ordinary
# data (quote = ""), and short rows are padded (fill = TRUE).
read_tsv <- function(path) {
  read.table(
    path,
    header = TRUE,
    sep = "\t",
    fill = TRUE,
    stringsAsFactors = FALSE,
    quote = "",
    check.names = FALSE
  )
}

args <- commandArgs(trailingOnly = TRUE)

# Fail with a usage hint instead of letting read.table() choke on an NA path.
if (length(args) < 1) {
  stop("Usage: check_unique_id.r <summary_file> [<other_file> ...]")
}

summary_tbl <- read_tsv(args[1])

if (!(number_col %in% names(summary_tbl))) {
  stop("First argument doesn't contain the 'Sequence number' column")
}
# The ID column is read right below, so validate it as well rather than
# failing later with "undefined columns selected".
if (!(id_col %in% names(summary_tbl))) {
  stop("First argument doesn't contain the 'Sequence ID' column")
}

# Missing IDs are left out of the duplicate check; only repeated non-NA IDs
# trigger the rewrite.
ids <- summary_tbl[, id_col]
has_duplicates <- anyDuplicated(ids[!is.na(ids)]) > 0

if (has_duplicates) {
  print("Sequence.ID is not unique for every sequence, adding sequence number to IDs")
  # seq_along() is safe for any length of args, unlike 1:length(args).
  for (i in seq_along(args)) {
    current_file <- args[i]
    print(paste("Appending 'Sequence number' column to 'Sequence ID' column in", current_file))

    # The first file is already in memory and has not been modified on disk
    # yet, so reuse it instead of parsing it a second time.
    current <- if (i == 1L) summary_tbl else read_tsv(current_file)

    # Every file must carry both columns; report which ones are absent.
    missing_cols <- setdiff(c(id_col, number_col), names(current))
    if (length(missing_cols) > 0) {
      stop(
        "File '", current_file, "' doesn't contain the column(s): ",
        paste0("'", missing_cols, "'", collapse = ", ")
      )
    }

    current[, id_col] <- paste(current[, id_col], current[, number_col], sep = "_")

    # Overwrite the input file in place; NA values are written as empty fields.
    write.table(
      x = current,
      file = current_file,
      quote = FALSE,
      sep = "\t",
      na = "",
      row.names = FALSE,
      col.names = TRUE
    )
  }
}