annotate check_unique_id.r @ 81:b6f9a640e098 draft

Uploaded
author davidvanzessen
date Fri, 19 Feb 2021 15:10:54 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
81
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
# Make the 'Sequence ID' column unique across a set of tab-separated files.
#
# Usage: Rscript check_unique_id.r <first_file> [<other_file> ...]
#
# The first argument is the file used to decide whether the 'Sequence ID'
# values are unique; it must contain both the 'Sequence ID' and the
# 'Sequence number' columns. If any ID occurs more than once in that file,
# EVERY file given on the command line (including the first) is rewritten in
# place with 'Sequence ID' replaced by "<Sequence ID>_<Sequence number>".
# If all IDs are unique, no file is modified.
args <- commandArgs(trailingOnly = TRUE)

if (length(args) < 1) {
  # Without this guard args[1] is NA and read.table fails with an obscure error.
  stop("Usage: check_unique_id.r <first_file> [<other_file> ...]", call. = FALSE)
}

id_col <- "Sequence ID"
number_col <- "Sequence number"

# Read one tab-separated file with a header row. Quoting is disabled and
# column names are kept verbatim (check.names = FALSE) so that names containing
# spaces, such as 'Sequence ID', survive the round trip.
read_tsv <- function(path) {
  read.table(
    path,
    header = TRUE,
    sep = "\t",
    fill = TRUE,
    stringsAsFactors = FALSE,
    quote = "",
    check.names = FALSE
  )
}

first <- read_tsv(args[1])

if (!(number_col %in% names(first))) {
  stop("First argument doesn't contain the 'Sequence number' column")
}
if (!(id_col %in% names(first))) {
  # The ID column is indexed right below; fail with a clear message instead of
  # R's generic "undefined columns selected".
  stop("First argument doesn't contain the 'Sequence ID' column")
}

# table() ignores NA values, so only repeated non-missing IDs count as
# duplicates.
has_duplicates <- any(table(first[, id_col]) > 1)

if (has_duplicates) {
  # Status messages stay on stdout via print(), exactly as before, so anything
  # capturing this script's output sees the same text on the same stream.
  print("Sequence.ID is not unique for every sequence, adding sequence number to IDs")
  for (current_file in args) {
    print(paste("Appending 'Sequence number' column to 'Sequence ID' column in", current_file))
    current <- read_tsv(current_file)

    # Validate before touching the file: it is overwritten in place below.
    missing_cols <- setdiff(c(id_col, number_col), names(current))
    if (length(missing_cols) > 0) {
      stop(
        "File '", current_file, "' doesn't contain the column(s): ",
        paste0("'", missing_cols, "'", collapse = ", "),
        call. = FALSE
      )
    }

    current[, id_col] <- paste(current[, id_col], current[, number_col], sep = "_")
    write.table(
      x = current,
      file = current_file,
      quote = FALSE,
      sep = "\t",
      na = "",
      row.names = FALSE,
      col.names = TRUE
    )
  }
}