annotate summarize_gff_by_attribute.R @ 4:d7bb42f254a2 draft

"planemo upload commit fa6d4eb0c804e873b60bb2ba3d0e85646c9cd764"
author petr-novak
date Mon, 14 Mar 2022 10:16:07 +0000
parents ea6a3059a6af
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env Rscript
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
2 suppressPackageStartupMessages(library(rtracklayer))
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
3 g = import(commandArgs(T)[1], format = "GFF")
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
4 attribute_name = commandArgs(T)[2]
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
5
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
6 m = mcols(g)
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
7 AN = commandArgs(T)[2]
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
8 ## check if attribute is in gff:
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
9 if (!AN %in% colnames(m)){
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
10 stop(paste0("attribute ", AN," not in input gff\n", "Available attributes are:\n", paste(colnames(m), collapse = "\n ")))
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
11 }
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
12
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
13 w = width(g)
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
14 total_lengths = by(w, INDICES=m[,attribute_name] , sum)
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
15 total_counts = by(w, INDICES=m[,attribute_name] , length)
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
16 total_length_summary = do.call(rbind,as.list(by(w, INDICES=m[,attribute_name] , summary)))
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
17 colnames(total_length_summary) = paste0("length.", colnames(total_length_summary))
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
18
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
19 d = data.frame(attribute = names(total_counts), cbind(count = total_counts, total_length=total_lengths, length=total_length_summary))
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
20 colnames(d)[1] = attribute_name
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
21 d = d[order(d$total_length, decreasing = TRUE),]
ea6a3059a6af Uploaded
petr-novak
parents:
diff changeset
22 write.table(d, sep = "\t", row.names = FALSE, quote = FALSE, file = commandArgs(T)[3])