annotate summarize_gff_by_attribute.R @ 2:3f8ae272f4f3 draft

Uploaded
author petr-novak
date Thu, 07 Oct 2021 07:29:59 +0000
parents cf3cea0a3039
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
#!/usr/bin/env Rscript
# Summarize the features of an annotation file by one of their attributes.
#
# Usage: summarize_gff_by_attribute.R <annotation_file> <attribute_name>
#
# The file is read with rtracklayer::import(). Features are grouped by the
# value of the metadata column <attribute_name>; for every distinct value the
# number of features and the sum of their widths are computed. The result is
# written to standard output as a tab-separated table with a header line and
# the columns <attribute_name>, counts, length, sorted by decreasing length.
# Features whose attribute value is NA do not contribute to any group.
suppressPackageStartupMessages(library(rtracklayer))

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2L) {
  stop(
    "usage: summarize_gff_by_attribute.R <annotation_file> <attribute_name>",
    call. = FALSE
  )
}
input_path <- args[1]
attribute_name <- args[2]

g <- import(input_path)
m <- mcols(g)

# Fail early with a readable message instead of an obscure subsetting error.
if (!attribute_name %in% colnames(m)) {
  stop(
    "attribute '", attribute_name, "' not found; available attributes: ",
    paste(colnames(m), collapse = ", "),
    call. = FALSE
  )
}

# NOTE(review): assumes the attribute column is an atomic vector (one value
# per feature); list-like columns are not handled here -- confirm with callers.
group <- m[, attribute_name]
w <- width(g)

# Per-group aggregates; both results share the same (sorted) group order.
total_lengths <- tapply(w, group, sum)
total_counts <- tapply(w, group, length)

d <- data.frame(
  attribute = names(total_counts),
  counts = as.vector(total_counts),
  length = as.vector(total_lengths)
)
# Set the header of the first column to the attribute name as given by the
# user (assigned after construction so the name is not altered by data.frame).
colnames(d)[1] <- attribute_name

d <- d[order(d$length, decreasing = TRUE), ]
write.table(d, sep = "\t", row.names = FALSE, quote = FALSE)