Mercurial > repos > petr-novak > repeat_annotation_pipeline3
comparison summarize_gff_by_attribute.R @ 0:ea6a3059a6af draft
Uploaded
author | petr-novak |
---|---|
date | Mon, 18 Oct 2021 11:01:20 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ea6a3059a6af |
---|---|
1 #!/usr/bin/env Rscript | |
2 suppressPackageStartupMessages(library(rtracklayer)) | |
3 g = import(commandArgs(T)[1], format = "GFF") | |
4 attribute_name = commandArgs(T)[2] | |
5 | |
6 m = mcols(g) | |
7 AN = commandArgs(T)[2] | |
8 ## check if attribute is in gff: | |
9 if (!AN %in% colnames(m)){ | |
10 stop(paste0("attribute ", AN," not in input gff\n", "Available attributes are:\n", paste(colnames(m), collapse = "\n "))) | |
11 } | |
12 | |
13 w = width(g) | |
14 total_lengths = by(w, INDICES=m[,attribute_name] , sum) | |
15 total_counts = by(w, INDICES=m[,attribute_name] , length) | |
16 total_length_summary = do.call(rbind,as.list(by(w, INDICES=m[,attribute_name] , summary))) | |
17 colnames(total_length_summary) = paste0("length.", colnames(total_length_summary)) | |
18 | |
19 d = data.frame(attribute = names(total_counts), cbind(count = total_counts, total_length=total_lengths, length=total_length_summary)) | |
20 colnames(d)[1] = attribute_name | |
21 d = d[order(d$total_length, decreasing = TRUE),] | |
22 write.table(d, sep = "\t", row.names = FALSE, quote = FALSE, file = commandArgs(T)[3]) |