comparison get_length_and_gc_content.r @ 7:15ce6435ab83 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
author mvdbeek
date Thu, 25 Feb 2016 06:09:34 -0500
parents d4b5942ed347
children 7f8d888e3355
comparison
equal deleted inserted replaced
6:d4b5942ed347 7:15ce6435ab83
19 FASTAfile = args$fasta 19 FASTAfile = args$fasta
20 output = args$output 20 output = args$output
21 21
22 #Load the annotation and reduce it 22 #Load the annotation and reduce it
23 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon") 23 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon")
24 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_name)) 24 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id))
25 reducedGTF <- unlist(grl, use.names=T) 25 reducedGTF <- unlist(grl, use.names=T)
26 elementMetadata(reducedGTF)$gene_name <- rep(names(grl), elementLengths(grl)) 26 elementMetadata(reducedGTF)$gene_id <- rep(names(grl), elementLengths(grl))
27 27
28 #Open the fasta file 28 #Open the fasta file
29 FASTA <- FaFile(FASTAfile) 29 FASTA <- FaFile(FASTAfile)
30 open(FASTA) 30 open(FASTA)
31 31
37 calc_GC_length <- function(x) { 37 calc_GC_length <- function(x) {
38 nGCs = sum(elementMetadata(x)$nGCs) 38 nGCs = sum(elementMetadata(x)$nGCs)
39 width = sum(elementMetadata(x)$widths) 39 width = sum(elementMetadata(x)$widths)
40 c(width, nGCs/width) 40 c(width, nGCs/width)
41 } 41 }
42 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_name), calc_GC_length)) 42 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length))
43 colnames(output) <- c("Length", "GC") 43 colnames(output) <- c("Length", "GC")
44 44
45 write.table(output, file="GC_lengths.tsv", sep="\t") 45 write.table(output, file="GC_lengths.tsv", sep="\t")