# HG changeset patch # User iuc # Date 1647007691 0 # Node ID e3ba567abdf5f0530dea3c4510ad7b5bd56495f1 # Parent f088370d2a3c8d366524a688b0c0223a40aa48e7 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/length_and_gc_content commit 7b6b07d22f3e6fed77b2c237de2b0d96fa939711" diff -r f088370d2a3c -r e3ba567abdf5 get_length_and_gc_content.r --- a/get_length_and_gc_content.r Sun Jan 28 04:04:58 2018 -0500 +++ b/get_length_and_gc_content.r Fri Mar 11 14:08:11 2022 +0000 @@ -1,59 +1,94 @@ # originally by Devon Ryan, https://www.biostars.org/p/84467/ -options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +options(show.error.messages = F, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, F) + }) # we need that to not crash galaxy with an UTF8 error on German LC settings. loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") suppressPackageStartupMessages({ - library("GenomicRanges") - library("rtracklayer") - library("Rsamtools") - library("optparse") - library("data.table") + library("GenomicRanges") + library("rtracklayer") + library("Rsamtools") + library("optparse") + library("data.table") }) option_list <- list( - make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."), - make_option(c("-f","--fasta"), type="character", default=FALSE, help="FASTA file that corresponds to the supplied GTF."), - make_option(c("-l","--length"), type="character", default=FALSE, help="Output file with Gene ID and length."), - make_option(c("-gc","--gc_content"), type="character", default=FALSE, help="Output file with Gene ID and GC content.") - ) + make_option(c("-g", "--gtf"), type = "character", + help = "Input gtf file with gene / exon information."), + make_option(c("-f", "--fasta"), type = "character", default = NULL, + help = "fasta file that corresponds to the supplied gtf."), + make_option(c("-l", "--length"), type = "character", default = NULL, + help = "Output file with Gene ID and length."), + make_option(c("-c", "--gc_content"), type = "character", default = NULL, + help = "Output file with Gene ID and GC content.") +) -parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) -args = parse_args(parser) +parser <- OptionParser(usage = "%prog [options] file", + option_list = option_list) +args <- parse_args(parser) -GTFfile = args$gtf -FASTAfile = args$fasta -length = args$length -gc_content = args$gc_content +gtf_file <- args$gtf +fasta_file <- args$fasta +length <- args$length +gc_content <- args$gc_content + +# Check args: +if (is.null(fasta_file) & !is.null(gc_content)) { + stop("gc_content output requires fasta input") +} +if (is.null(length) & is.null(gc_content)) { + stop("neither gc_content nor length was set nothing to do.") +} #Load the annotation and reduce it -GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon") -grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id)) -reducedGTF <- unlist(grl, use.names=T) -elementMetadata(reducedGTF)$gene_id <- rep(names(grl), elementNROWS(grl)) +gtf <- import.gff(gtf_file, format = "gtf", genome = NA, feature.type = "exon") +grl <- reduce(split(gtf, elementMetadata(gtf)$gene_id)) +reduced_gtf <- unlist(grl, use.names = T) +elementMetadata(reduced_gtf)$gene_id <- rep(names(grl), elementNROWS(grl)) -#Open the fasta file -FASTA <- FaFile(FASTAfile) -open(FASTA) +if (! is.null(gc_content)) { + #Open the fasta file + fasta <- FaFile(fasta_file) + open(fasta) -#Add the GC numbers -elementMetadata(reducedGTF)$nGCs <- letterFrequency(getSeq(FASTA, reducedGTF), "GC")[,1] -elementMetadata(reducedGTF)$widths <- width(reducedGTF) + #Add the GC numbers + elementMetadata(reduced_gtf)$n_gcs <- + letterFrequency(getSeq(fasta, reduced_gtf), "GC")[, 1] +} +elementMetadata(reduced_gtf)$widths <- width(reduced_gtf) #Create a list of the ensembl_id/GC/length -calc_GC_length <- function(x) { - nGCs = sum(elementMetadata(x)$nGCs) - width = sum(elementMetadata(x)$widths) - c(width, nGCs/width) +if (! is.null(gc_content)) { + calc_gc_length <- function(x) { + n_gcs <- sum(elementMetadata(x)$n_gcs) + width <- sum(elementMetadata(x)$widths) + c(width, n_gcs / width) + } + output <- t(sapply(split(reduced_gtf, elementMetadata(reduced_gtf)$gene_id), + calc_gc_length)) + output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[]) + write.table(output[, c(1, 3)], file = gc_content, + col.names = FALSE, row.names = FALSE, + quote = FALSE, sep = "\t") +} else { + all_widths <- sapply(split(reduced_gtf, elementMetadata(reduced_gtf)$gene_id), + function(x) { + sum(elementMetadata(x)$widths) + }) + output <- data.frame(gene_id = names(all_widths), + length = all_widths) } -output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length)) -output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[]) - -write.table(output[,c(1,2)], file=length, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t") -write.table(output[,c(1,3)], file=gc_content, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t") +if (! is.null(length)) { + write.table(output[, c(1, 2)], file = length, + col.names = FALSE, row.names = FALSE, + quote = FALSE, sep = "\t") +} sessionInfo() diff -r f088370d2a3c -r e3ba567abdf5 get_length_and_gc_content.xml --- a/get_length_and_gc_content.xml Sun Jan 28 04:04:58 2018 -0500 +++ b/get_length_and_gc_content.xml Fri Mar 11 14:08:11 2022 +0000 @@ -1,11 +1,9 @@ - + from GTF and FASTA file - - r-optparse - r-reshape2 - r-data.table - bioconductor-rtracklayer - + + macros.xml + + /dev/null | grep -v -i "WARNING: ")", reshape2 version" $(R --vanilla --slave -e "library(reshape2); cat(sessionInfo()\$otherPkgs\$reshape2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtracklayer version" $(R --vanilla --slave -e "library(rtracklayer); cat(sessionInfo()\$otherPkgs\$rtracklayer\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", data.table version" $(R --vanilla --slave -e "library(data.table); cat(sessionInfo()\$otherPkgs\$data.table\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + echo $(R --version | grep version | grep -v GNU)", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtracklayer version" $(R --vanilla --slave -e "library(rtracklayer); cat(sessionInfo()\$otherPkgs\$rtracklayer\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", data.table version" $(R --vanilla --slave -e "library(data.table); cat(sessionInfo()\$otherPkgs\$data.table\$Version)" 2> /dev/null | grep -v -i "WARNING: ") ]]> - - - - + + + + + - - - - - - - + + - - + + + - - - - - - length_out is True + analysis['analysis_select'] != "gc" - gc_out is True + analysis['analysis_select'] != "length" @@ -119,6 +109,8 @@ + @@ -138,15 +130,14 @@ - - + - + @@ -156,14 +147,15 @@ .. class:: infomark -This tool calculates the length and GC content for the genes in a GTF file. It requires a FASTA file that is the same genome version as the GTF. +This tool calculates the length and/or GC content for the genes in a GTF file. +For the GC content, it requires a FASTA file that is the same genome version as the GTF. ----- **Inputs** - a GTF file -- a FASTA file +- a FASTA file (if GC content is requested) ----- diff -r f088370d2a3c -r e3ba567abdf5 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Mar 11 14:08:11 2022 +0000 @@ -0,0 +1,28 @@ + + + + r-optparse + r-data.table + bioconductor-rtracklayer + + + + + + + + + + + + + + + + + + + + + + diff -r f088370d2a3c -r e3ba567abdf5 test-data/cached_locally/ref.fasta --- a/test-data/cached_locally/ref.fasta Sun Jan 28 04:04:58 2018 -0500 +++ b/test-data/cached_locally/ref.fasta Fri Mar 11 14:08:11 2022 +0000 @@ -1,2 +1,173 @@ ->1 -AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT \ No newline at end of file +>fake_chr2 hg38_dna range=chr2:176116522-176125113 5'pad=0 3'pad=0 strand=+ repeatMasking=none +TGGGGCGGGCTGGCCGAGCGAGCCCTGGAGAGGCGGACAGGAGGGCGGCG +GAGAGCGCTGGGCCGGTTGTCTCCAGCGCGCACTATCGCGGGCGCGTAGT +AGATGTCGCTGTTGTCCGTGCTTACCCGGCCGGCCGGCCAGGCTCTGGAG +CACGTGACCCGAGAGGAGGCTGCGGCTCAAGGCCATTTTCAAATCTCATT +GGCTTGGTTGTCATGTGGTCGGCAGAGGCATCCACAATTACACGGGGAAT +GTTTTCCTAGAGATGTCAGCCTACAAAGGACACAATCTCTCTTCTTCAAA +TTCTTCCCCAAAATGTCCTTTCCCAACAGCTCTCCTGCTGCTAATACTTT +TTTAGTAGATTCCTTGATCAGTGCCTGCAGGAGTGACAGTTTTTATTCCA +GCAGCGCCAGCATGTACATGCCACCACCTAGCGCAGACATGGGGACCTAT +GGAATGCAAACCTGTGGACTGCTCCCGTCTCTGGCCAAAAGAGAAGTGAA +CCACCAAAATATGGGTATGAATGTGCATCCTTATATACCTCAAGTAGACA +GTTGGACAGATCCGAACAGATCTTGTCGAATAGAGCAACCTGTTACACAG +CAAGTCCCCACTTGCTCCTTCACCACCAACATTAAGGAAGAATCCAATTG +CTGCATGTATTCTGATAAGCGCAACAAACTCATTTCGGCCGAGGTCCCTT +CGTACCAGAGGCTGGTCCCTGAGTCTTGTCCCGTTGAGAACCCTGAGGTT +CCCGTCCCTGGATATTTTAGACTGAGTCAGACCTACGCCACCGGGAAAAC +CCAAGAGTACAATAATAGCCCCGAAGGCAGCTCCACTGTCATGCTCCAGC +TCAACCCTCGTGGCGCGGCCAAGCCGCAGCTCTCCGCTGCCCAGCTGCAG +ATGGAAAAGAAGATGAACGAGCCCGTGAGCGGCCAGGAGCCCACCAAAGT +CTCCCAGGTGGAGAGCCCCGAGGCCAAAGGCGGCCTTCCCGAAGAGAGGA +GCTGCCTGGCTGAGGTCTCCGTGTCCAGTCCCGAAGTGCAGGAGAAGGAA +AGCAAAGGTCGGTATGAGCAGAGTTGCCACCCCAGCGGGGCGCGCAGCCC +GGGAACCCGGCAGAGAGGGAGTGCCGGGGTGCCCAGCGCCGAGCCGGAGC +CCGACTTGGCAGGTGCTGCTCCGCCTGGTTTTAGAGGGGTGATCTCAGCC +CTGAGATAGTCCCCGCTTCTCCCCTGCTGCCCTGGCCCTCTCCGCCAGTC +CTGGCCCCACGCTGATGGCGCCCGGGCAGAGGAAAAGCTTGCCGGTTTTA +TTTTTCCTGAGCTAGACCTGAACACAACAAAAGAGCGCAAAGGAGACCTG +CGGCTCATAAACACGACCACAGAGCCTCTTTTCTCCTGCTCAGATTTGCA +GTTCCAGTTTTGCCTTGAGCCCAATGATCATGTTAAGGTGATCCAGGGCA +CCGTGTTCGTGTTCAAGTGTATGCACCCCGCATCCTGCGAGCTTGGGGGT +GGTGAGGGGAAAGAGATGGCTGGGCTGGTTGGTGCTTGAGTTGGGAAACA +GGGCTTACTGCCTTTGCTGGGCTAGGTAACCTTGGCTTTGTTTAGGAAAA +GTGCTGCAGTCTTTGCAATCCGTCGGCAAAGAGGGCAAAGGCGGAGGGGG +AGAGTGGAACCCGCATTGCCCTCCCTGCAAGGCCAGCCTTAGGGCTGGGC +TAAGGCAAAGAGCCAGGGATCTGGCTTTTTGAGAAGGAACCCTCCTCCTC +TCCCCCAGTGCTTAGAGGTGGGCCACAGTAGGGGGCTCCCTTTCTGGGGG +AATGCTTTAGTGTGGGGGCAAGAAGACATGAAAATTAAGGAAATTCTGGG +GAATGCAACAATACCCAGGCAAGGTGGGGGAAGGTGTCTCGCTTCCCCAT +TTATCTTTTGAAAGAGAATGGGCACCTATAAACCTGACTGTCAGGATTCC +TGACTGCCTAGGAGAGGTGGGGAAGAAGTGGCAGATTTGGGGACCTGAGG +CAGCAGTGGGGTTGGTAGGCTTGTCCAGGTCGTGGCGTATTCCCCTCCGT +CCCTGTTAGGAGCTGAACCCTTAGAATGTTGCTGGGGAGATCTGGAAAGT +TTACTATTCTACTAATGTTTTGTACAAGTGAGAAAGTTGAAAGAGAGAGC +GAGAACCCAAATGCAGACTGTCCTGCCATCATGTCATTTAAGTAATGTGG +CATCAATGTAAGATTCCCTTCCAAGGCCCACTTCATGTGAGTAATGTTTA +ATACTAGCATTTTCCAAAGCGGCCTGGCTGCCAGCAGGGTCACGGCCAAG +GGTACATTTGAACAGTCTGAAGAAAAAAACAAAAACGAAAACCAAAACCA +AAACCAAAACAAAAACAAAAACAAAAACAAACAAACAAAAAACCTCTTGA +TTTTTTTCTTCTTCTCCCTTTAATTTTGTTAGAGGAAATCAAGTCTGATA +CACCAACCAGCAATTGGCTCACTGCAAAGAGTGGCAGAAAGAAGAGGTGC +CCTTACACTAAGCACCAAACGCTGGAATTAGAAAAAGAGTTCTTGTTCAA +TATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAGTAAGAGCGTTAACC +TCACCGACAGGCAGGTCAAGATTTGGTTTCAAAACCGCCGAATGAAACTC +AAGAAGATGAGCCGAGAGAACCGGATCCGAGAACTGACCGCCAACCTCAC +GTTTTCTTAGGTCTGAGGCCGGTCTGAGGCCGGTCAGAGGCCAGGATTGG +AGAGGGGGCACCGCGTTCCAGGGCCCAGTGCTGGAGGACTGGGAAAGCGG +AAACAAAACCTTCACCGCTCTTTGTTTGTTGTTTTGTTGTATTTTGTTTT +CCTGCTAGAATGTGACTTTGGGGTCATTATGTTCGTGCTGCAAGTGATCT +GTAATCCCTATGAGTATATATATATATATATATATATATATATAAAAACT +TAGCACGTGTAATTTATTATTTTTTCATCGTAATGCAGGGTAACTATTAT +TGCGCATTTTCATTTGGGTCTTAACTTATTGGAACTGTAGAGCATCCATC +CATCCATCCATCCAGCAATGTGACTTTTTCATGTCTTTCCTAACACAAAA +GGTCTATGTGTGTGGTTAGTCCATGAACTCATGGCATTTTGAATACATCC +AGTACTTTAAAAATGACATATATATTTAAAAAAAAAAGATTAAGAAAACC +CACAAGTTGGAGGGAGGGGGACTTAAAAAGCACATTACAATGTATCTTTT +CACAAATGAATTTAGCAGTTGTCCTTGGTGAGATGGGATATTGGCGATTT +ATGCCTTGTAGCCTTTCCCTTGTGGTGCATCTGTGGTTTGGTAGAAGTAC +AACAGCAACCTGTCCTTTCTGTGCATGTTCTGGTCGCATGTATAATGCAA +TAAACTCTGGAAATGAGTTCACTCCCTCTGCTTTCTGAAATGGAAATATG +TTATGGTGGAAATGAAAGCCTATGGTGAGATTATCTTCTGGTTACACTCC +CTGTTTGGGGCATTTGGGCAGGGGAGTGATAGACTAGTAGGGGAAGGGAG +ATGGGGGAGAAAAGCTGGAGGAGGCCTAGGGTGTTGGATTTTGGCAGTGG +TTGGGGGAGAGGAATTATAAGCTAGCTTGAGAGTGAAGTTTTCATAATTG +GGAGGAAGGGGAGTCTCCTCTTTCCTTTCCCAGTCCCCAGTGATAGTAAC +ATAATTGCGCTCTCAATGGGTGTGAGCTTTCCTCTGGCCTGAACCTGGTA +AGTAAGCCTATACCCCAAGCCACTTTCTCCTCAAAGCTTCCCATTTGTGT +GTTTTCTCCTCTTTGGTTTTGGTTGTGTTGTTTTTAATGCTTTCAGTGGC +ATCTTGGTGATTTCTGGCTGGCGAGCAATCATCAGGGGCTAGGTTGAAGC +TAGTCTTGCCCACCTGGAAGTTGCCGGCCTCCATTACAGGAGCAAGGACA +AACAGCAGTGTAGCACTGCAGCGGATCCAATTCTGCCCCCTTTCCCTCAG +CCCTACCCCCATCCCAAGCGCAAGACAGCCAGACCCCAGAGAAGCCGAGG +ATGGGTGAGTTTTCCCATCCCACTTCGCCTTGATCTCCTTGTGGACGGGT +TTTATGCTCAGTCATTACCTTTTAGTGGCCCACATGAAATTTTGTTAAAG +GAAGAAATGAAAAGATTTTCCCCAGTCAGTCTTTCCTCTATTTAATTACA +AAATGCTGGTGGGAACTGCTGCATCTGGGATGCAAGAAAATGCAGAAAGG +GTGACTGAAAATTTTGCAAATGAACATGACTTCCCATGAAGTCTAATGTT +CCATTCGCTGCCATGGTCCAGGGGACTCCCACCAGCTTCCACCGGCTTCA +ACAGGATCTCCACTAGAGAGCCCAGACTTATCTAGTCCTGTCGGGGAAAA +GGGAGAAGAGGCCTTGCAGGAGAAAGCTAACAGAAAATTCGTTACCTGAG +GTCCTGCCTGCAGTTTCAAATAGCTTCCAGCAGTTTTACAAAACACATCC +TTTCCATTTCTTCCTTTTAAATGTTTCCCTAAGAACGATCCATTTAGGTG +CTATAAGTCCTCAGCCAGGGAGTCTCTGGGACACTGGCATTCAAAATTTT +AAACTTCCGCCCCAAAACCAGGAACATTCCAAGACAGAACTCTTTTAGGG +GGCCATTTCCTGGGGGTGGGGGAGAGGGCTTGGAATCAATGCTAGATTGA +AAACGTTGTAATAGCTTTGCCCCAGACTTAACACCGGTTGGGCAGGAGGA +GGGTAATTTTTATTTAGCCGTTTCTCCGATCATGTGGGGAATACCATTAG +CTGTTGATAGCGGGCCATGTATCCGAGGAAAGCCTGAGCTACAAGGCAAA +GGCATCCCATCTGGAACAAAATCAGAAAGCTATTGGCAAAGGTAATCAAT +CAGGCCATAAATAGCCATTTACCCGCTTCCTTTTCGGGGCTGGAGGTGGG +CCGGGAGCCCTCCAAGGGTGAGCTGGGCAACTTGTAGAGCAAGGAATATG +CCCTCCGCTGCCGGCGCCCCGGCCGCTTTTGTCTGGGCTCCCAGCCGGGC +TTCCGAGGCTTTGTACCATGGATTTGGGAGTGACAATGGGCATTTCCCTC +AGATTCAAGGCTGCTCAACCTCACCTCTGTAGGGGGAAAAAAATCAGAAG +GGAGTGTCCCAAGGACCTAGCCATTCGGCCGAATTTTTTAGACATTTTGG +GAGTCTCCTCCGAGGCCTTTAAGTGCGAACCGCGCGAAGCGGCCCTGCCC +GGGGAGACTCGCTGAGGCAGGGCTGAGGCGGCGGGCGGGAGCAAGCTGCT +CTAGCATTTGGGTTCTGCCCTGTGGCGTGTTCTCTTCCAGGGCCTTTCCA +GCATCATCGGAGAAGACGAAGCACCCTGGCCGCCACTGTCCGTGCTGCGC +CAACTCGCCCGGCCGCCCGCCCTTCCGAGGGCAGGCAGAAGCCCCTCTGT +GTCCTCCACCGCCGCGCCCCGGCTCGCCCCTCGGGCCGCGGCGTGTGCCC +AGCCTCACGTCGGGGTGTGTGTGGCCGCGCGGGCGTGTGTGAGTGTGGCA +GGGGGAGGGGGCCCTCCGATCTGCTCCATCCGTCCGTTTTATTAGGGACA +CATTAATCTATAATCAAATACACCTCATAAAATTTTTATTGAAAGGCATA +ATATCATTACAGAGGTCTTCCACCTGTTTTAAACAACACGACAAGCTGTG +AGCAAGCGTGTGTGTGGGGATGTGTGGGGAGGGGTGGGTGTGAGTAGGGA +GAGAGGCGAGGGGAGAACAGCTCCCCTCGGGCGCTAGGGGCCGCCCCGAG +GGCCCGCCTGCCTCGGGCGACACCGGCCTGGCGCCCCCGCGGCCGCTCCG +TGTGCCCTGGACTCGCCGCCCGCGGCTCGGAAGCTGGAGAGTCAGCGACG +GGGCCCGACTGCGGGACCGAGGGCTGCAAGAAGAAGCGAACAAATAGTCC +CCAGCGCCTCCTCTGGATGCGGTCGCGTCTGTGGTCCTGGCAGCCGCTGG +GCGGGCCAGGCCAGGTCGGGCCGGGCCGAGCCGGGCACATGGACCTGGGC +CTGCGGGCTCTAATTGCGGCGCTTATGTTGATGATTTTTTTTTTAATCAC +AGCAGCCCCCAGTTTAGCGGACTGATTTACTCCCGGTATTGGTAAATATG +ATCACGTGGGCCGCGCGACCAATGGTGGAGGCTGCAGCCTGCGAACTAGT +CGGTGGCTCGGGCGCCGGCGGGGAGCTGCTCGGCGGCGGACAGTGTAATG +TTGGGTGGGAGTGCGGGACGCCTCAAAATGTCTTCCAGTGGCACCCTCAG +CAACTACTACGTGGACTCGCTTATAGGCCATGAGGGCGACGAGGTGTTCG +CGGCGCGCTTCGGGCCGCCGGGGCCAGGCGCGCAGGGCCGGCCTGCAGGT +GTGGCTGATGGCCCGGCCGCCACCGCCGCCGAGTTCGCCTCGTGTAGTTT +TGCCCCCAGATCGGCCGTGTTCTCTGCCTCGTGGTCCGCGGTGCCCTCCC +AGCCCCCGGCAGCGGCGGCGATGAGCGGCCTCTACCACCCGTACGTTCCC +CCGCCGCCCCTGGCCGCCTCTGCCTCCGAGCCCGGCCGCTACGTGCGCTC +CTGGATGGAGCCGCTGCCCGGCTTCCCGGGCGGTGCGGGCGGTGGCGGTG +GTGGTGGAGGCGGCGGTCCGGGCCGCGGTCCCAGCCCTGGCCCCAGCGGC +CCAGCCAACGGGCGCCACTACGGGATTAAGCCTGAAACCCGAGCGGCCCC +GGCCCCCGCCACGGCCGCCTCCACCACCTCCTCCTCCTCCACTTCCTTAT +CCTCCTCCTCCAAACGGACTGAGTGCTCCGTGGCCCGGGAGTCCCAGGGG +AGCAGCGGCCCCGAGTTCTCGTGCAACTCGTTCCTGCAGGAGAAGGCGGC +AGCGGCGACGGGGGGAACCGGGCCTGGGGCAGGGATCGGGGCCGCGACTG +GGACGGGCGGCTCGTCGGAGCCCTCAGCTTGCAGCGACCACCCGATCCCA +GGCTGTTCGCTGAAGGAGGAGGAGAAGCAGCATTCGCAGCCGCAGCAGCA +GCAACTTGACCCAAGTAAGTGCAAAAGAAATTGCCCCCTGATTTATTGCT +GAAACCTGTAAGGCTCGAATGTGCAAAACTGATAGTTTTACTAACCTATA +AAAACGTCTAGACGCCTACCCAAGCCTAGGCGAACAACATGCATCCATAA +AAAGAGCTTCCCATAACCACCTACCCTGGGCGCTCAGTTAGTACGGTAAA +CAGAGCGCGAGCATTAAGGCTTTTTATGATAATTCCCCACAAGTTGTGAA +AAGCGACCATCCTTGGTGAAATTAATTTAACGACCTCTCTTCCCCACCCT +GTGGTCTCTCCCTGCCTCCCCTCCTCTCCTCTCTCCCCGTCTCCAAACCT +CCCTCTTTGTAGACAACCCCGCCGCGAACTGGATCCACGCTCGCTCCACC +CGGAAAAAGCGCTGTCCCTACACCAAATACCAGACGCTTGAGCTGGAGAA +AGAATTCCTCTTCAACATGTACCTCACCCGGGACCGGCGCTACGAGGTGG +CCAGGATTCTCAACCTAACAGAGAGACAGGTCAAAATCTGGTTTCAGAAC +CGTAGGATGAAAATGAAAAAGATGAGCAAGGAGAAATGCCCCAAAGGAGA +CTGACCCGGCGCGGTGCTGGCGGGAGCGCTCAAGGGCAGCGGATTTGTTG +TTGTTGCTGTTTTCCTTTGTGGGTGTTTGGTGCTTGATTTCCAGAAACTC +TCCAGCGACTTGGACTTCTTCTTCTTTTTTTTTTTCTTTTTAGATAGAAG +TGACTGTGTGGTTGGTCTCTGAGGTATTTGGGGGACTCTGTATTTGCTCG +TTTACGTGTTGGAAAAACCAAGTGGCTTTGGGGTTTCGCCCTATCCCACT +CCCTCTCTTTCCTGCTCCATTGGTTCCTTAAGAAATGCTATATTTTGTGA +GTGCAAGCTGGCTTGGGGAGCCCTCTCTTGTGTAAATGTCCCCCATGTTT +CTGAAAAGTGCTGTAGTTTAGTCCCCTCACCCCCAGCACTGCCCAAACAG +GGGCCAAGTGCGCCCCAATTCCAAGAATGAAGGCAGAGCGACAACAGTGC +GGACACCCCGGCTGCTAGCCCACGGTGAAGCCCGGCGGGGTTGCCCACCA +GTTGCGAAAGCCCCCTTTCCTCAGGGAGCACGCGGGACCTCGGTGGAGAT +CTCCAGTGAGGCTTAGAGGAGCCCAGGGCCTCGGGCGGGTTGGGGTTTGT +CCTCAGTGCATTGGACGCGCTGCTCTCTCCCCTGAAGGCTGGGCTCGCGT +GGGCGGCCGCGGGTGGTGGCCCTCCCGGTTCCTGCCCGAGGACCAGTTGT +AAATGTTACTGCTTCCTACTAATAAATGCTGACCTGATCAAATGGAGCCC +AGACGCTGGCCCTAAACATTGTGTGCCTGCTTTCTCTGCCTCTCTGCAAA +ATATCACACTCAGGATATTTCTCCTCTACCCCTGGGAGTGAGACATTGTT +AAAAATTCAGGGCCCTTCCACCTGACAGATCTCTCTGATGTGTCTCTGCC +TTCTCTGCCTCACATCCCTTTGTGTAGGCAGATGCAGCAGCA diff -r f088370d2a3c -r e3ba567abdf5 test-data/cached_locally/ref.gtf --- a/test-data/cached_locally/ref.gtf Sun Jan 28 04:04:58 2018 -0500 +++ b/test-data/cached_locally/ref.gtf Fri Mar 11 14:08:11 2022 +0000 @@ -1,6 +1,20 @@ -1 ensembl_havana gene 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; -1 ensembl_havana transcript 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA"; -1 ensembl_havana exon 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA"; -1 ensembl_havana CDS 1 100 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level "NA"; -1 ensembl_havana start_codon 1 3 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA"; -1 ensembl_havana stop_codon 101 103 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA"; +fake_chr2 HAVANA gene 257 3416 . + . gene_id "ENSG00000128710.6"; gene_type "protein_coding"; gene_name "HOXD10"; level 1; hgnc_id "HGNC:5133"; tag "overlapping_locus"; havana_gene "OTTHUMG00000132511.5"; +fake_chr2 HAVANA transcript 257 3416 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA exon 257 1057 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA CDS 313 1057 . + 0 gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA start_codon 313 315 . + 0 gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA exon 2433 3416 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA CDS 2433 2707 . + 2 gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA stop_codon 2708 2710 . + 0 gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA UTR 257 312 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA UTR 2708 3416 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA gene 6198 8416 . + . gene_id "ENSG00000128709.13"; gene_type "protein_coding"; gene_name "HOXD9"; level 2; hgnc_id "HGNC:5140"; havana_gene "OTTHUMG00000132516.6"; +fake_chr2 HAVANA transcript 6198 8416 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA exon 6198 7064 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA CDS 6248 7064 . + 0 gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA start_codon 6248 6250 . + 0 gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA exon 7413 8416 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA CDS 7413 7651 . + 2 gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA stop_codon 7652 7654 . + 0 gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA UTR 6198 6247 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA UTR 7652 8416 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; diff -r f088370d2a3c -r e3ba567abdf5 test-data/gc.tab --- a/test-data/gc.tab Sun Jan 28 04:04:58 2018 -0500 +++ b/test-data/gc.tab Fri Mar 11 14:08:11 2022 +0000 @@ -1,1 +1,2 @@ -ENSG00000162526 0.388349514563107 +ENSG00000128709.13 0.626402993051844 +ENSG00000128710.6 0.467226890756303 diff -r f088370d2a3c -r e3ba567abdf5 test-data/in.fasta --- a/test-data/in.fasta Sun Jan 28 04:04:58 2018 -0500 +++ b/test-data/in.fasta Fri Mar 11 14:08:11 2022 +0000 @@ -1,2 +1,173 @@ ->1 -AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT \ No newline at end of file +>fake_chr2 hg38_dna range=chr2:176116522-176125113 5'pad=0 3'pad=0 strand=+ repeatMasking=none +TGGGGCGGGCTGGCCGAGCGAGCCCTGGAGAGGCGGACAGGAGGGCGGCG +GAGAGCGCTGGGCCGGTTGTCTCCAGCGCGCACTATCGCGGGCGCGTAGT +AGATGTCGCTGTTGTCCGTGCTTACCCGGCCGGCCGGCCAGGCTCTGGAG +CACGTGACCCGAGAGGAGGCTGCGGCTCAAGGCCATTTTCAAATCTCATT +GGCTTGGTTGTCATGTGGTCGGCAGAGGCATCCACAATTACACGGGGAAT +GTTTTCCTAGAGATGTCAGCCTACAAAGGACACAATCTCTCTTCTTCAAA +TTCTTCCCCAAAATGTCCTTTCCCAACAGCTCTCCTGCTGCTAATACTTT +TTTAGTAGATTCCTTGATCAGTGCCTGCAGGAGTGACAGTTTTTATTCCA +GCAGCGCCAGCATGTACATGCCACCACCTAGCGCAGACATGGGGACCTAT +GGAATGCAAACCTGTGGACTGCTCCCGTCTCTGGCCAAAAGAGAAGTGAA +CCACCAAAATATGGGTATGAATGTGCATCCTTATATACCTCAAGTAGACA +GTTGGACAGATCCGAACAGATCTTGTCGAATAGAGCAACCTGTTACACAG +CAAGTCCCCACTTGCTCCTTCACCACCAACATTAAGGAAGAATCCAATTG +CTGCATGTATTCTGATAAGCGCAACAAACTCATTTCGGCCGAGGTCCCTT +CGTACCAGAGGCTGGTCCCTGAGTCTTGTCCCGTTGAGAACCCTGAGGTT +CCCGTCCCTGGATATTTTAGACTGAGTCAGACCTACGCCACCGGGAAAAC +CCAAGAGTACAATAATAGCCCCGAAGGCAGCTCCACTGTCATGCTCCAGC +TCAACCCTCGTGGCGCGGCCAAGCCGCAGCTCTCCGCTGCCCAGCTGCAG +ATGGAAAAGAAGATGAACGAGCCCGTGAGCGGCCAGGAGCCCACCAAAGT +CTCCCAGGTGGAGAGCCCCGAGGCCAAAGGCGGCCTTCCCGAAGAGAGGA +GCTGCCTGGCTGAGGTCTCCGTGTCCAGTCCCGAAGTGCAGGAGAAGGAA +AGCAAAGGTCGGTATGAGCAGAGTTGCCACCCCAGCGGGGCGCGCAGCCC +GGGAACCCGGCAGAGAGGGAGTGCCGGGGTGCCCAGCGCCGAGCCGGAGC +CCGACTTGGCAGGTGCTGCTCCGCCTGGTTTTAGAGGGGTGATCTCAGCC +CTGAGATAGTCCCCGCTTCTCCCCTGCTGCCCTGGCCCTCTCCGCCAGTC +CTGGCCCCACGCTGATGGCGCCCGGGCAGAGGAAAAGCTTGCCGGTTTTA +TTTTTCCTGAGCTAGACCTGAACACAACAAAAGAGCGCAAAGGAGACCTG +CGGCTCATAAACACGACCACAGAGCCTCTTTTCTCCTGCTCAGATTTGCA +GTTCCAGTTTTGCCTTGAGCCCAATGATCATGTTAAGGTGATCCAGGGCA +CCGTGTTCGTGTTCAAGTGTATGCACCCCGCATCCTGCGAGCTTGGGGGT +GGTGAGGGGAAAGAGATGGCTGGGCTGGTTGGTGCTTGAGTTGGGAAACA +GGGCTTACTGCCTTTGCTGGGCTAGGTAACCTTGGCTTTGTTTAGGAAAA +GTGCTGCAGTCTTTGCAATCCGTCGGCAAAGAGGGCAAAGGCGGAGGGGG +AGAGTGGAACCCGCATTGCCCTCCCTGCAAGGCCAGCCTTAGGGCTGGGC +TAAGGCAAAGAGCCAGGGATCTGGCTTTTTGAGAAGGAACCCTCCTCCTC +TCCCCCAGTGCTTAGAGGTGGGCCACAGTAGGGGGCTCCCTTTCTGGGGG +AATGCTTTAGTGTGGGGGCAAGAAGACATGAAAATTAAGGAAATTCTGGG +GAATGCAACAATACCCAGGCAAGGTGGGGGAAGGTGTCTCGCTTCCCCAT +TTATCTTTTGAAAGAGAATGGGCACCTATAAACCTGACTGTCAGGATTCC +TGACTGCCTAGGAGAGGTGGGGAAGAAGTGGCAGATTTGGGGACCTGAGG +CAGCAGTGGGGTTGGTAGGCTTGTCCAGGTCGTGGCGTATTCCCCTCCGT +CCCTGTTAGGAGCTGAACCCTTAGAATGTTGCTGGGGAGATCTGGAAAGT +TTACTATTCTACTAATGTTTTGTACAAGTGAGAAAGTTGAAAGAGAGAGC +GAGAACCCAAATGCAGACTGTCCTGCCATCATGTCATTTAAGTAATGTGG +CATCAATGTAAGATTCCCTTCCAAGGCCCACTTCATGTGAGTAATGTTTA +ATACTAGCATTTTCCAAAGCGGCCTGGCTGCCAGCAGGGTCACGGCCAAG +GGTACATTTGAACAGTCTGAAGAAAAAAACAAAAACGAAAACCAAAACCA +AAACCAAAACAAAAACAAAAACAAAAACAAACAAACAAAAAACCTCTTGA +TTTTTTTCTTCTTCTCCCTTTAATTTTGTTAGAGGAAATCAAGTCTGATA +CACCAACCAGCAATTGGCTCACTGCAAAGAGTGGCAGAAAGAAGAGGTGC +CCTTACACTAAGCACCAAACGCTGGAATTAGAAAAAGAGTTCTTGTTCAA +TATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAGTAAGAGCGTTAACC +TCACCGACAGGCAGGTCAAGATTTGGTTTCAAAACCGCCGAATGAAACTC +AAGAAGATGAGCCGAGAGAACCGGATCCGAGAACTGACCGCCAACCTCAC +GTTTTCTTAGGTCTGAGGCCGGTCTGAGGCCGGTCAGAGGCCAGGATTGG +AGAGGGGGCACCGCGTTCCAGGGCCCAGTGCTGGAGGACTGGGAAAGCGG +AAACAAAACCTTCACCGCTCTTTGTTTGTTGTTTTGTTGTATTTTGTTTT +CCTGCTAGAATGTGACTTTGGGGTCATTATGTTCGTGCTGCAAGTGATCT +GTAATCCCTATGAGTATATATATATATATATATATATATATATAAAAACT +TAGCACGTGTAATTTATTATTTTTTCATCGTAATGCAGGGTAACTATTAT +TGCGCATTTTCATTTGGGTCTTAACTTATTGGAACTGTAGAGCATCCATC +CATCCATCCATCCAGCAATGTGACTTTTTCATGTCTTTCCTAACACAAAA +GGTCTATGTGTGTGGTTAGTCCATGAACTCATGGCATTTTGAATACATCC +AGTACTTTAAAAATGACATATATATTTAAAAAAAAAAGATTAAGAAAACC +CACAAGTTGGAGGGAGGGGGACTTAAAAAGCACATTACAATGTATCTTTT +CACAAATGAATTTAGCAGTTGTCCTTGGTGAGATGGGATATTGGCGATTT +ATGCCTTGTAGCCTTTCCCTTGTGGTGCATCTGTGGTTTGGTAGAAGTAC +AACAGCAACCTGTCCTTTCTGTGCATGTTCTGGTCGCATGTATAATGCAA +TAAACTCTGGAAATGAGTTCACTCCCTCTGCTTTCTGAAATGGAAATATG +TTATGGTGGAAATGAAAGCCTATGGTGAGATTATCTTCTGGTTACACTCC +CTGTTTGGGGCATTTGGGCAGGGGAGTGATAGACTAGTAGGGGAAGGGAG +ATGGGGGAGAAAAGCTGGAGGAGGCCTAGGGTGTTGGATTTTGGCAGTGG +TTGGGGGAGAGGAATTATAAGCTAGCTTGAGAGTGAAGTTTTCATAATTG +GGAGGAAGGGGAGTCTCCTCTTTCCTTTCCCAGTCCCCAGTGATAGTAAC +ATAATTGCGCTCTCAATGGGTGTGAGCTTTCCTCTGGCCTGAACCTGGTA +AGTAAGCCTATACCCCAAGCCACTTTCTCCTCAAAGCTTCCCATTTGTGT +GTTTTCTCCTCTTTGGTTTTGGTTGTGTTGTTTTTAATGCTTTCAGTGGC +ATCTTGGTGATTTCTGGCTGGCGAGCAATCATCAGGGGCTAGGTTGAAGC +TAGTCTTGCCCACCTGGAAGTTGCCGGCCTCCATTACAGGAGCAAGGACA +AACAGCAGTGTAGCACTGCAGCGGATCCAATTCTGCCCCCTTTCCCTCAG +CCCTACCCCCATCCCAAGCGCAAGACAGCCAGACCCCAGAGAAGCCGAGG +ATGGGTGAGTTTTCCCATCCCACTTCGCCTTGATCTCCTTGTGGACGGGT +TTTATGCTCAGTCATTACCTTTTAGTGGCCCACATGAAATTTTGTTAAAG +GAAGAAATGAAAAGATTTTCCCCAGTCAGTCTTTCCTCTATTTAATTACA +AAATGCTGGTGGGAACTGCTGCATCTGGGATGCAAGAAAATGCAGAAAGG +GTGACTGAAAATTTTGCAAATGAACATGACTTCCCATGAAGTCTAATGTT +CCATTCGCTGCCATGGTCCAGGGGACTCCCACCAGCTTCCACCGGCTTCA +ACAGGATCTCCACTAGAGAGCCCAGACTTATCTAGTCCTGTCGGGGAAAA +GGGAGAAGAGGCCTTGCAGGAGAAAGCTAACAGAAAATTCGTTACCTGAG +GTCCTGCCTGCAGTTTCAAATAGCTTCCAGCAGTTTTACAAAACACATCC +TTTCCATTTCTTCCTTTTAAATGTTTCCCTAAGAACGATCCATTTAGGTG +CTATAAGTCCTCAGCCAGGGAGTCTCTGGGACACTGGCATTCAAAATTTT +AAACTTCCGCCCCAAAACCAGGAACATTCCAAGACAGAACTCTTTTAGGG +GGCCATTTCCTGGGGGTGGGGGAGAGGGCTTGGAATCAATGCTAGATTGA +AAACGTTGTAATAGCTTTGCCCCAGACTTAACACCGGTTGGGCAGGAGGA +GGGTAATTTTTATTTAGCCGTTTCTCCGATCATGTGGGGAATACCATTAG +CTGTTGATAGCGGGCCATGTATCCGAGGAAAGCCTGAGCTACAAGGCAAA +GGCATCCCATCTGGAACAAAATCAGAAAGCTATTGGCAAAGGTAATCAAT +CAGGCCATAAATAGCCATTTACCCGCTTCCTTTTCGGGGCTGGAGGTGGG +CCGGGAGCCCTCCAAGGGTGAGCTGGGCAACTTGTAGAGCAAGGAATATG +CCCTCCGCTGCCGGCGCCCCGGCCGCTTTTGTCTGGGCTCCCAGCCGGGC +TTCCGAGGCTTTGTACCATGGATTTGGGAGTGACAATGGGCATTTCCCTC +AGATTCAAGGCTGCTCAACCTCACCTCTGTAGGGGGAAAAAAATCAGAAG +GGAGTGTCCCAAGGACCTAGCCATTCGGCCGAATTTTTTAGACATTTTGG +GAGTCTCCTCCGAGGCCTTTAAGTGCGAACCGCGCGAAGCGGCCCTGCCC +GGGGAGACTCGCTGAGGCAGGGCTGAGGCGGCGGGCGGGAGCAAGCTGCT +CTAGCATTTGGGTTCTGCCCTGTGGCGTGTTCTCTTCCAGGGCCTTTCCA +GCATCATCGGAGAAGACGAAGCACCCTGGCCGCCACTGTCCGTGCTGCGC +CAACTCGCCCGGCCGCCCGCCCTTCCGAGGGCAGGCAGAAGCCCCTCTGT +GTCCTCCACCGCCGCGCCCCGGCTCGCCCCTCGGGCCGCGGCGTGTGCCC +AGCCTCACGTCGGGGTGTGTGTGGCCGCGCGGGCGTGTGTGAGTGTGGCA +GGGGGAGGGGGCCCTCCGATCTGCTCCATCCGTCCGTTTTATTAGGGACA +CATTAATCTATAATCAAATACACCTCATAAAATTTTTATTGAAAGGCATA +ATATCATTACAGAGGTCTTCCACCTGTTTTAAACAACACGACAAGCTGTG +AGCAAGCGTGTGTGTGGGGATGTGTGGGGAGGGGTGGGTGTGAGTAGGGA +GAGAGGCGAGGGGAGAACAGCTCCCCTCGGGCGCTAGGGGCCGCCCCGAG +GGCCCGCCTGCCTCGGGCGACACCGGCCTGGCGCCCCCGCGGCCGCTCCG +TGTGCCCTGGACTCGCCGCCCGCGGCTCGGAAGCTGGAGAGTCAGCGACG +GGGCCCGACTGCGGGACCGAGGGCTGCAAGAAGAAGCGAACAAATAGTCC +CCAGCGCCTCCTCTGGATGCGGTCGCGTCTGTGGTCCTGGCAGCCGCTGG +GCGGGCCAGGCCAGGTCGGGCCGGGCCGAGCCGGGCACATGGACCTGGGC +CTGCGGGCTCTAATTGCGGCGCTTATGTTGATGATTTTTTTTTTAATCAC +AGCAGCCCCCAGTTTAGCGGACTGATTTACTCCCGGTATTGGTAAATATG +ATCACGTGGGCCGCGCGACCAATGGTGGAGGCTGCAGCCTGCGAACTAGT +CGGTGGCTCGGGCGCCGGCGGGGAGCTGCTCGGCGGCGGACAGTGTAATG +TTGGGTGGGAGTGCGGGACGCCTCAAAATGTCTTCCAGTGGCACCCTCAG +CAACTACTACGTGGACTCGCTTATAGGCCATGAGGGCGACGAGGTGTTCG +CGGCGCGCTTCGGGCCGCCGGGGCCAGGCGCGCAGGGCCGGCCTGCAGGT +GTGGCTGATGGCCCGGCCGCCACCGCCGCCGAGTTCGCCTCGTGTAGTTT +TGCCCCCAGATCGGCCGTGTTCTCTGCCTCGTGGTCCGCGGTGCCCTCCC +AGCCCCCGGCAGCGGCGGCGATGAGCGGCCTCTACCACCCGTACGTTCCC +CCGCCGCCCCTGGCCGCCTCTGCCTCCGAGCCCGGCCGCTACGTGCGCTC +CTGGATGGAGCCGCTGCCCGGCTTCCCGGGCGGTGCGGGCGGTGGCGGTG +GTGGTGGAGGCGGCGGTCCGGGCCGCGGTCCCAGCCCTGGCCCCAGCGGC +CCAGCCAACGGGCGCCACTACGGGATTAAGCCTGAAACCCGAGCGGCCCC +GGCCCCCGCCACGGCCGCCTCCACCACCTCCTCCTCCTCCACTTCCTTAT +CCTCCTCCTCCAAACGGACTGAGTGCTCCGTGGCCCGGGAGTCCCAGGGG +AGCAGCGGCCCCGAGTTCTCGTGCAACTCGTTCCTGCAGGAGAAGGCGGC +AGCGGCGACGGGGGGAACCGGGCCTGGGGCAGGGATCGGGGCCGCGACTG +GGACGGGCGGCTCGTCGGAGCCCTCAGCTTGCAGCGACCACCCGATCCCA +GGCTGTTCGCTGAAGGAGGAGGAGAAGCAGCATTCGCAGCCGCAGCAGCA +GCAACTTGACCCAAGTAAGTGCAAAAGAAATTGCCCCCTGATTTATTGCT +GAAACCTGTAAGGCTCGAATGTGCAAAACTGATAGTTTTACTAACCTATA +AAAACGTCTAGACGCCTACCCAAGCCTAGGCGAACAACATGCATCCATAA +AAAGAGCTTCCCATAACCACCTACCCTGGGCGCTCAGTTAGTACGGTAAA +CAGAGCGCGAGCATTAAGGCTTTTTATGATAATTCCCCACAAGTTGTGAA +AAGCGACCATCCTTGGTGAAATTAATTTAACGACCTCTCTTCCCCACCCT +GTGGTCTCTCCCTGCCTCCCCTCCTCTCCTCTCTCCCCGTCTCCAAACCT +CCCTCTTTGTAGACAACCCCGCCGCGAACTGGATCCACGCTCGCTCCACC +CGGAAAAAGCGCTGTCCCTACACCAAATACCAGACGCTTGAGCTGGAGAA +AGAATTCCTCTTCAACATGTACCTCACCCGGGACCGGCGCTACGAGGTGG +CCAGGATTCTCAACCTAACAGAGAGACAGGTCAAAATCTGGTTTCAGAAC +CGTAGGATGAAAATGAAAAAGATGAGCAAGGAGAAATGCCCCAAAGGAGA +CTGACCCGGCGCGGTGCTGGCGGGAGCGCTCAAGGGCAGCGGATTTGTTG +TTGTTGCTGTTTTCCTTTGTGGGTGTTTGGTGCTTGATTTCCAGAAACTC +TCCAGCGACTTGGACTTCTTCTTCTTTTTTTTTTTCTTTTTAGATAGAAG +TGACTGTGTGGTTGGTCTCTGAGGTATTTGGGGGACTCTGTATTTGCTCG +TTTACGTGTTGGAAAAACCAAGTGGCTTTGGGGTTTCGCCCTATCCCACT +CCCTCTCTTTCCTGCTCCATTGGTTCCTTAAGAAATGCTATATTTTGTGA +GTGCAAGCTGGCTTGGGGAGCCCTCTCTTGTGTAAATGTCCCCCATGTTT +CTGAAAAGTGCTGTAGTTTAGTCCCCTCACCCCCAGCACTGCCCAAACAG +GGGCCAAGTGCGCCCCAATTCCAAGAATGAAGGCAGAGCGACAACAGTGC +GGACACCCCGGCTGCTAGCCCACGGTGAAGCCCGGCGGGGTTGCCCACCA +GTTGCGAAAGCCCCCTTTCCTCAGGGAGCACGCGGGACCTCGGTGGAGAT +CTCCAGTGAGGCTTAGAGGAGCCCAGGGCCTCGGGCGGGTTGGGGTTTGT +CCTCAGTGCATTGGACGCGCTGCTCTCTCCCCTGAAGGCTGGGCTCGCGT +GGGCGGCCGCGGGTGGTGGCCCTCCCGGTTCCTGCCCGAGGACCAGTTGT +AAATGTTACTGCTTCCTACTAATAAATGCTGACCTGATCAAATGGAGCCC +AGACGCTGGCCCTAAACATTGTGTGCCTGCTTTCTCTGCCTCTCTGCAAA +ATATCACACTCAGGATATTTCTCCTCTACCCCTGGGAGTGAGACATTGTT +AAAAATTCAGGGCCCTTCCACCTGACAGATCTCTCTGATGTGTCTCTGCC +TTCTCTGCCTCACATCCCTTTGTGTAGGCAGATGCAGCAGCA diff -r f088370d2a3c -r e3ba567abdf5 test-data/in.gtf --- a/test-data/in.gtf Sun Jan 28 04:04:58 2018 -0500 +++ b/test-data/in.gtf Fri Mar 11 14:08:11 2022 +0000 @@ -1,6 +1,20 @@ -1 ensembl_havana gene 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; -1 ensembl_havana transcript 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA"; -1 ensembl_havana exon 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA"; -1 ensembl_havana CDS 1 100 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level "NA"; -1 ensembl_havana start_codon 1 3 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA"; -1 ensembl_havana stop_codon 101 103 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA"; +fake_chr2 HAVANA gene 257 3416 . + . gene_id "ENSG00000128710.6"; gene_type "protein_coding"; gene_name "HOXD10"; level 1; hgnc_id "HGNC:5133"; tag "overlapping_locus"; havana_gene "OTTHUMG00000132511.5"; +fake_chr2 HAVANA transcript 257 3416 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA exon 257 1057 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA CDS 313 1057 . + 0 gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA start_codon 313 315 . + 0 gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA exon 2433 3416 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA CDS 2433 2707 . + 2 gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA stop_codon 2708 2710 . + 0 gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA UTR 257 312 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA UTR 2708 3416 . + . gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3"; +fake_chr2 HAVANA gene 6198 8416 . + . gene_id "ENSG00000128709.13"; gene_type "protein_coding"; gene_name "HOXD9"; level 2; hgnc_id "HGNC:5140"; havana_gene "OTTHUMG00000132516.6"; +fake_chr2 HAVANA transcript 6198 8416 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA exon 6198 7064 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA CDS 6248 7064 . + 0 gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA start_codon 6248 6250 . + 0 gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA exon 7413 8416 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA CDS 7413 7651 . + 2 gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA stop_codon 7652 7654 . + 0 gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA UTR 6198 6247 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; +fake_chr2 HAVANA UTR 7652 8416 . + . gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6"; diff -r f088370d2a3c -r e3ba567abdf5 test-data/length.tab --- a/test-data/length.tab Sun Jan 28 04:04:58 2018 -0500 +++ b/test-data/length.tab Fri Mar 11 14:08:11 2022 +0000 @@ -1,1 +1,2 @@ -ENSG00000162526 103 +ENSG00000128709.13 1871 +ENSG00000128710.6 1785