changeset 2:e3ba567abdf5 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/length_and_gc_content commit 7b6b07d22f3e6fed77b2c237de2b0d96fa939711"
author iuc
date Fri, 11 Mar 2022 14:08:11 +0000
parents f088370d2a3c
children
files get_length_and_gc_content.r get_length_and_gc_content.xml macros.xml test-data/cached_locally/ref.fasta test-data/cached_locally/ref.gtf test-data/gc.tab test-data/in.fasta test-data/in.gtf test-data/length.tab
diffstat 9 files changed, 523 insertions(+), 96 deletions(-) [+]
line wrap: on
line diff
--- a/get_length_and_gc_content.r	Sun Jan 28 04:04:58 2018 -0500
+++ b/get_length_and_gc_content.r	Fri Mar 11 14:08:11 2022 +0000
@@ -1,59 +1,94 @@
 # originally by Devon Ryan, https://www.biostars.org/p/84467/
 
-options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+options(show.error.messages = F,
+        error = function() {
+          cat(geterrmessage(), file = stderr())
+          q("no", 1, F)
+        })
 
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
 suppressPackageStartupMessages({
-    library("GenomicRanges")
-    library("rtracklayer")
-    library("Rsamtools")
-    library("optparse")
-    library("data.table")
+  library("GenomicRanges")
+  library("rtracklayer")
+  library("Rsamtools")
+  library("optparse")
+  library("data.table")
 })
 
 option_list <- list(
-    make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."),
-    make_option(c("-f","--fasta"), type="character", default=FALSE, help="FASTA file that corresponds to the supplied GTF."),
-    make_option(c("-l","--length"), type="character", default=FALSE, help="Output file with Gene ID and length."),
-    make_option(c("-gc","--gc_content"), type="character", default=FALSE, help="Output file with Gene ID and GC content.")
-  )
+  make_option(c("-g", "--gtf"), type = "character",
+              help = "Input gtf file with gene / exon information."),
+  make_option(c("-f", "--fasta"), type = "character", default = NULL,
+              help = "fasta file that corresponds to the supplied gtf."),
+  make_option(c("-l", "--length"), type = "character", default = NULL,
+              help = "Output file with Gene ID and length."),
+  make_option(c("-c", "--gc_content"), type = "character", default = NULL,
+              help = "Output file with Gene ID and GC content.")
+)
 
-parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
-args = parse_args(parser)
+parser <- OptionParser(usage = "%prog [options] file",
+                       option_list = option_list)
+args <- parse_args(parser)
 
-GTFfile = args$gtf
-FASTAfile = args$fasta
-length = args$length
-gc_content = args$gc_content
+gtf_file <- args$gtf
+fasta_file <- args$fasta
+length <- args$length
+gc_content <- args$gc_content
+
+# Check args:
+if (is.null(fasta_file) & !is.null(gc_content)) {
+  stop("gc_content output requires fasta input")
+}
+if (is.null(length) & is.null(gc_content)) {
+  stop("neither gc_content nor length was set nothing to do.")
+}
 
 #Load the annotation and reduce it
-GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon")
-grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id))
-reducedGTF <- unlist(grl, use.names=T)
-elementMetadata(reducedGTF)$gene_id <- rep(names(grl), elementNROWS(grl))
+gtf <- import.gff(gtf_file, format = "gtf", genome = NA, feature.type = "exon")
+grl <- reduce(split(gtf, elementMetadata(gtf)$gene_id))
+reduced_gtf <- unlist(grl, use.names = T)
+elementMetadata(reduced_gtf)$gene_id <- rep(names(grl), elementNROWS(grl))
 
-#Open the fasta file
-FASTA <- FaFile(FASTAfile)
-open(FASTA)
+if (! is.null(gc_content)) {
+  #Open the fasta file
+  fasta <- FaFile(fasta_file)
+  open(fasta)
 
-#Add the GC numbers
-elementMetadata(reducedGTF)$nGCs <- letterFrequency(getSeq(FASTA, reducedGTF), "GC")[,1]
-elementMetadata(reducedGTF)$widths <- width(reducedGTF)
+  #Add the GC numbers
+  elementMetadata(reduced_gtf)$n_gcs <-
+    letterFrequency(getSeq(fasta, reduced_gtf), "GC")[, 1]
+}
+elementMetadata(reduced_gtf)$widths <- width(reduced_gtf)
 
 #Create a list of the ensembl_id/GC/length
-calc_GC_length <- function(x) {
-    nGCs = sum(elementMetadata(x)$nGCs)
-    width = sum(elementMetadata(x)$widths)
-    c(width, nGCs/width)
+if (! is.null(gc_content)) {
+  calc_gc_length <- function(x) {
+    n_gcs <- sum(elementMetadata(x)$n_gcs)
+    width <- sum(elementMetadata(x)$widths)
+    c(width, n_gcs / width)
+  }
+  output <- t(sapply(split(reduced_gtf, elementMetadata(reduced_gtf)$gene_id),
+                     calc_gc_length))
+  output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[])
+  write.table(output[, c(1, 3)], file = gc_content,
+              col.names = FALSE, row.names = FALSE,
+              quote = FALSE, sep = "\t")
+} else {
+  all_widths <- sapply(split(reduced_gtf, elementMetadata(reduced_gtf)$gene_id),
+                       function(x) {
+                         sum(elementMetadata(x)$widths)
+                        })
+  output <- data.frame(gene_id = names(all_widths),
+                       length = all_widths)
 }
-output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length))
-output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[])
 
-
-write.table(output[,c(1,2)], file=length, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
-write.table(output[,c(1,3)], file=gc_content, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
+if (! is.null(length)) {
+  write.table(output[, c(1, 2)], file = length,
+              col.names = FALSE, row.names = FALSE,
+              quote = FALSE, sep = "\t")
+}
 
 
 sessionInfo()
--- a/get_length_and_gc_content.xml	Sun Jan 28 04:04:58 2018 -0500
+++ b/get_length_and_gc_content.xml	Fri Mar 11 14:08:11 2022 +0000
@@ -1,11 +1,9 @@
-<tool id="length_and_gc_content" name="Gene length and GC content" version="0.1.1">
+<tool id="length_and_gc_content" name="Gene length and GC content" version="0.1.2">
     <description>from GTF and FASTA file</description>
-    <requirements>
-        <requirement type="package" version="1.3.2">r-optparse</requirement>
-        <requirement type="package" version="1.4.2">r-reshape2</requirement>
-        <requirement type="package" version="1.10.4">r-data.table</requirement>
-        <requirement type="package" version="1.34.2">bioconductor-rtracklayer</requirement>
-    </requirements>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
     <stdio>
         <regex match="Execution halted"
                source="both"
@@ -21,7 +19,7 @@
                description="An undefined error occured, please check your input carefully and contact your administrator." />
     </stdio>
     <version_command><![CDATA[
-        echo $(R --version | grep version | grep -v GNU)", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", reshape2 version" $(R --vanilla --slave -e "library(reshape2); cat(sessionInfo()\$otherPkgs\$reshape2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtracklayer version" $(R --vanilla --slave -e "library(rtracklayer); cat(sessionInfo()\$otherPkgs\$rtracklayer\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", data.table version" $(R --vanilla --slave -e "library(data.table); cat(sessionInfo()\$otherPkgs\$data.table\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+        echo $(R --version | grep version | grep -v GNU)", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtracklayer version" $(R --vanilla --slave -e "library(rtracklayer); cat(sessionInfo()\$otherPkgs\$rtracklayer\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", data.table version" $(R --vanilla --slave -e "library(data.table); cat(sessionInfo()\$otherPkgs\$data.table\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
     ]]></version_command>
     <command><![CDATA[
 
@@ -37,24 +35,24 @@
 
 ## Get FASTA
 
-#if $fasta_file.fastaSource == 'indexed':
-    ln -s '$fasta_file.fasta_pre_installed.fields.path' fasta
-#else:
-    ln -s '$fasta_file.fasta_history' fasta
+#if $analysis.analysis_select != "length":
+    #if $analysis.fasta_file.fastaSource == 'indexed':
+        ln -s '$analysis.fasta_file.fasta_pre_installed.fields.path' fasta &&
+    #else:
+        ln -s '$analysis.fasta_file.fasta_history' fasta &&
+    #end if
 #end if
 
-&&
-
 Rscript '$__tool_directory__/get_length_and_gc_content.r'
 
 --gtf gtf
---fasta fasta
 
-#if $length_out:
+#if $analysis.analysis_select != "gc":
     --length '$length'
 #end if
 
-#if $gc_out:
+#if $analysis.analysis_select != "length":
+    --fasta fasta
     --gc_content '$gc_content'
 #end if
 
@@ -79,39 +77,31 @@
             </when>
         </conditional>
 
-        <conditional name="fasta_file">
-            <param name="fastaSource" type="select" label="Select a built-in FASTA or one from your history" help="Choose history if you don't see the correct FASTA. The FASTA must be the same genome version as the GTF.">
-                <option value="indexed" selected="true">Use a built-in FASTA </option>
-                <option value="history">Use a FASTA from history</option>
+        <conditional name="analysis">
+            <param name="analysis_select" type="select" label="Analysis to perform">
+                <option value="all" selected="true">GC-content and gene lengths</option>
+                <option value="gc">GC-content only</option>
+                <option value="length">gene lengths only</option>
             </param>
-            <when value="indexed">
-                <param name="fasta_pre_installed" type="select" help="Select the FASTA file from a list of pre-installed genomes" label="Select a FASTA file">
-                    <options from_data_table="all_fasta">
-                        <filter type="sort_by" column="2" />
-                    </options>
-                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
-                </param>
+            <when value="all">
+                <expand macro="fasta" />
             </when>
-            <when value="history">
-                <param name="fasta_history" type="data" format="fasta" label="Select a FASTA file that matches the supplied GTF file" />
+            <when value="gc">
+                <expand macro="fasta" />
             </when>
+            <when value="length"/>
         </conditional>
-
-
-        <param name="length_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output length file?" help="Default: Yes" />
-        <param name="gc_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output GC content file?" help="Default: Yes" />
-
     </inputs>
 
     <outputs>
         <data name="length" format="tabular" label="Gene length">
-            <filter>length_out is True</filter>
+            <filter>analysis['analysis_select'] != "gc"</filter>
             <actions>
                 <action name="column_names" type="metadata" default="GeneID,Length" />
             </actions>
         </data>
         <data name="gc_content" format="tabular" label="Gene GC content">
-            <filter>gc_out is True</filter>
+            <filter>analysis['analysis_select'] != "length"</filter>
              <actions>
                 <action name="column_names" type="metadata" default="GeneID,GC_content" />
             </actions>
@@ -119,6 +109,8 @@
     </outputs>
 
     <tests>
+        <!-- The gtf file was generated by
+        zcat gencode.v39.basic.annotation.gtf.gz | grep "HOXD" | awk -F "\t" -v OFS="\t" '$0~/HOXD10/ || $0~/HOXD9/ {$1="fake_chr2";$4-=176116521;$5-=176116521; print}' -->
         <!-- Ensure length and GC files are output -->
         <test expect_num_outputs="2">
             <param name="gtfSource" value="history" />
@@ -138,15 +130,14 @@
         <!-- Ensure optional gc content works  -->
         <test expect_num_outputs="1">
             <param name="gtfSource" value="cached" />
-            <param name="fastaSource" value="indexed" />
-            <param name="gc_out" value="False" />
+            <param name="analysis_select" value="length" />
             <output name="length" file="length.tab" />
         </test>
         <!-- Ensure optional length works -->
         <test expect_num_outputs="1">
             <param name="gtfSource" value="cached" />
             <param name="fastaSource" value="indexed" />
-            <param name="length_out" value="False" />
+            <param name="analysis_select" value="gc" />
             <output name="gc_content" file="gc.tab" />
         </test>
     </tests>
@@ -156,14 +147,15 @@
 
 .. class:: infomark
 
-This tool calculates the length and GC content for the genes in a GTF file. It requires a FASTA file that is the same genome version as the GTF.
+This tool calculates the length and/or GC content for the genes in a GTF file.
+To compute the GC content, it requires a FASTA file from the same genome version as the GTF.
 
 -----
 
 **Inputs**
 
 - a GTF file
-- a FASTA file
+- a FASTA file (if GC content is requested)
 
 -----
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Mar 11 14:08:11 2022 +0000
@@ -0,0 +1,28 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.7.1">r-optparse</requirement>
+            <requirement type="package" version="1.14.2">r-data.table</requirement>
+            <requirement type="package" version="1.54.0">bioconductor-rtracklayer</requirement>
+        </requirements>
+    </xml>
+    <xml name="fasta">
+        <conditional name="fasta_file">
+            <param name="fastaSource" type="select" label="Select a built-in FASTA or one from your history" help="Choose history if you don't see the correct FASTA. The FASTA must be the same genome version as the GTF.">
+                <option value="indexed" selected="true">Use a built-in FASTA </option>
+                <option value="history">Use a FASTA from history</option>
+            </param>
+            <when value="indexed">
+                <param name="fasta_pre_installed" type="select" help="Select the FASTA file from a list of pre-installed genomes" label="Select a FASTA file">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="fasta_history" type="data" format="fasta" label="Select a FASTA file that matches the supplied GTF file" />
+            </when>
+        </conditional>
+    </xml>
+</macros>
--- a/test-data/cached_locally/ref.fasta	Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/cached_locally/ref.fasta	Fri Mar 11 14:08:11 2022 +0000
@@ -1,2 +1,173 @@
->1
-AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT
\ No newline at end of file
+>fake_chr2 hg38_dna range=chr2:176116522-176125113 5'pad=0 3'pad=0 strand=+ repeatMasking=none
+TGGGGCGGGCTGGCCGAGCGAGCCCTGGAGAGGCGGACAGGAGGGCGGCG
+GAGAGCGCTGGGCCGGTTGTCTCCAGCGCGCACTATCGCGGGCGCGTAGT
+AGATGTCGCTGTTGTCCGTGCTTACCCGGCCGGCCGGCCAGGCTCTGGAG
+CACGTGACCCGAGAGGAGGCTGCGGCTCAAGGCCATTTTCAAATCTCATT
+GGCTTGGTTGTCATGTGGTCGGCAGAGGCATCCACAATTACACGGGGAAT
+GTTTTCCTAGAGATGTCAGCCTACAAAGGACACAATCTCTCTTCTTCAAA
+TTCTTCCCCAAAATGTCCTTTCCCAACAGCTCTCCTGCTGCTAATACTTT
+TTTAGTAGATTCCTTGATCAGTGCCTGCAGGAGTGACAGTTTTTATTCCA
+GCAGCGCCAGCATGTACATGCCACCACCTAGCGCAGACATGGGGACCTAT
+GGAATGCAAACCTGTGGACTGCTCCCGTCTCTGGCCAAAAGAGAAGTGAA
+CCACCAAAATATGGGTATGAATGTGCATCCTTATATACCTCAAGTAGACA
+GTTGGACAGATCCGAACAGATCTTGTCGAATAGAGCAACCTGTTACACAG
+CAAGTCCCCACTTGCTCCTTCACCACCAACATTAAGGAAGAATCCAATTG
+CTGCATGTATTCTGATAAGCGCAACAAACTCATTTCGGCCGAGGTCCCTT
+CGTACCAGAGGCTGGTCCCTGAGTCTTGTCCCGTTGAGAACCCTGAGGTT
+CCCGTCCCTGGATATTTTAGACTGAGTCAGACCTACGCCACCGGGAAAAC
+CCAAGAGTACAATAATAGCCCCGAAGGCAGCTCCACTGTCATGCTCCAGC
+TCAACCCTCGTGGCGCGGCCAAGCCGCAGCTCTCCGCTGCCCAGCTGCAG
+ATGGAAAAGAAGATGAACGAGCCCGTGAGCGGCCAGGAGCCCACCAAAGT
+CTCCCAGGTGGAGAGCCCCGAGGCCAAAGGCGGCCTTCCCGAAGAGAGGA
+GCTGCCTGGCTGAGGTCTCCGTGTCCAGTCCCGAAGTGCAGGAGAAGGAA
+AGCAAAGGTCGGTATGAGCAGAGTTGCCACCCCAGCGGGGCGCGCAGCCC
+GGGAACCCGGCAGAGAGGGAGTGCCGGGGTGCCCAGCGCCGAGCCGGAGC
+CCGACTTGGCAGGTGCTGCTCCGCCTGGTTTTAGAGGGGTGATCTCAGCC
+CTGAGATAGTCCCCGCTTCTCCCCTGCTGCCCTGGCCCTCTCCGCCAGTC
+CTGGCCCCACGCTGATGGCGCCCGGGCAGAGGAAAAGCTTGCCGGTTTTA
+TTTTTCCTGAGCTAGACCTGAACACAACAAAAGAGCGCAAAGGAGACCTG
+CGGCTCATAAACACGACCACAGAGCCTCTTTTCTCCTGCTCAGATTTGCA
+GTTCCAGTTTTGCCTTGAGCCCAATGATCATGTTAAGGTGATCCAGGGCA
+CCGTGTTCGTGTTCAAGTGTATGCACCCCGCATCCTGCGAGCTTGGGGGT
+GGTGAGGGGAAAGAGATGGCTGGGCTGGTTGGTGCTTGAGTTGGGAAACA
+GGGCTTACTGCCTTTGCTGGGCTAGGTAACCTTGGCTTTGTTTAGGAAAA
+GTGCTGCAGTCTTTGCAATCCGTCGGCAAAGAGGGCAAAGGCGGAGGGGG
+AGAGTGGAACCCGCATTGCCCTCCCTGCAAGGCCAGCCTTAGGGCTGGGC
+TAAGGCAAAGAGCCAGGGATCTGGCTTTTTGAGAAGGAACCCTCCTCCTC
+TCCCCCAGTGCTTAGAGGTGGGCCACAGTAGGGGGCTCCCTTTCTGGGGG
+AATGCTTTAGTGTGGGGGCAAGAAGACATGAAAATTAAGGAAATTCTGGG
+GAATGCAACAATACCCAGGCAAGGTGGGGGAAGGTGTCTCGCTTCCCCAT
+TTATCTTTTGAAAGAGAATGGGCACCTATAAACCTGACTGTCAGGATTCC
+TGACTGCCTAGGAGAGGTGGGGAAGAAGTGGCAGATTTGGGGACCTGAGG
+CAGCAGTGGGGTTGGTAGGCTTGTCCAGGTCGTGGCGTATTCCCCTCCGT
+CCCTGTTAGGAGCTGAACCCTTAGAATGTTGCTGGGGAGATCTGGAAAGT
+TTACTATTCTACTAATGTTTTGTACAAGTGAGAAAGTTGAAAGAGAGAGC
+GAGAACCCAAATGCAGACTGTCCTGCCATCATGTCATTTAAGTAATGTGG
+CATCAATGTAAGATTCCCTTCCAAGGCCCACTTCATGTGAGTAATGTTTA
+ATACTAGCATTTTCCAAAGCGGCCTGGCTGCCAGCAGGGTCACGGCCAAG
+GGTACATTTGAACAGTCTGAAGAAAAAAACAAAAACGAAAACCAAAACCA
+AAACCAAAACAAAAACAAAAACAAAAACAAACAAACAAAAAACCTCTTGA
+TTTTTTTCTTCTTCTCCCTTTAATTTTGTTAGAGGAAATCAAGTCTGATA
+CACCAACCAGCAATTGGCTCACTGCAAAGAGTGGCAGAAAGAAGAGGTGC
+CCTTACACTAAGCACCAAACGCTGGAATTAGAAAAAGAGTTCTTGTTCAA
+TATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAGTAAGAGCGTTAACC
+TCACCGACAGGCAGGTCAAGATTTGGTTTCAAAACCGCCGAATGAAACTC
+AAGAAGATGAGCCGAGAGAACCGGATCCGAGAACTGACCGCCAACCTCAC
+GTTTTCTTAGGTCTGAGGCCGGTCTGAGGCCGGTCAGAGGCCAGGATTGG
+AGAGGGGGCACCGCGTTCCAGGGCCCAGTGCTGGAGGACTGGGAAAGCGG
+AAACAAAACCTTCACCGCTCTTTGTTTGTTGTTTTGTTGTATTTTGTTTT
+CCTGCTAGAATGTGACTTTGGGGTCATTATGTTCGTGCTGCAAGTGATCT
+GTAATCCCTATGAGTATATATATATATATATATATATATATATAAAAACT
+TAGCACGTGTAATTTATTATTTTTTCATCGTAATGCAGGGTAACTATTAT
+TGCGCATTTTCATTTGGGTCTTAACTTATTGGAACTGTAGAGCATCCATC
+CATCCATCCATCCAGCAATGTGACTTTTTCATGTCTTTCCTAACACAAAA
+GGTCTATGTGTGTGGTTAGTCCATGAACTCATGGCATTTTGAATACATCC
+AGTACTTTAAAAATGACATATATATTTAAAAAAAAAAGATTAAGAAAACC
+CACAAGTTGGAGGGAGGGGGACTTAAAAAGCACATTACAATGTATCTTTT
+CACAAATGAATTTAGCAGTTGTCCTTGGTGAGATGGGATATTGGCGATTT
+ATGCCTTGTAGCCTTTCCCTTGTGGTGCATCTGTGGTTTGGTAGAAGTAC
+AACAGCAACCTGTCCTTTCTGTGCATGTTCTGGTCGCATGTATAATGCAA
+TAAACTCTGGAAATGAGTTCACTCCCTCTGCTTTCTGAAATGGAAATATG
+TTATGGTGGAAATGAAAGCCTATGGTGAGATTATCTTCTGGTTACACTCC
+CTGTTTGGGGCATTTGGGCAGGGGAGTGATAGACTAGTAGGGGAAGGGAG
+ATGGGGGAGAAAAGCTGGAGGAGGCCTAGGGTGTTGGATTTTGGCAGTGG
+TTGGGGGAGAGGAATTATAAGCTAGCTTGAGAGTGAAGTTTTCATAATTG
+GGAGGAAGGGGAGTCTCCTCTTTCCTTTCCCAGTCCCCAGTGATAGTAAC
+ATAATTGCGCTCTCAATGGGTGTGAGCTTTCCTCTGGCCTGAACCTGGTA
+AGTAAGCCTATACCCCAAGCCACTTTCTCCTCAAAGCTTCCCATTTGTGT
+GTTTTCTCCTCTTTGGTTTTGGTTGTGTTGTTTTTAATGCTTTCAGTGGC
+ATCTTGGTGATTTCTGGCTGGCGAGCAATCATCAGGGGCTAGGTTGAAGC
+TAGTCTTGCCCACCTGGAAGTTGCCGGCCTCCATTACAGGAGCAAGGACA
+AACAGCAGTGTAGCACTGCAGCGGATCCAATTCTGCCCCCTTTCCCTCAG
+CCCTACCCCCATCCCAAGCGCAAGACAGCCAGACCCCAGAGAAGCCGAGG
+ATGGGTGAGTTTTCCCATCCCACTTCGCCTTGATCTCCTTGTGGACGGGT
+TTTATGCTCAGTCATTACCTTTTAGTGGCCCACATGAAATTTTGTTAAAG
+GAAGAAATGAAAAGATTTTCCCCAGTCAGTCTTTCCTCTATTTAATTACA
+AAATGCTGGTGGGAACTGCTGCATCTGGGATGCAAGAAAATGCAGAAAGG
+GTGACTGAAAATTTTGCAAATGAACATGACTTCCCATGAAGTCTAATGTT
+CCATTCGCTGCCATGGTCCAGGGGACTCCCACCAGCTTCCACCGGCTTCA
+ACAGGATCTCCACTAGAGAGCCCAGACTTATCTAGTCCTGTCGGGGAAAA
+GGGAGAAGAGGCCTTGCAGGAGAAAGCTAACAGAAAATTCGTTACCTGAG
+GTCCTGCCTGCAGTTTCAAATAGCTTCCAGCAGTTTTACAAAACACATCC
+TTTCCATTTCTTCCTTTTAAATGTTTCCCTAAGAACGATCCATTTAGGTG
+CTATAAGTCCTCAGCCAGGGAGTCTCTGGGACACTGGCATTCAAAATTTT
+AAACTTCCGCCCCAAAACCAGGAACATTCCAAGACAGAACTCTTTTAGGG
+GGCCATTTCCTGGGGGTGGGGGAGAGGGCTTGGAATCAATGCTAGATTGA
+AAACGTTGTAATAGCTTTGCCCCAGACTTAACACCGGTTGGGCAGGAGGA
+GGGTAATTTTTATTTAGCCGTTTCTCCGATCATGTGGGGAATACCATTAG
+CTGTTGATAGCGGGCCATGTATCCGAGGAAAGCCTGAGCTACAAGGCAAA
+GGCATCCCATCTGGAACAAAATCAGAAAGCTATTGGCAAAGGTAATCAAT
+CAGGCCATAAATAGCCATTTACCCGCTTCCTTTTCGGGGCTGGAGGTGGG
+CCGGGAGCCCTCCAAGGGTGAGCTGGGCAACTTGTAGAGCAAGGAATATG
+CCCTCCGCTGCCGGCGCCCCGGCCGCTTTTGTCTGGGCTCCCAGCCGGGC
+TTCCGAGGCTTTGTACCATGGATTTGGGAGTGACAATGGGCATTTCCCTC
+AGATTCAAGGCTGCTCAACCTCACCTCTGTAGGGGGAAAAAAATCAGAAG
+GGAGTGTCCCAAGGACCTAGCCATTCGGCCGAATTTTTTAGACATTTTGG
+GAGTCTCCTCCGAGGCCTTTAAGTGCGAACCGCGCGAAGCGGCCCTGCCC
+GGGGAGACTCGCTGAGGCAGGGCTGAGGCGGCGGGCGGGAGCAAGCTGCT
+CTAGCATTTGGGTTCTGCCCTGTGGCGTGTTCTCTTCCAGGGCCTTTCCA
+GCATCATCGGAGAAGACGAAGCACCCTGGCCGCCACTGTCCGTGCTGCGC
+CAACTCGCCCGGCCGCCCGCCCTTCCGAGGGCAGGCAGAAGCCCCTCTGT
+GTCCTCCACCGCCGCGCCCCGGCTCGCCCCTCGGGCCGCGGCGTGTGCCC
+AGCCTCACGTCGGGGTGTGTGTGGCCGCGCGGGCGTGTGTGAGTGTGGCA
+GGGGGAGGGGGCCCTCCGATCTGCTCCATCCGTCCGTTTTATTAGGGACA
+CATTAATCTATAATCAAATACACCTCATAAAATTTTTATTGAAAGGCATA
+ATATCATTACAGAGGTCTTCCACCTGTTTTAAACAACACGACAAGCTGTG
+AGCAAGCGTGTGTGTGGGGATGTGTGGGGAGGGGTGGGTGTGAGTAGGGA
+GAGAGGCGAGGGGAGAACAGCTCCCCTCGGGCGCTAGGGGCCGCCCCGAG
+GGCCCGCCTGCCTCGGGCGACACCGGCCTGGCGCCCCCGCGGCCGCTCCG
+TGTGCCCTGGACTCGCCGCCCGCGGCTCGGAAGCTGGAGAGTCAGCGACG
+GGGCCCGACTGCGGGACCGAGGGCTGCAAGAAGAAGCGAACAAATAGTCC
+CCAGCGCCTCCTCTGGATGCGGTCGCGTCTGTGGTCCTGGCAGCCGCTGG
+GCGGGCCAGGCCAGGTCGGGCCGGGCCGAGCCGGGCACATGGACCTGGGC
+CTGCGGGCTCTAATTGCGGCGCTTATGTTGATGATTTTTTTTTTAATCAC
+AGCAGCCCCCAGTTTAGCGGACTGATTTACTCCCGGTATTGGTAAATATG
+ATCACGTGGGCCGCGCGACCAATGGTGGAGGCTGCAGCCTGCGAACTAGT
+CGGTGGCTCGGGCGCCGGCGGGGAGCTGCTCGGCGGCGGACAGTGTAATG
+TTGGGTGGGAGTGCGGGACGCCTCAAAATGTCTTCCAGTGGCACCCTCAG
+CAACTACTACGTGGACTCGCTTATAGGCCATGAGGGCGACGAGGTGTTCG
+CGGCGCGCTTCGGGCCGCCGGGGCCAGGCGCGCAGGGCCGGCCTGCAGGT
+GTGGCTGATGGCCCGGCCGCCACCGCCGCCGAGTTCGCCTCGTGTAGTTT
+TGCCCCCAGATCGGCCGTGTTCTCTGCCTCGTGGTCCGCGGTGCCCTCCC
+AGCCCCCGGCAGCGGCGGCGATGAGCGGCCTCTACCACCCGTACGTTCCC
+CCGCCGCCCCTGGCCGCCTCTGCCTCCGAGCCCGGCCGCTACGTGCGCTC
+CTGGATGGAGCCGCTGCCCGGCTTCCCGGGCGGTGCGGGCGGTGGCGGTG
+GTGGTGGAGGCGGCGGTCCGGGCCGCGGTCCCAGCCCTGGCCCCAGCGGC
+CCAGCCAACGGGCGCCACTACGGGATTAAGCCTGAAACCCGAGCGGCCCC
+GGCCCCCGCCACGGCCGCCTCCACCACCTCCTCCTCCTCCACTTCCTTAT
+CCTCCTCCTCCAAACGGACTGAGTGCTCCGTGGCCCGGGAGTCCCAGGGG
+AGCAGCGGCCCCGAGTTCTCGTGCAACTCGTTCCTGCAGGAGAAGGCGGC
+AGCGGCGACGGGGGGAACCGGGCCTGGGGCAGGGATCGGGGCCGCGACTG
+GGACGGGCGGCTCGTCGGAGCCCTCAGCTTGCAGCGACCACCCGATCCCA
+GGCTGTTCGCTGAAGGAGGAGGAGAAGCAGCATTCGCAGCCGCAGCAGCA
+GCAACTTGACCCAAGTAAGTGCAAAAGAAATTGCCCCCTGATTTATTGCT
+GAAACCTGTAAGGCTCGAATGTGCAAAACTGATAGTTTTACTAACCTATA
+AAAACGTCTAGACGCCTACCCAAGCCTAGGCGAACAACATGCATCCATAA
+AAAGAGCTTCCCATAACCACCTACCCTGGGCGCTCAGTTAGTACGGTAAA
+CAGAGCGCGAGCATTAAGGCTTTTTATGATAATTCCCCACAAGTTGTGAA
+AAGCGACCATCCTTGGTGAAATTAATTTAACGACCTCTCTTCCCCACCCT
+GTGGTCTCTCCCTGCCTCCCCTCCTCTCCTCTCTCCCCGTCTCCAAACCT
+CCCTCTTTGTAGACAACCCCGCCGCGAACTGGATCCACGCTCGCTCCACC
+CGGAAAAAGCGCTGTCCCTACACCAAATACCAGACGCTTGAGCTGGAGAA
+AGAATTCCTCTTCAACATGTACCTCACCCGGGACCGGCGCTACGAGGTGG
+CCAGGATTCTCAACCTAACAGAGAGACAGGTCAAAATCTGGTTTCAGAAC
+CGTAGGATGAAAATGAAAAAGATGAGCAAGGAGAAATGCCCCAAAGGAGA
+CTGACCCGGCGCGGTGCTGGCGGGAGCGCTCAAGGGCAGCGGATTTGTTG
+TTGTTGCTGTTTTCCTTTGTGGGTGTTTGGTGCTTGATTTCCAGAAACTC
+TCCAGCGACTTGGACTTCTTCTTCTTTTTTTTTTTCTTTTTAGATAGAAG
+TGACTGTGTGGTTGGTCTCTGAGGTATTTGGGGGACTCTGTATTTGCTCG
+TTTACGTGTTGGAAAAACCAAGTGGCTTTGGGGTTTCGCCCTATCCCACT
+CCCTCTCTTTCCTGCTCCATTGGTTCCTTAAGAAATGCTATATTTTGTGA
+GTGCAAGCTGGCTTGGGGAGCCCTCTCTTGTGTAAATGTCCCCCATGTTT
+CTGAAAAGTGCTGTAGTTTAGTCCCCTCACCCCCAGCACTGCCCAAACAG
+GGGCCAAGTGCGCCCCAATTCCAAGAATGAAGGCAGAGCGACAACAGTGC
+GGACACCCCGGCTGCTAGCCCACGGTGAAGCCCGGCGGGGTTGCCCACCA
+GTTGCGAAAGCCCCCTTTCCTCAGGGAGCACGCGGGACCTCGGTGGAGAT
+CTCCAGTGAGGCTTAGAGGAGCCCAGGGCCTCGGGCGGGTTGGGGTTTGT
+CCTCAGTGCATTGGACGCGCTGCTCTCTCCCCTGAAGGCTGGGCTCGCGT
+GGGCGGCCGCGGGTGGTGGCCCTCCCGGTTCCTGCCCGAGGACCAGTTGT
+AAATGTTACTGCTTCCTACTAATAAATGCTGACCTGATCAAATGGAGCCC
+AGACGCTGGCCCTAAACATTGTGTGCCTGCTTTCTCTGCCTCTCTGCAAA
+ATATCACACTCAGGATATTTCTCCTCTACCCCTGGGAGTGAGACATTGTT
+AAAAATTCAGGGCCCTTCCACCTGACAGATCTCTCTGATGTGTCTCTGCC
+TTCTCTGCCTCACATCCCTTTGTGTAGGCAGATGCAGCAGCA
--- a/test-data/cached_locally/ref.gtf	Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/cached_locally/ref.gtf	Fri Mar 11 14:08:11 2022 +0000
@@ -1,6 +1,20 @@
-1	ensembl_havana	gene	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1";
-1	ensembl_havana	transcript	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
-1	ensembl_havana	exon	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA";
-1	ensembl_havana	CDS	1	100	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level "NA";
-1	ensembl_havana	start_codon	1	3	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
-1	ensembl_havana	stop_codon	101	103	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+fake_chr2	HAVANA	gene	257	3416	.	+	.	gene_id "ENSG00000128710.6"; gene_type "protein_coding"; gene_name "HOXD10"; level 1; hgnc_id "HGNC:5133"; tag "overlapping_locus"; havana_gene "OTTHUMG00000132511.5";
+fake_chr2	HAVANA	transcript	257	3416	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	exon	257	1057	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	CDS	313	1057	.	+	0	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	start_codon	313	315	.	+	0	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	exon	2433	3416	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	CDS	2433	2707	.	+	2	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	stop_codon	2708	2710	.	+	0	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	UTR	257	312	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	UTR	2708	3416	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	gene	6198	8416	.	+	.	gene_id "ENSG00000128709.13"; gene_type "protein_coding"; gene_name "HOXD9"; level 2; hgnc_id "HGNC:5140"; havana_gene "OTTHUMG00000132516.6";
+fake_chr2	HAVANA	transcript	6198	8416	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	exon	6198	7064	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	CDS	6248	7064	.	+	0	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	start_codon	6248	6250	.	+	0	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	exon	7413	8416	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	CDS	7413	7651	.	+	2	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	stop_codon	7652	7654	.	+	0	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	UTR	6198	6247	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	UTR	7652	8416	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
--- a/test-data/gc.tab	Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/gc.tab	Fri Mar 11 14:08:11 2022 +0000
@@ -1,1 +1,2 @@
-ENSG00000162526	0.388349514563107
+ENSG00000128709.13	0.626402993051844
+ENSG00000128710.6	0.467226890756303
--- a/test-data/in.fasta	Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/in.fasta	Fri Mar 11 14:08:11 2022 +0000
@@ -1,2 +1,173 @@
->1
-AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT
\ No newline at end of file
+>fake_chr2 hg38_dna range=chr2:176116522-176125113 5'pad=0 3'pad=0 strand=+ repeatMasking=none
+TGGGGCGGGCTGGCCGAGCGAGCCCTGGAGAGGCGGACAGGAGGGCGGCG
+GAGAGCGCTGGGCCGGTTGTCTCCAGCGCGCACTATCGCGGGCGCGTAGT
+AGATGTCGCTGTTGTCCGTGCTTACCCGGCCGGCCGGCCAGGCTCTGGAG
+CACGTGACCCGAGAGGAGGCTGCGGCTCAAGGCCATTTTCAAATCTCATT
+GGCTTGGTTGTCATGTGGTCGGCAGAGGCATCCACAATTACACGGGGAAT
+GTTTTCCTAGAGATGTCAGCCTACAAAGGACACAATCTCTCTTCTTCAAA
+TTCTTCCCCAAAATGTCCTTTCCCAACAGCTCTCCTGCTGCTAATACTTT
+TTTAGTAGATTCCTTGATCAGTGCCTGCAGGAGTGACAGTTTTTATTCCA
+GCAGCGCCAGCATGTACATGCCACCACCTAGCGCAGACATGGGGACCTAT
+GGAATGCAAACCTGTGGACTGCTCCCGTCTCTGGCCAAAAGAGAAGTGAA
+CCACCAAAATATGGGTATGAATGTGCATCCTTATATACCTCAAGTAGACA
+GTTGGACAGATCCGAACAGATCTTGTCGAATAGAGCAACCTGTTACACAG
+CAAGTCCCCACTTGCTCCTTCACCACCAACATTAAGGAAGAATCCAATTG
+CTGCATGTATTCTGATAAGCGCAACAAACTCATTTCGGCCGAGGTCCCTT
+CGTACCAGAGGCTGGTCCCTGAGTCTTGTCCCGTTGAGAACCCTGAGGTT
+CCCGTCCCTGGATATTTTAGACTGAGTCAGACCTACGCCACCGGGAAAAC
+CCAAGAGTACAATAATAGCCCCGAAGGCAGCTCCACTGTCATGCTCCAGC
+TCAACCCTCGTGGCGCGGCCAAGCCGCAGCTCTCCGCTGCCCAGCTGCAG
+ATGGAAAAGAAGATGAACGAGCCCGTGAGCGGCCAGGAGCCCACCAAAGT
+CTCCCAGGTGGAGAGCCCCGAGGCCAAAGGCGGCCTTCCCGAAGAGAGGA
+GCTGCCTGGCTGAGGTCTCCGTGTCCAGTCCCGAAGTGCAGGAGAAGGAA
+AGCAAAGGTCGGTATGAGCAGAGTTGCCACCCCAGCGGGGCGCGCAGCCC
+GGGAACCCGGCAGAGAGGGAGTGCCGGGGTGCCCAGCGCCGAGCCGGAGC
+CCGACTTGGCAGGTGCTGCTCCGCCTGGTTTTAGAGGGGTGATCTCAGCC
+CTGAGATAGTCCCCGCTTCTCCCCTGCTGCCCTGGCCCTCTCCGCCAGTC
+CTGGCCCCACGCTGATGGCGCCCGGGCAGAGGAAAAGCTTGCCGGTTTTA
+TTTTTCCTGAGCTAGACCTGAACACAACAAAAGAGCGCAAAGGAGACCTG
+CGGCTCATAAACACGACCACAGAGCCTCTTTTCTCCTGCTCAGATTTGCA
+GTTCCAGTTTTGCCTTGAGCCCAATGATCATGTTAAGGTGATCCAGGGCA
+CCGTGTTCGTGTTCAAGTGTATGCACCCCGCATCCTGCGAGCTTGGGGGT
+GGTGAGGGGAAAGAGATGGCTGGGCTGGTTGGTGCTTGAGTTGGGAAACA
+GGGCTTACTGCCTTTGCTGGGCTAGGTAACCTTGGCTTTGTTTAGGAAAA
+GTGCTGCAGTCTTTGCAATCCGTCGGCAAAGAGGGCAAAGGCGGAGGGGG
+AGAGTGGAACCCGCATTGCCCTCCCTGCAAGGCCAGCCTTAGGGCTGGGC
+TAAGGCAAAGAGCCAGGGATCTGGCTTTTTGAGAAGGAACCCTCCTCCTC
+TCCCCCAGTGCTTAGAGGTGGGCCACAGTAGGGGGCTCCCTTTCTGGGGG
+AATGCTTTAGTGTGGGGGCAAGAAGACATGAAAATTAAGGAAATTCTGGG
+GAATGCAACAATACCCAGGCAAGGTGGGGGAAGGTGTCTCGCTTCCCCAT
+TTATCTTTTGAAAGAGAATGGGCACCTATAAACCTGACTGTCAGGATTCC
+TGACTGCCTAGGAGAGGTGGGGAAGAAGTGGCAGATTTGGGGACCTGAGG
+CAGCAGTGGGGTTGGTAGGCTTGTCCAGGTCGTGGCGTATTCCCCTCCGT
+CCCTGTTAGGAGCTGAACCCTTAGAATGTTGCTGGGGAGATCTGGAAAGT
+TTACTATTCTACTAATGTTTTGTACAAGTGAGAAAGTTGAAAGAGAGAGC
+GAGAACCCAAATGCAGACTGTCCTGCCATCATGTCATTTAAGTAATGTGG
+CATCAATGTAAGATTCCCTTCCAAGGCCCACTTCATGTGAGTAATGTTTA
+ATACTAGCATTTTCCAAAGCGGCCTGGCTGCCAGCAGGGTCACGGCCAAG
+GGTACATTTGAACAGTCTGAAGAAAAAAACAAAAACGAAAACCAAAACCA
+AAACCAAAACAAAAACAAAAACAAAAACAAACAAACAAAAAACCTCTTGA
+TTTTTTTCTTCTTCTCCCTTTAATTTTGTTAGAGGAAATCAAGTCTGATA
+CACCAACCAGCAATTGGCTCACTGCAAAGAGTGGCAGAAAGAAGAGGTGC
+CCTTACACTAAGCACCAAACGCTGGAATTAGAAAAAGAGTTCTTGTTCAA
+TATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAGTAAGAGCGTTAACC
+TCACCGACAGGCAGGTCAAGATTTGGTTTCAAAACCGCCGAATGAAACTC
+AAGAAGATGAGCCGAGAGAACCGGATCCGAGAACTGACCGCCAACCTCAC
+GTTTTCTTAGGTCTGAGGCCGGTCTGAGGCCGGTCAGAGGCCAGGATTGG
+AGAGGGGGCACCGCGTTCCAGGGCCCAGTGCTGGAGGACTGGGAAAGCGG
+AAACAAAACCTTCACCGCTCTTTGTTTGTTGTTTTGTTGTATTTTGTTTT
+CCTGCTAGAATGTGACTTTGGGGTCATTATGTTCGTGCTGCAAGTGATCT
+GTAATCCCTATGAGTATATATATATATATATATATATATATATAAAAACT
+TAGCACGTGTAATTTATTATTTTTTCATCGTAATGCAGGGTAACTATTAT
+TGCGCATTTTCATTTGGGTCTTAACTTATTGGAACTGTAGAGCATCCATC
+CATCCATCCATCCAGCAATGTGACTTTTTCATGTCTTTCCTAACACAAAA
+GGTCTATGTGTGTGGTTAGTCCATGAACTCATGGCATTTTGAATACATCC
+AGTACTTTAAAAATGACATATATATTTAAAAAAAAAAGATTAAGAAAACC
+CACAAGTTGGAGGGAGGGGGACTTAAAAAGCACATTACAATGTATCTTTT
+CACAAATGAATTTAGCAGTTGTCCTTGGTGAGATGGGATATTGGCGATTT
+ATGCCTTGTAGCCTTTCCCTTGTGGTGCATCTGTGGTTTGGTAGAAGTAC
+AACAGCAACCTGTCCTTTCTGTGCATGTTCTGGTCGCATGTATAATGCAA
+TAAACTCTGGAAATGAGTTCACTCCCTCTGCTTTCTGAAATGGAAATATG
+TTATGGTGGAAATGAAAGCCTATGGTGAGATTATCTTCTGGTTACACTCC
+CTGTTTGGGGCATTTGGGCAGGGGAGTGATAGACTAGTAGGGGAAGGGAG
+ATGGGGGAGAAAAGCTGGAGGAGGCCTAGGGTGTTGGATTTTGGCAGTGG
+TTGGGGGAGAGGAATTATAAGCTAGCTTGAGAGTGAAGTTTTCATAATTG
+GGAGGAAGGGGAGTCTCCTCTTTCCTTTCCCAGTCCCCAGTGATAGTAAC
+ATAATTGCGCTCTCAATGGGTGTGAGCTTTCCTCTGGCCTGAACCTGGTA
+AGTAAGCCTATACCCCAAGCCACTTTCTCCTCAAAGCTTCCCATTTGTGT
+GTTTTCTCCTCTTTGGTTTTGGTTGTGTTGTTTTTAATGCTTTCAGTGGC
+ATCTTGGTGATTTCTGGCTGGCGAGCAATCATCAGGGGCTAGGTTGAAGC
+TAGTCTTGCCCACCTGGAAGTTGCCGGCCTCCATTACAGGAGCAAGGACA
+AACAGCAGTGTAGCACTGCAGCGGATCCAATTCTGCCCCCTTTCCCTCAG
+CCCTACCCCCATCCCAAGCGCAAGACAGCCAGACCCCAGAGAAGCCGAGG
+ATGGGTGAGTTTTCCCATCCCACTTCGCCTTGATCTCCTTGTGGACGGGT
+TTTATGCTCAGTCATTACCTTTTAGTGGCCCACATGAAATTTTGTTAAAG
+GAAGAAATGAAAAGATTTTCCCCAGTCAGTCTTTCCTCTATTTAATTACA
+AAATGCTGGTGGGAACTGCTGCATCTGGGATGCAAGAAAATGCAGAAAGG
+GTGACTGAAAATTTTGCAAATGAACATGACTTCCCATGAAGTCTAATGTT
+CCATTCGCTGCCATGGTCCAGGGGACTCCCACCAGCTTCCACCGGCTTCA
+ACAGGATCTCCACTAGAGAGCCCAGACTTATCTAGTCCTGTCGGGGAAAA
+GGGAGAAGAGGCCTTGCAGGAGAAAGCTAACAGAAAATTCGTTACCTGAG
+GTCCTGCCTGCAGTTTCAAATAGCTTCCAGCAGTTTTACAAAACACATCC
+TTTCCATTTCTTCCTTTTAAATGTTTCCCTAAGAACGATCCATTTAGGTG
+CTATAAGTCCTCAGCCAGGGAGTCTCTGGGACACTGGCATTCAAAATTTT
+AAACTTCCGCCCCAAAACCAGGAACATTCCAAGACAGAACTCTTTTAGGG
+GGCCATTTCCTGGGGGTGGGGGAGAGGGCTTGGAATCAATGCTAGATTGA
+AAACGTTGTAATAGCTTTGCCCCAGACTTAACACCGGTTGGGCAGGAGGA
+GGGTAATTTTTATTTAGCCGTTTCTCCGATCATGTGGGGAATACCATTAG
+CTGTTGATAGCGGGCCATGTATCCGAGGAAAGCCTGAGCTACAAGGCAAA
+GGCATCCCATCTGGAACAAAATCAGAAAGCTATTGGCAAAGGTAATCAAT
+CAGGCCATAAATAGCCATTTACCCGCTTCCTTTTCGGGGCTGGAGGTGGG
+CCGGGAGCCCTCCAAGGGTGAGCTGGGCAACTTGTAGAGCAAGGAATATG
+CCCTCCGCTGCCGGCGCCCCGGCCGCTTTTGTCTGGGCTCCCAGCCGGGC
+TTCCGAGGCTTTGTACCATGGATTTGGGAGTGACAATGGGCATTTCCCTC
+AGATTCAAGGCTGCTCAACCTCACCTCTGTAGGGGGAAAAAAATCAGAAG
+GGAGTGTCCCAAGGACCTAGCCATTCGGCCGAATTTTTTAGACATTTTGG
+GAGTCTCCTCCGAGGCCTTTAAGTGCGAACCGCGCGAAGCGGCCCTGCCC
+GGGGAGACTCGCTGAGGCAGGGCTGAGGCGGCGGGCGGGAGCAAGCTGCT
+CTAGCATTTGGGTTCTGCCCTGTGGCGTGTTCTCTTCCAGGGCCTTTCCA
+GCATCATCGGAGAAGACGAAGCACCCTGGCCGCCACTGTCCGTGCTGCGC
+CAACTCGCCCGGCCGCCCGCCCTTCCGAGGGCAGGCAGAAGCCCCTCTGT
+GTCCTCCACCGCCGCGCCCCGGCTCGCCCCTCGGGCCGCGGCGTGTGCCC
+AGCCTCACGTCGGGGTGTGTGTGGCCGCGCGGGCGTGTGTGAGTGTGGCA
+GGGGGAGGGGGCCCTCCGATCTGCTCCATCCGTCCGTTTTATTAGGGACA
+CATTAATCTATAATCAAATACACCTCATAAAATTTTTATTGAAAGGCATA
+ATATCATTACAGAGGTCTTCCACCTGTTTTAAACAACACGACAAGCTGTG
+AGCAAGCGTGTGTGTGGGGATGTGTGGGGAGGGGTGGGTGTGAGTAGGGA
+GAGAGGCGAGGGGAGAACAGCTCCCCTCGGGCGCTAGGGGCCGCCCCGAG
+GGCCCGCCTGCCTCGGGCGACACCGGCCTGGCGCCCCCGCGGCCGCTCCG
+TGTGCCCTGGACTCGCCGCCCGCGGCTCGGAAGCTGGAGAGTCAGCGACG
+GGGCCCGACTGCGGGACCGAGGGCTGCAAGAAGAAGCGAACAAATAGTCC
+CCAGCGCCTCCTCTGGATGCGGTCGCGTCTGTGGTCCTGGCAGCCGCTGG
+GCGGGCCAGGCCAGGTCGGGCCGGGCCGAGCCGGGCACATGGACCTGGGC
+CTGCGGGCTCTAATTGCGGCGCTTATGTTGATGATTTTTTTTTTAATCAC
+AGCAGCCCCCAGTTTAGCGGACTGATTTACTCCCGGTATTGGTAAATATG
+ATCACGTGGGCCGCGCGACCAATGGTGGAGGCTGCAGCCTGCGAACTAGT
+CGGTGGCTCGGGCGCCGGCGGGGAGCTGCTCGGCGGCGGACAGTGTAATG
+TTGGGTGGGAGTGCGGGACGCCTCAAAATGTCTTCCAGTGGCACCCTCAG
+CAACTACTACGTGGACTCGCTTATAGGCCATGAGGGCGACGAGGTGTTCG
+CGGCGCGCTTCGGGCCGCCGGGGCCAGGCGCGCAGGGCCGGCCTGCAGGT
+GTGGCTGATGGCCCGGCCGCCACCGCCGCCGAGTTCGCCTCGTGTAGTTT
+TGCCCCCAGATCGGCCGTGTTCTCTGCCTCGTGGTCCGCGGTGCCCTCCC
+AGCCCCCGGCAGCGGCGGCGATGAGCGGCCTCTACCACCCGTACGTTCCC
+CCGCCGCCCCTGGCCGCCTCTGCCTCCGAGCCCGGCCGCTACGTGCGCTC
+CTGGATGGAGCCGCTGCCCGGCTTCCCGGGCGGTGCGGGCGGTGGCGGTG
+GTGGTGGAGGCGGCGGTCCGGGCCGCGGTCCCAGCCCTGGCCCCAGCGGC
+CCAGCCAACGGGCGCCACTACGGGATTAAGCCTGAAACCCGAGCGGCCCC
+GGCCCCCGCCACGGCCGCCTCCACCACCTCCTCCTCCTCCACTTCCTTAT
+CCTCCTCCTCCAAACGGACTGAGTGCTCCGTGGCCCGGGAGTCCCAGGGG
+AGCAGCGGCCCCGAGTTCTCGTGCAACTCGTTCCTGCAGGAGAAGGCGGC
+AGCGGCGACGGGGGGAACCGGGCCTGGGGCAGGGATCGGGGCCGCGACTG
+GGACGGGCGGCTCGTCGGAGCCCTCAGCTTGCAGCGACCACCCGATCCCA
+GGCTGTTCGCTGAAGGAGGAGGAGAAGCAGCATTCGCAGCCGCAGCAGCA
+GCAACTTGACCCAAGTAAGTGCAAAAGAAATTGCCCCCTGATTTATTGCT
+GAAACCTGTAAGGCTCGAATGTGCAAAACTGATAGTTTTACTAACCTATA
+AAAACGTCTAGACGCCTACCCAAGCCTAGGCGAACAACATGCATCCATAA
+AAAGAGCTTCCCATAACCACCTACCCTGGGCGCTCAGTTAGTACGGTAAA
+CAGAGCGCGAGCATTAAGGCTTTTTATGATAATTCCCCACAAGTTGTGAA
+AAGCGACCATCCTTGGTGAAATTAATTTAACGACCTCTCTTCCCCACCCT
+GTGGTCTCTCCCTGCCTCCCCTCCTCTCCTCTCTCCCCGTCTCCAAACCT
+CCCTCTTTGTAGACAACCCCGCCGCGAACTGGATCCACGCTCGCTCCACC
+CGGAAAAAGCGCTGTCCCTACACCAAATACCAGACGCTTGAGCTGGAGAA
+AGAATTCCTCTTCAACATGTACCTCACCCGGGACCGGCGCTACGAGGTGG
+CCAGGATTCTCAACCTAACAGAGAGACAGGTCAAAATCTGGTTTCAGAAC
+CGTAGGATGAAAATGAAAAAGATGAGCAAGGAGAAATGCCCCAAAGGAGA
+CTGACCCGGCGCGGTGCTGGCGGGAGCGCTCAAGGGCAGCGGATTTGTTG
+TTGTTGCTGTTTTCCTTTGTGGGTGTTTGGTGCTTGATTTCCAGAAACTC
+TCCAGCGACTTGGACTTCTTCTTCTTTTTTTTTTTCTTTTTAGATAGAAG
+TGACTGTGTGGTTGGTCTCTGAGGTATTTGGGGGACTCTGTATTTGCTCG
+TTTACGTGTTGGAAAAACCAAGTGGCTTTGGGGTTTCGCCCTATCCCACT
+CCCTCTCTTTCCTGCTCCATTGGTTCCTTAAGAAATGCTATATTTTGTGA
+GTGCAAGCTGGCTTGGGGAGCCCTCTCTTGTGTAAATGTCCCCCATGTTT
+CTGAAAAGTGCTGTAGTTTAGTCCCCTCACCCCCAGCACTGCCCAAACAG
+GGGCCAAGTGCGCCCCAATTCCAAGAATGAAGGCAGAGCGACAACAGTGC
+GGACACCCCGGCTGCTAGCCCACGGTGAAGCCCGGCGGGGTTGCCCACCA
+GTTGCGAAAGCCCCCTTTCCTCAGGGAGCACGCGGGACCTCGGTGGAGAT
+CTCCAGTGAGGCTTAGAGGAGCCCAGGGCCTCGGGCGGGTTGGGGTTTGT
+CCTCAGTGCATTGGACGCGCTGCTCTCTCCCCTGAAGGCTGGGCTCGCGT
+GGGCGGCCGCGGGTGGTGGCCCTCCCGGTTCCTGCCCGAGGACCAGTTGT
+AAATGTTACTGCTTCCTACTAATAAATGCTGACCTGATCAAATGGAGCCC
+AGACGCTGGCCCTAAACATTGTGTGCCTGCTTTCTCTGCCTCTCTGCAAA
+ATATCACACTCAGGATATTTCTCCTCTACCCCTGGGAGTGAGACATTGTT
+AAAAATTCAGGGCCCTTCCACCTGACAGATCTCTCTGATGTGTCTCTGCC
+TTCTCTGCCTCACATCCCTTTGTGTAGGCAGATGCAGCAGCA
--- a/test-data/in.gtf	Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/in.gtf	Fri Mar 11 14:08:11 2022 +0000
@@ -1,6 +1,20 @@
-1	ensembl_havana	gene	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1";
-1	ensembl_havana	transcript	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
-1	ensembl_havana	exon	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA";
-1	ensembl_havana	CDS	1	100	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level "NA";
-1	ensembl_havana	start_codon	1	3	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
-1	ensembl_havana	stop_codon	101	103	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+fake_chr2	HAVANA	gene	257	3416	.	+	.	gene_id "ENSG00000128710.6"; gene_type "protein_coding"; gene_name "HOXD10"; level 1; hgnc_id "HGNC:5133"; tag "overlapping_locus"; havana_gene "OTTHUMG00000132511.5";
+fake_chr2	HAVANA	transcript	257	3416	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	exon	257	1057	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	CDS	313	1057	.	+	0	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	start_codon	313	315	.	+	0	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	exon	2433	3416	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	CDS	2433	2707	.	+	2	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	stop_codon	2708	2710	.	+	0	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	UTR	257	312	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 1; exon_id "ENSE00000882912.6"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	UTR	2708	3416	.	+	.	gene_id "ENSG00000128710.6"; transcript_id "ENST00000249501.5"; gene_type "protein_coding"; gene_name "HOXD10"; transcript_type "protein_coding"; transcript_name "HOXD10-201"; exon_number 2; exon_id "ENSE00001816717.3"; level 2; protein_id "ENSP00000249501.4"; transcript_support_level "1"; hgnc_id "HGNC:5133"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2266.1"; havana_gene "OTTHUMG00000132511.5"; havana_transcript "OTTHUMT00000255692.3";
+fake_chr2	HAVANA	gene	6198	8416	.	+	.	gene_id "ENSG00000128709.13"; gene_type "protein_coding"; gene_name "HOXD9"; level 2; hgnc_id "HGNC:5140"; havana_gene "OTTHUMG00000132516.6";
+fake_chr2	HAVANA	transcript	6198	8416	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	exon	6198	7064	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	CDS	6248	7064	.	+	0	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	start_codon	6248	6250	.	+	0	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	exon	7413	8416	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	CDS	7413	7651	.	+	2	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	stop_codon	7652	7654	.	+	0	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	UTR	6198	6247	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 1; exon_id "ENSE00000882914.5"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
+fake_chr2	HAVANA	UTR	7652	8416	.	+	.	gene_id "ENSG00000128709.13"; transcript_id "ENST00000249499.8"; gene_type "protein_coding"; gene_name "HOXD9"; transcript_type "protein_coding"; transcript_name "HOXD9-201"; exon_number 2; exon_id "ENSE00001845001.4"; level 2; protein_id "ENSP00000249499.6"; transcript_support_level "1"; hgnc_id "HGNC:5140"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS2267.2"; havana_gene "OTTHUMG00000132516.6"; havana_transcript "OTTHUMT00000255698.6";
--- a/test-data/length.tab	Sun Jan 28 04:04:58 2018 -0500
+++ b/test-data/length.tab	Fri Mar 11 14:08:11 2022 +0000
@@ -1,1 +1,2 @@
-ENSG00000162526	103
+ENSG00000128709.13	1871
+ENSG00000128710.6	1785