changeset 0:928a52b5c938 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/brew3r_r commit 3e3c47b732510a9ef0b2864b284aa14308e75ab0
author iuc
date Tue, 11 Jun 2024 08:26:37 +0000
parents
children 3198f52bffaa
files brew3r.r_script.R brew3r_r.xml test-data/generate_test.R test-data/input.gtf test-data/output.gtf test-data/second_input.gtf
diffstat 6 files changed, 624 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/brew3r.r_script.R	Tue Jun 11 08:26:37 2024 +0000
@@ -0,0 +1,122 @@
+library("getopt")
+suppressPackageStartupMessages(library("rtracklayer"))
+library(GenomicRanges)
+library("BREW3R.r")
+
+# Session-wide options: keep strings as character vectors and use plain
+# (non-typographic) quotes. The option is spelled `stringsAsFactors`; the
+# previous spelling `stringAsFactors` only stored an unused option.
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+# Command-line arguments that follow the script name
+args <- commandArgs(trailingOnly = TRUE)
+# Command-line specification handed to getopt(): one row per flag, 5 columns.
+# - Column 1: the long flag name. A multi-character string.
+# - Column 2: short flag alias of Column 1. A single-character string.
+# - Column 3: Argument mask of the flag. An integer.
+# Possible values: 0=no argument, 1=required argument, 2=optional argument.
+# - Column 4: Data type to which the flag's argument shall be cast using
+# storage.mode(). A multi-character string. This is only considered for
+# same-row Column 3 values of 1,2. Possible values: logical, integer, double,
+# complex, character. If numeric is encountered then it will be converted to
+# double.
+# - Column 5 (optional): A brief description of the purpose of the option.
+spec <- matrix(c(
+    "help", "h", 0, "logical", "display help",
+    "gtf_to_extend", "i", 1, "character", "input gtf file to be extended on 3'",
+    "gtf_to_overlap", "g", 1, "character",
+    "input gtf file that will be used to extend",
+    "output", "o", 1, "character", "output extended gtf",
+    "sup_output", "s", 1, "character",
+    "supplementary output file with resolution of overlaps",
+    "no_add", "n", 0, "logical", "do not add new exons",
+    "exclude_pattern", "e", 1, "character", "do not extend genes with names matching this pattern",
+    "filter_unstranded", "f", 0, "logical",
+    "remove unstranded intervals from gtf_to_overlap which overlap intervals from gtf_to_extend of both strands",
+    "quiet", "q", 0, "logical", "decrease verbosity",
+    "verbose", "v", 0, "logical", "increase verbosity"
+), byrow = TRUE, ncol = 5)
+# Parsed options; the checks further down treat a flag that was not given
+# as NULL
+opt <- getopt(spec)
+
+# Help requested: print the usage text derived from `spec`, then quit with
+# exit status 1
+help_requested <- !is.null(opt$help)
+if (help_requested) {
+    usage_text <- getopt(spec, usage = TRUE)
+    cat(usage_text)
+    q(status = 1)
+}
+
+# Mandatory flags, checked in order: stop on the first one that is missing
+missing_flag_errors <- c(
+    gtf_to_extend = "--gtf_to_extend is required",
+    gtf_to_overlap = "--gtf_to_overlap is required",
+    output = "--output is required"
+)
+for (flag_name in names(missing_flag_errors)) {
+    if (is.null(opt[[flag_name]])) {
+        stop(missing_flag_errors[[flag_name]])
+    }
+}
+
+# quiet and verbose cannot be requested together
+quiet_mode <- !is.null(opt$quiet)
+verbose_mode <- !is.null(opt$verbose)
+if (quiet_mode && verbose_mode) {
+    stop("quiet and verbose are mutually exclusive options")
+}
+
+# Translate the verbosity flag (at most one is set here) into an R option
+if (quiet_mode) {
+    options(rlib_message_verbosity = "quiet")
+} else if (verbose_mode) {
+    options(BREW3R.r.verbose = "progression")
+}
+
+# Load gtfs as GenomicRanges:
+# - input_gr_to_extend: annotation given with --gtf_to_extend
+# - input_gr_template: annotation given with --gtf_to_overlap
+input_gr_to_extend <- rtracklayer::import(opt$gtf_to_extend, format = "gtf")
+input_gr_template <- rtracklayer::import(opt$gtf_to_overlap, format = "gtf")
+
+# Save CDS info: the CDS records are set aside here and concatenated back,
+# unchanged, to the extended exons just before the export
+input_gr_CDS <- subset(input_gr_to_extend, type == "CDS")
+
+# Filter the template if needed (--filter_unstranded): drop the unstranded
+# template intervals that overlap intervals to extend on more than one strand
+if (!is.null(opt$filter_unstranded)) {
+    # Find intervals without strand information in template
+    unstranded.intervals <- which(strand(input_gr_template) == "*")
+    if (length(unstranded.intervals) > 0) {
+        # Check if they overlap genes from input with different strands
+        # First compute the overlap
+        ov <- suppressWarnings(
+            as.data.frame(findOverlaps(
+                input_gr_template[unstranded.intervals],
+                input_gr_to_extend
+            ))
+        )
+        # Add the strand information
+        ov$strand <- as.factor(strand(input_gr_to_extend))[ov$subjectHits]
+        # Simplify the dataframe to get only the strand info
+        ov.simple <- unique(ov[, c("queryHits", "strand")])
+        # If the queryHits is duplicated it means there are different strands
+        multi.strand.query <- ov.simple$queryHits[duplicated(ov.simple$queryHits)]
+        # queryHits index the unstranded subset: map them back to positions
+        # in the full template
+        to.remove <- unique(unstranded.intervals[multi.strand.query])
+        # Remove these potentially error-prone intervals from the template.
+        # Only subset when something was flagged: a zero-length negative
+        # index (x[-integer(0)]) selects nothing and would empty the template.
+        if (length(to.remove) > 0) {
+            input_gr_template <- input_gr_template[-to.remove]
+        }
+    }
+}
+
+# Extend the annotation with BREW3R.r; new exons are added unless --no_add
+# was given
+allow_new_exons <- is.null(opt$no_add)
+new_gr_exons <- extend_granges(
+    input_gr_to_extend = input_gr_to_extend,
+    input_gr_to_overlap = input_gr_template,
+    add_new_exons = allow_new_exons,
+    overlap_resolution_fn = opt$sup_output
+)
+# Genes whose name matches --exclude_pattern get their original exons back
+if (!is.null(opt$exclude_pattern)) {
+    # Result of the extension for the genes that do not match the pattern
+    new_gr_no_pattern <- subset(
+        new_gr_exons,
+        !grepl(opt$exclude_pattern, gene_name)
+    )
+    # Original exons of the genes that match the pattern
+    input_gr_pattern <- subset(
+        input_gr_to_extend,
+        grepl(opt$exclude_pattern, gene_name) & type == "exon"
+    )
+    new_gr_exons <- c(new_gr_no_pattern, input_gr_pattern)
+}
+
+# Put the CDS records back next to the exons
+new_gr <- c(new_gr_exons, input_gr_CDS)
+
+# Order by position regardless of strand, then write the result
+sorted_gr <- sort(new_gr, ignore.strand = TRUE)
+rtracklayer::export.gff(sorted_gr, opt$output)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/brew3r_r.xml	Tue Jun 11 08:26:37 2024 +0000
@@ -0,0 +1,152 @@
+<tool id="brew3r_r" name="BREW3R.r" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
+    <description>Extend GTF</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.0.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <edam_topics>
+        <edam_topic>topic_3308</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_0362</edam_operation>
+    </edam_operations>
+    <xrefs>
+        <!-- <xref type="bio.tools">BREW3R.r</xref> -->
+        <xref type="bioconductor">BREW3R.r</xref>
+    </xrefs>
+    <requirements>
+        <!-- <requirement type="package" version="@TOOL_VERSION@">bioconductor-brew3r.r</requirement>
+        <requirement type="package" version="1.64.0">bioconductor-rtracklayer</requirement>
+        <requirement type="package" version="1.20.4">r-getopt</requirement> -->
+        <container type="docker">lldelisle/brew3r:v2</container>
+    </requirements>
+    <required_files>
+        <include path="brew3r.r_script.R" />
+    </required_files>
+    <version_command><![CDATA[
+echo $(R --version | grep version | grep -v GNU)", BREW3R.r version" $(R --vanilla --slave -e "library(BREW3R.r); cat(sessionInfo()\$otherPkgs\$BREW3R.r\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+    ]]></version_command>
+    <!-- Runs the bundled R script. The two input GTF are always passed; the
+         supplementary table, no_add, exclude_pattern and filter_unstranded
+         arguments are appended only when the matching parameter is set.
+         The extended GTF is written to output.gtf in the working directory
+         and collected from there by the "output" dataset. -->
+    <command detect_errors="exit_code"><![CDATA[
+Rscript '${__tool_directory__}/brew3r.r_script.R'
+    --gtf_to_extend '$gtf_to_extend'
+    --gtf_to_overlap '$gtf_to_overlap'
+    #if '$sup_output' == 'true':
+        --sup_output '$output_table'
+    #end if
+    #if str($no_add) != '':
+        '$no_add'
+    #end if
+    #if str($exclude_pattern) != '':
+        --exclude_pattern '$exclude_pattern'
+    #end if
+    #if str($filter_unstranded) != '':
+        '$filter_unstranded'
+    #end if
+    -o output.gtf
+
+    ]]></command>
+    <!-- Tool parameters. The boolean flags carry the literal command-line
+         flag as their true value; the sanitizer on exclude_pattern strips
+         single quotes and backslashes because the value is placed inside
+         single quotes on the command line. -->
+    <inputs>
+        <param argument="--gtf_to_extend" type="data" format="gtf" label="Input gtf file to be extended on 3'" help="Usually coming from public resource." />
+        <param argument="--gtf_to_overlap" type="data" format="gtf" label="Input gtf file that will be used to extend" help="Coming from StringTie or another public resource." />
+        <param argument="--sup_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Get a supplementary output table with resolution of overlaps" />
+        <param argument="--no_add" type="boolean" truevalue="--no_add" falsevalue="" checked="false" label="Do not add new exons" />
+        <param argument="--exclude_pattern" type="text" value="" label="Do not extend genes with names matching this pattern" help="Leave empty if you want to extend all genes.">
+            <sanitizer>
+              <valid initial="string.printable">
+               <remove value="'"/>
+               <remove value="\"/>
+             </valid>
+            </sanitizer>
+        </param>
+        <param argument="--filter_unstranded" type="boolean" truevalue="--filter_unstranded" falsevalue="" checked="false" label="Filter unstranded intervals that overlap genes of both strands" help="Recommended if you used StringTie on unstranded libraries." />
+    </inputs>
+    <!-- "output" is picked up from output.gtf in the working directory;
+         "output_table" is only created when sup_output is checked. -->
+    <outputs>
+        <data name="output" format="gtf" from_work_dir="output.gtf" label="${tool.name} on ${gtf_to_extend.name} and ${gtf_to_overlap.name}: GTF" />
+        <data name="output_table" format="tabular" label="${tool.name} on ${gtf_to_extend.name} and ${gtf_to_overlap.name}: overlap resolution">
+            <filter>sup_output == True</filter>
+        </data>
+    </outputs>
+    <!-- All tests use input.gtf as the annotation to extend and
+         second_input.gtf as the template. -->
+    <tests>
+        <!-- Default parameters: result compared with the reference output.gtf -->
+        <test expect_num_outputs="1">
+            <param name="gtf_to_extend" value="input.gtf"/>
+            <param name="gtf_to_overlap" value="second_input.gtf"/>
+            <output name="output" value="output.gtf" compare="diff" lines_diff="2"/>
+        </test>
+        <!-- no_add: no exon with a BREW3R id is expected in the result -->
+        <test expect_num_outputs="1">
+            <param name="gtf_to_extend" value="input.gtf"/>
+            <param name="gtf_to_overlap" value="second_input.gtf"/>
+            <param name="no_add" value="true"/>
+            <output name="output">
+                <assert_contents>
+                    <has_n_lines n="31"/>
+                    <not_has_text text="BREW3R"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- exclude_pattern matching a gene name (Gm001): exon111 must not
+             be extended -->
+        <test expect_num_outputs="1">
+            <param name="gtf_to_extend" value="input.gtf"/>
+            <param name="gtf_to_overlap" value="second_input.gtf"/>
+            <param name="exclude_pattern" value="^Gm"/>
+            <output name="output">
+                <assert_contents>
+                    <has_n_lines n="34"/>
+                    <not_has_text text="exon111.ext"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- exclude_pattern matching no gene name: same result as the default
+             run; also checks how the pattern is quoted on the command line -->
+        <test expect_num_outputs="1">
+            <param name="gtf_to_extend" value="input.gtf"/>
+            <param name="gtf_to_overlap" value="second_input.gtf"/>
+            <param name="exclude_pattern" value="Gm$"/>
+            <output name="output" value="output.gtf" compare="diff" lines_diff="2"/>
+            <assert_command>
+                <has_text text="--exclude_pattern 'Gm$'"/>
+            </assert_command>
+        </test>
+        <!-- filter_unstranded: exon121 must not be extended -->
+        <test expect_num_outputs="1">
+            <param name="gtf_to_extend" value="input.gtf"/>
+            <param name="gtf_to_overlap" value="second_input.gtf"/>
+            <param name="filter_unstranded" value="true"/>
+            <output name="output">
+                <assert_contents>
+                    <has_n_lines n="36"/>
+                    <not_has_text text="exon121.ext"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+This tool extends the annotations of an input GTF file at their 3' end using annotations from another input GTF. During the process, it makes sure that no new overlaps between different genes are created.
+
+Usage
+.....
+
+
+**Input**
+
+2 GTF files:
+
+- The first one, to be extended, usually comes from a public resource.
+- The second one, used as template, may come from a public resource or from StringTie.
+
+
+**Output**
+
+1 GTF file with all exons from the input GTF where some of them have been extended (the exon_id ends with '.ext') and potentially new exons (the exon_id contains BREW3R).
+
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+            @unpublished{None,
+            author = {Lucille Lopez-Delisle},
+            title = {None},
+            year = {None},
+            eprint = {None},
+            url = {https://github.com/lldelisle/BREW3R.r}
+        }</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/generate_test.R	Tue Jun 11 08:26:37 2024 +0000
@@ -0,0 +1,262 @@
+library(GenomicRanges)
+# Template annotation ("to overlap"): two exon patterns copied into slots of
+# 100 bp along chr1 and relabelled with the letter of their slot
+
+# Single-exon pattern
+input_to_overlap_case1_2_3_4_6_7_8 <- GenomicRanges::GRanges(
+    seqnames = "chr1",
+    ranges = IRanges::IRanges(start = 3, end = 25),
+    strand = "+",
+    gene_id = "geneA",
+    transcript_id = "transcriptA",
+    type = "exon",
+    exon_id = "exonA"
+)
+# Four-exon pattern
+input_to_overlap_case5_9 <- GenomicRanges::GRanges(
+    seqnames = "chr1",
+    ranges = IRanges::IRanges(
+        start = c(1, 33, 45, 72),
+        end = c(25, 40, 60, 75)
+    ),
+    strand = "+",
+    gene_id = "geneA",
+    transcript_id = "transcriptA",
+    type = "exon",
+    exon_id = c("exonA", "exonB", "exonC", "exonD")
+)
+
+# Copy `pattern` into slot `slot` and rename its ids after the slot letter;
+# `exon_suffix` is appended to the exon ids (one value per exon, or "")
+place_in_slot <- function(pattern, slot, exon_suffix, exon_number) {
+    placed <- shift(pattern, 100 * (slot - 1))
+    slot_letter <- LETTERS[slot]
+    placed$gene_id <- paste0("gene", slot_letter)
+    placed$transcript_id <- paste0("transcript", slot_letter)
+    placed$exon_id <- paste0("exon", slot_letter, exon_suffix)
+    placed$exon_number <- exon_number
+    placed
+}
+
+# Single-exon copies first, then the four-exon copies (slots 6 and 11)
+single_exon_copies <- lapply(c(1:5, 7:10), function(slot) {
+    place_in_slot(input_to_overlap_case1_2_3_4_6_7_8, slot, "", 1)
+})
+multi_exon_copies <- lapply(c(6, 11), function(slot) {
+    place_in_slot(input_to_overlap_case5_9, slot, letters[1:4], 1:4)
+})
+big_gr <- c(single_exon_copies, multi_exon_copies)
+big_gr <- unlist(as(big_gr, "GRangesList"))
+
+
+
+# Annotation to extend. Each group of exons is described by its coordinates
+# and ids, then moved `offset` bp downstream so that the groups sit at
+# distinct positions of chr1 (all on the "+" strand).
+exon_group <- function(start, end, gene_id, transcript_id, exon_id,
+                       offset = 0) {
+    group <- GenomicRanges::GRanges(
+        seqnames = "chr1",
+        ranges = IRanges::IRanges(start = start, end = end),
+        strand = "+",
+        gene_id = gene_id,
+        transcript_id = transcript_id,
+        type = "exon",
+        exon_id = exon_id
+    )
+    if (offset != 0) {
+        group <- shift(group, offset)
+    }
+    group
+}
+
+input_gr <- c(
+    # 1
+    exon_group(
+        start = c(5, 20), end = c(10, 30),
+        gene_id = c("gene11", "gene12"),
+        transcript_id = c("transcript11", "transcript12"),
+        exon_id = c("exon11", "exon12")
+    ),
+    # 2
+    exon_group(
+        start = c(5, 20), end = c(10, 25),
+        gene_id = c("gene21", "gene22"),
+        transcript_id = c("transcript21", "transcript22"),
+        exon_id = c("exon21", "exon22"),
+        offset = 100
+    ),
+    # 3_5
+    exon_group(
+        start = c(5, 20), end = c(10, 22),
+        gene_id = c("gene31", "gene32"),
+        transcript_id = c("transcript31", "transcript32"),
+        exon_id = c("exon31", "exon32"),
+        offset = 200
+    ),
+    # 4
+    exon_group(
+        start = c(5, 5), end = c(10, 22),
+        gene_id = c("gene41", "gene42"),
+        transcript_id = c("transcript41", "transcript42"),
+        exon_id = c("exon41", "exon42"),
+        offset = 300
+    ),
+    # 4bis
+    exon_group(
+        start = c(5, 5), end = c(10, 25),
+        gene_id = c("gene51", "gene52"),
+        transcript_id = c("transcript51", "transcript52"),
+        exon_id = c("exon51", "exon52"),
+        offset = 400
+    ),
+    # 3_5
+    exon_group(
+        start = c(5, 20), end = c(10, 22),
+        gene_id = c("gene61", "gene62"),
+        transcript_id = c("transcript61", "transcript62"),
+        exon_id = c("exon61", "exon62"),
+        offset = 500
+    ),
+    # 6
+    exon_group(
+        start = c(1, 1, 30), end = c(10, 10, 40),
+        gene_id = "gene71",
+        transcript_id = c("transcript71", "transcript72", "transcript72"),
+        exon_id = c("exon71", "exon71", "exon72"),
+        offset = 600
+    ),
+    # 6bis
+    exon_group(
+        start = c(1, 1, 30), end = c(10, 10, 40),
+        gene_id = c("gene81", "gene82", "gene82"),
+        transcript_id = c("transcript81", "transcript82", "transcript82"),
+        exon_id = c("exon81", "exon82", "exon83"),
+        offset = 700
+    ),
+    # 7
+    exon_group(
+        start = c(1, 1, 30), end = c(8, 10, 40),
+        gene_id = "gene1",
+        transcript_id = c("transcript91", "transcript92", "transcript92"),
+        exon_id = c("exon91", "exon92", "exon93"),
+        offset = 800
+    ),
+    # 8
+    exon_group(
+        start = c(1, 1, 30), end = c(8, 10, 40),
+        gene_id = c("gene101", "gene102", "gene102"),
+        transcript_id = c("transcript101", "transcript102", "transcript102"),
+        exon_id = c("exon101", "exon102", "exon103"),
+        offset = 900
+    ),
+    # 9
+    exon_group(
+        start = c(5, 55), end = c(10, 70),
+        gene_id = c("gene111", "gene112"),
+        transcript_id = c("transcript111", "transcript112"),
+        exon_id = c("exon111", "exon112"),
+        offset = 1000
+    )
+)
+## Add convergent genes overlapping an unstranded template interval:
+## gene121 ("+", 1100-1105) and gene122 ("-", 1110-1120) are appended to the
+## annotation to extend...
+input_gr <- c(
+    input_gr,
+    GenomicRanges::GRanges(
+        seqnames = "chr1",
+        ranges = IRanges::IRanges(
+            start = c(1100, 1110),
+            end = c(1105, 1120)
+        ),
+        strand = c("+", "-"),
+        gene_id = c("gene121", "gene122"),
+        transcript_id = c("transcript121", "transcript122"),
+        type = "exon",
+        exon_id = c("exon121", "exon122")
+    )
+)
+## ... and an unstranded ("*") exon spanning 1103-1113, which overlaps both
+## of them, is appended to the template
+big_gr <- c(
+    big_gr,
+    GenomicRanges::GRanges(
+        seqnames = "chr1",
+        ranges = IRanges::IRanges(
+            start = 1103,
+            end = 1113
+        ),
+        strand = "*",
+        gene_id = "geneL",
+        transcript_id = "transcriptL",
+        type = "exon",
+        exon_id = "exonL"
+    )
+)
+# gene_name mirrors gene_id, except for gene111 which is named "Gm001"
+input_gr$gene_name <- input_gr$gene_id
+input_gr$gene_name[input_gr$gene_id == "gene111"] <- "Gm001"
+# Reference result: extension with the default arguments of extend_granges
+library(BREW3R.r)
+new.gr <- extend_granges(input_gr, big_gr)
+# Write the three fixtures to the current working directory; the reference
+# output is sorted by position, ignoring strand
+library("rtracklayer")
+export.gff(input_gr, "input.gtf")
+export.gff(big_gr, "second_input.gtf")
+export.gff(sort(new.gr, ignore.strand = TRUE), "output.gtf")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.gtf	Tue Jun 11 08:26:37 2024 +0000
@@ -0,0 +1,31 @@
+##gff-version 2
+##source-version rtracklayer 1.64.0
+##date 2024-06-06
+chr1	rtracklayer	exon	5	10	.	+	.	gene_id "gene11"; transcript_id "transcript11"; exon_id "exon11"; gene_name "gene11"
+chr1	rtracklayer	exon	20	30	.	+	.	gene_id "gene12"; transcript_id "transcript12"; exon_id "exon12"; gene_name "gene12"
+chr1	rtracklayer	exon	105	110	.	+	.	gene_id "gene21"; transcript_id "transcript21"; exon_id "exon21"; gene_name "gene21"
+chr1	rtracklayer	exon	120	125	.	+	.	gene_id "gene22"; transcript_id "transcript22"; exon_id "exon22"; gene_name "gene22"
+chr1	rtracklayer	exon	205	210	.	+	.	gene_id "gene31"; transcript_id "transcript31"; exon_id "exon31"; gene_name "gene31"
+chr1	rtracklayer	exon	220	222	.	+	.	gene_id "gene32"; transcript_id "transcript32"; exon_id "exon32"; gene_name "gene32"
+chr1	rtracklayer	exon	305	310	.	+	.	gene_id "gene41"; transcript_id "transcript41"; exon_id "exon41"; gene_name "gene41"
+chr1	rtracklayer	exon	305	322	.	+	.	gene_id "gene42"; transcript_id "transcript42"; exon_id "exon42"; gene_name "gene42"
+chr1	rtracklayer	exon	405	410	.	+	.	gene_id "gene51"; transcript_id "transcript51"; exon_id "exon51"; gene_name "gene51"
+chr1	rtracklayer	exon	405	425	.	+	.	gene_id "gene52"; transcript_id "transcript52"; exon_id "exon52"; gene_name "gene52"
+chr1	rtracklayer	exon	505	510	.	+	.	gene_id "gene61"; transcript_id "transcript61"; exon_id "exon61"; gene_name "gene61"
+chr1	rtracklayer	exon	520	522	.	+	.	gene_id "gene62"; transcript_id "transcript62"; exon_id "exon62"; gene_name "gene62"
+chr1	rtracklayer	exon	601	610	.	+	.	gene_id "gene71"; transcript_id "transcript71"; exon_id "exon71"; gene_name "gene71"
+chr1	rtracklayer	exon	601	610	.	+	.	gene_id "gene71"; transcript_id "transcript72"; exon_id "exon71"; gene_name "gene71"
+chr1	rtracklayer	exon	630	640	.	+	.	gene_id "gene71"; transcript_id "transcript72"; exon_id "exon72"; gene_name "gene71"
+chr1	rtracklayer	exon	701	710	.	+	.	gene_id "gene81"; transcript_id "transcript81"; exon_id "exon81"; gene_name "gene81"
+chr1	rtracklayer	exon	701	710	.	+	.	gene_id "gene82"; transcript_id "transcript82"; exon_id "exon82"; gene_name "gene82"
+chr1	rtracklayer	exon	730	740	.	+	.	gene_id "gene82"; transcript_id "transcript82"; exon_id "exon83"; gene_name "gene82"
+chr1	rtracklayer	exon	801	808	.	+	.	gene_id "gene1"; transcript_id "transcript91"; exon_id "exon91"; gene_name "gene1"
+chr1	rtracklayer	exon	801	810	.	+	.	gene_id "gene1"; transcript_id "transcript92"; exon_id "exon92"; gene_name "gene1"
+chr1	rtracklayer	exon	830	840	.	+	.	gene_id "gene1"; transcript_id "transcript92"; exon_id "exon93"; gene_name "gene1"
+chr1	rtracklayer	exon	901	908	.	+	.	gene_id "gene101"; transcript_id "transcript101"; exon_id "exon101"; gene_name "gene101"
+chr1	rtracklayer	exon	901	910	.	+	.	gene_id "gene102"; transcript_id "transcript102"; exon_id "exon102"; gene_name "gene102"
+chr1	rtracklayer	exon	930	940	.	+	.	gene_id "gene102"; transcript_id "transcript102"; exon_id "exon103"; gene_name "gene102"
+chr1	rtracklayer	exon	1005	1010	.	+	.	gene_id "gene111"; transcript_id "transcript111"; exon_id "exon111"; gene_name "Gm001"
+chr1	rtracklayer	exon	1055	1070	.	+	.	gene_id "gene112"; transcript_id "transcript112"; exon_id "exon112"; gene_name "gene112"
+chr1	rtracklayer	exon	1100	1105	.	+	.	gene_id "gene121"; transcript_id "transcript121"; exon_id "exon121"; gene_name "gene121"
+chr1	rtracklayer	exon	1110	1120	.	-	.	gene_id "gene122"; transcript_id "transcript122"; exon_id "exon122"; gene_name "gene122"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.gtf	Tue Jun 11 08:26:37 2024 +0000
@@ -0,0 +1,36 @@
+##gff-version 2
+##source-version rtracklayer 1.64.0
+##date 2024-06-06
+chr1	rtracklayer	exon	5	19	.	+	.	gene_id "gene11"; gene_name "gene11"; transcript_id "transcript11"; exon_id "exon11.ext"; exon_number 1
+chr1	rtracklayer	exon	20	30	.	+	.	gene_id "gene12"; gene_name "gene12"; transcript_id "transcript12"; exon_id "exon12"; exon_number 1
+chr1	rtracklayer	exon	105	119	.	+	.	gene_id "gene21"; gene_name "gene21"; transcript_id "transcript21"; exon_id "exon21.ext"; exon_number 1
+chr1	rtracklayer	exon	120	125	.	+	.	gene_id "gene22"; gene_name "gene22"; transcript_id "transcript22"; exon_id "exon22"; exon_number 1
+chr1	rtracklayer	exon	205	219	.	+	.	gene_id "gene31"; gene_name "gene31"; transcript_id "transcript31"; exon_id "exon31.ext"; exon_number 1
+chr1	rtracklayer	exon	220	225	.	+	.	gene_id "gene32"; gene_name "gene32"; transcript_id "transcript32"; exon_id "exon32.ext"; exon_number 1
+chr1	rtracklayer	exon	305	310	.	+	.	gene_id "gene41"; gene_name "gene41"; transcript_id "transcript41"; exon_id "exon41"; exon_number 1
+chr1	rtracklayer	exon	305	325	.	+	.	gene_id "gene42"; gene_name "gene42"; transcript_id "transcript42"; exon_id "exon42.ext"; exon_number 1
+chr1	rtracklayer	exon	405	410	.	+	.	gene_id "gene51"; gene_name "gene51"; transcript_id "transcript51"; exon_id "exon51"; exon_number 1
+chr1	rtracklayer	exon	405	425	.	+	.	gene_id "gene52"; gene_name "gene52"; transcript_id "transcript52"; exon_id "exon52"; exon_number 1
+chr1	rtracklayer	exon	505	519	.	+	.	gene_id "gene61"; gene_name "gene61"; transcript_id "transcript61"; exon_id "exon61.ext"; exon_number 1
+chr1	rtracklayer	exon	520	525	.	+	.	gene_id "gene62"; gene_name "gene62"; transcript_id "transcript62"; exon_id "exon62.ext"; exon_number 1
+chr1	rtracklayer	exon	533	540	.	+	.	gene_id "gene62"; gene_name "gene62"; transcript_id "transcript62"; exon_id "BREW3R0000000003"; exon_number 2
+chr1	rtracklayer	exon	545	560	.	+	.	gene_id "gene62"; gene_name "gene62"; transcript_id "transcript62"; exon_id "BREW3R0000000004"; exon_number 3
+chr1	rtracklayer	exon	572	575	.	+	.	gene_id "gene62"; gene_name "gene62"; transcript_id "transcript62"; exon_id "BREW3R0000000005"; exon_number 4
+chr1	rtracklayer	exon	601	610	.	+	.	gene_id "gene71"; gene_name "gene71"; transcript_id "transcript72"; exon_id "exon71"; exon_number 1
+chr1	rtracklayer	exon	601	625	.	+	.	gene_id "gene71"; gene_name "gene71"; transcript_id "transcript71"; exon_id "exon71.ext"; exon_number 1
+chr1	rtracklayer	exon	630	640	.	+	.	gene_id "gene71"; gene_name "gene71"; transcript_id "transcript72"; exon_id "exon72"; exon_number 2
+chr1	rtracklayer	exon	701	710	.	+	.	gene_id "gene82"; gene_name "gene82"; transcript_id "transcript82"; exon_id "exon82"; exon_number 1
+chr1	rtracklayer	exon	701	725	.	+	.	gene_id "gene81"; gene_name "gene81"; transcript_id "transcript81"; exon_id "exon81.ext"; exon_number 1
+chr1	rtracklayer	exon	730	740	.	+	.	gene_id "gene82"; gene_name "gene82"; transcript_id "transcript82"; exon_id "exon83"; exon_number 2
+chr1	rtracklayer	exon	801	810	.	+	.	gene_id "gene1"; gene_name "gene1"; transcript_id "transcript92"; exon_id "exon92"; exon_number 1
+chr1	rtracklayer	exon	801	825	.	+	.	gene_id "gene1"; gene_name "gene1"; transcript_id "transcript91"; exon_id "exon91.ext"; exon_number 1
+chr1	rtracklayer	exon	830	840	.	+	.	gene_id "gene1"; gene_name "gene1"; transcript_id "transcript92"; exon_id "exon93"; exon_number 2
+chr1	rtracklayer	exon	901	908	.	+	.	gene_id "gene101"; gene_name "gene101"; transcript_id "transcript101"; exon_id "exon101"; exon_number 1
+chr1	rtracklayer	exon	901	910	.	+	.	gene_id "gene102"; gene_name "gene102"; transcript_id "transcript102"; exon_id "exon102"; exon_number 1
+chr1	rtracklayer	exon	930	940	.	+	.	gene_id "gene102"; gene_name "gene102"; transcript_id "transcript102"; exon_id "exon103"; exon_number 2
+chr1	rtracklayer	exon	1005	1025	.	+	.	gene_id "gene111"; gene_name "Gm001"; transcript_id "transcript111"; exon_id "exon111.ext"; exon_number 1
+chr1	rtracklayer	exon	1033	1040	.	+	.	gene_id "gene111"; gene_name "Gm001"; transcript_id "transcript111"; exon_id "BREW3R0000000001"; exon_number 2
+chr1	rtracklayer	exon	1045	1054	.	+	.	gene_id "gene111"; gene_name "Gm001"; transcript_id "transcript111"; exon_id "BREW3R0000000002"; exon_number 3
+chr1	rtracklayer	exon	1055	1070	.	+	.	gene_id "gene112"; gene_name "gene112"; transcript_id "transcript112"; exon_id "exon112"; exon_number 1
+chr1	rtracklayer	exon	1100	1113	.	+	.	gene_id "gene121"; gene_name "gene121"; transcript_id "transcript121"; exon_id "exon121.ext"; exon_number 1
+chr1	rtracklayer	exon	1103	1120	.	-	.	gene_id "gene122"; gene_name "gene122"; transcript_id "transcript122"; exon_id "exon122.ext"; exon_number 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/second_input.gtf	Tue Jun 11 08:26:37 2024 +0000
@@ -0,0 +1,21 @@
+##gff-version 2
+##source-version rtracklayer 1.64.0
+##date 2024-06-06
+chr1	rtracklayer	exon	3	25	.	+	.	gene_id "geneA"; transcript_id "transcriptA"; exon_id "exonA"; exon_number 1
+chr1	rtracklayer	exon	103	125	.	+	.	gene_id "geneB"; transcript_id "transcriptB"; exon_id "exonB"; exon_number 1
+chr1	rtracklayer	exon	203	225	.	+	.	gene_id "geneC"; transcript_id "transcriptC"; exon_id "exonC"; exon_number 1
+chr1	rtracklayer	exon	303	325	.	+	.	gene_id "geneD"; transcript_id "transcriptD"; exon_id "exonD"; exon_number 1
+chr1	rtracklayer	exon	403	425	.	+	.	gene_id "geneE"; transcript_id "transcriptE"; exon_id "exonE"; exon_number 1
+chr1	rtracklayer	exon	603	625	.	+	.	gene_id "geneG"; transcript_id "transcriptG"; exon_id "exonG"; exon_number 1
+chr1	rtracklayer	exon	703	725	.	+	.	gene_id "geneH"; transcript_id "transcriptH"; exon_id "exonH"; exon_number 1
+chr1	rtracklayer	exon	803	825	.	+	.	gene_id "geneI"; transcript_id "transcriptI"; exon_id "exonI"; exon_number 1
+chr1	rtracklayer	exon	903	925	.	+	.	gene_id "geneJ"; transcript_id "transcriptJ"; exon_id "exonJ"; exon_number 1
+chr1	rtracklayer	exon	501	525	.	+	.	gene_id "geneF"; transcript_id "transcriptF"; exon_id "exonFa"; exon_number 1
+chr1	rtracklayer	exon	533	540	.	+	.	gene_id "geneF"; transcript_id "transcriptF"; exon_id "exonFb"; exon_number 2
+chr1	rtracklayer	exon	545	560	.	+	.	gene_id "geneF"; transcript_id "transcriptF"; exon_id "exonFc"; exon_number 3
+chr1	rtracklayer	exon	572	575	.	+	.	gene_id "geneF"; transcript_id "transcriptF"; exon_id "exonFd"; exon_number 4
+chr1	rtracklayer	exon	1001	1025	.	+	.	gene_id "geneK"; transcript_id "transcriptK"; exon_id "exonKa"; exon_number 1
+chr1	rtracklayer	exon	1033	1040	.	+	.	gene_id "geneK"; transcript_id "transcriptK"; exon_id "exonKb"; exon_number 2
+chr1	rtracklayer	exon	1045	1060	.	+	.	gene_id "geneK"; transcript_id "transcriptK"; exon_id "exonKc"; exon_number 3
+chr1	rtracklayer	exon	1072	1075	.	+	.	gene_id "geneK"; transcript_id "transcriptK"; exon_id "exonKd"; exon_number 4
+chr1	rtracklayer	exon	1103	1113	.	.	.	gene_id "geneL"; transcript_id "transcriptL"; exon_id "exonL";