Mercurial > repos > iuc > brew3r_r
changeset 0:928a52b5c938 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/brew3r_r commit 3e3c47b732510a9ef0b2864b284aa14308e75ab0
author | iuc |
---|---|
date | Tue, 11 Jun 2024 08:26:37 +0000 |
parents | |
children | 3198f52bffaa |
files | brew3r.r_script.R brew3r_r.xml test-data/generate_test.R test-data/input.gtf test-data/output.gtf test-data/second_input.gtf |
diffstat | 6 files changed, 624 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# File: brew3r.r_script.R (added in this changeset)
#
# Command-line wrapper around BREW3R.r's extend_granges():
#   1. parse the command line with getopt,
#   2. import the two GTF files as GRanges,
#   3. optionally drop unstranded template intervals that overlap input
#      features on more than one strand,
#   4. run extend_granges(),
#   5. optionally restore the original exons of genes whose gene_name matches
#      an exclusion pattern,
#   6. re-attach the CDS records of the input and export the sorted result.
library("getopt")
suppressPackageStartupMessages(library("rtracklayer"))
library(GenomicRanges)
library("BREW3R.r")

# The option is spelled `stringsAsFactors`; the previous spelling
# (`stringAsFactors`) only created an unused option of that name.
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs(trailingOnly = TRUE)
# - Column 1: the long flag name. A multi-character string.
# - Column 2: short flag alias of Column 1. A single-character string.
# - Column 3: Argument mask of the flag. An integer.
# Possible values: 0=no argument, 1=required argument, 2=optional argument.
# - Column 4: Data type to which the flag's argument shall be cast using
# storage.mode(). A multi-character string. This only considered for same-row
# Column 3 values of 1,2. Possible values: logical, integer, double, complex,
# character. If numeric is encountered then it will be converted to double.
# - Column 5 (optional): A brief description of the purpose of the option.
spec <- matrix(c(
  "help", "h", 0, "logical", "display help",
  "gtf_to_extend", "i", 1, "character", "input gtf file to be extended on 3'",
  "gtf_to_overlap", "g", 1, "character",
  "input gtf file that will be used to extend",
  "output", "o", 1, "character", "output extended gtf",
  "sup_output", "s", 1, "character",
  "supplementary output file with resolution of overlaps",
  "no_add", "n", 0, "logical", "do not add new exons",
  "exclude_pattern", "e", 1, "character",
  "do not extend genes with names matching this pattern",
  "filter_unstranded", "f", 0, "logical",
  "remove unstranded intervals from gtf_to_overlap which overlap intervals from gtf_to_extend of both strands",
  "quiet", "q", 0, "logical", "decrease verbosity",
  "verbose", "v", 0, "logical", "increase verbosity"
), byrow = TRUE, ncol = 5)
# Parse the trailing command-line arguments captured above.
opt <- getopt(spec, opt = args)

# if help was asked for print a friendly message
# and exit with a non-zero error code
if (!is.null(opt$help)) {
  cat(getopt(spec, usage = TRUE))
  q(status = 1)
}

# Check all required arguments
if (is.null(opt$gtf_to_extend)) {
  stop("--gtf_to_extend is required")
}
if (is.null(opt$gtf_to_overlap)) {
  stop("--gtf_to_overlap is required")
}
if (is.null(opt$output)) {
  stop("--output is required")
}

# Check incompatible arguments
if (!is.null(opt$quiet) && !is.null(opt$verbose)) {
  stop("quiet and verbose are mutually exclusive options")
}

# Adjust verbosity
if (!is.null(opt$quiet)) {
  options(rlib_message_verbosity = "quiet")
}

if (!is.null(opt$verbose)) {
  options(BREW3R.r.verbose = "progression")
}

# Load gtfs as GenomicRanges
input_gr_to_extend <- rtracklayer::import(opt$gtf_to_extend, format = "gtf")
input_gr_template <- rtracklayer::import(opt$gtf_to_overlap, format = "gtf")

# Save CDS info: these records are appended unchanged to the final output
input_gr_CDS <- subset(input_gr_to_extend, type == "CDS")

# Filter the template if needed
if (!is.null(opt$filter_unstranded)) {
  # Find intervals without strand information in template
  unstranded_intervals <- which(strand(input_gr_template) == "*")
  if (length(unstranded_intervals) > 0) {
    # Check if they overlap genes from input with different strands
    # First compute the overlap (queryHits index into the unstranded subset,
    # subjectHits index into input_gr_to_extend)
    ov <- suppressWarnings(
      as.data.frame(findOverlaps(
        input_gr_template[unstranded_intervals],
        input_gr_to_extend
      ))
    )
    # Add the strand information
    ov$strand <- as.factor(strand(input_gr_to_extend))[ov$subjectHits]
    # Simplify the dataframe to get only the strand info
    ov_simple <- unique(ov[, c("queryHits", "strand")])
    # If the queryHits is duplicated it means there are different strands
    multi_strand_query <- ov_simple$queryHits[duplicated(ov_simple$queryHits)]
    # Translate positions in the unstranded subset back to template indices
    to_remove <- unstranded_intervals[multi_strand_query]
    # Remove these potentially error-prone intervals from the template.
    # The guard is required: with an empty index, x[-integer(0)] is
    # x[integer(0)] and would drop every interval of the template.
    if (length(to_remove) > 0) {
      input_gr_template <- input_gr_template[-to_remove]
    }
  }
}

# Run BREW3R.r main function
new_gr_exons <- extend_granges(
  input_gr_to_extend = input_gr_to_extend,
  input_gr_to_overlap = input_gr_template,
  add_new_exons = is.null(opt$no_add),
  overlap_resolution_fn = opt$sup_output
)

# Prevent extension using pattern: for genes whose gene_name matches, discard
# whatever extend_granges() produced and put back the original exons
if (!is.null(opt$exclude_pattern)) {
  input_gr_pattern <- subset(
    input_gr_to_extend,
    type == "exon" & grepl(opt$exclude_pattern, gene_name)
  )
  new_gr_no_pattern <- subset(
    new_gr_exons,
    !grepl(opt$exclude_pattern, gene_name)
  )
  new_gr_exons <- c(new_gr_no_pattern, input_gr_pattern)
}

# Recompose with CDS
new_gr <- c(new_gr_exons, input_gr_CDS)

# Export, sorted by position regardless of strand
rtracklayer::export.gff(sort(new_gr, ignore.strand = TRUE), opt$output)
<!-- File: brew3r_r.xml (added in this changeset).
     Galaxy tool wrapper that runs brew3r.r_script.R on two GTF datasets. -->
<tool id="brew3r_r" name="BREW3R.r" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
    <description>Extend GTF</description>
    <macros>
        <token name="@TOOL_VERSION@">1.0.1</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <edam_topics>
        <edam_topic>topic_3308</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_0362</edam_operation>
    </edam_operations>
    <xrefs>
        <!-- <xref type="bio.tools">BREW3R.r</xref> -->
        <xref type="bioconductor">BREW3R.r</xref>
    </xrefs>
    <requirements>
        <!-- <requirement type="package" version="@TOOL_VERSION@">bioconductor-brew3r.r</requirement>
        <requirement type="package" version="1.64.0">bioconductor-rtracklayer</requirement>
        <requirement type="package" version="1.20.4">r-getopt</requirement> -->
        <container type="docker">lldelisle/brew3r:v2</container>
    </requirements>
    <required_files>
        <include path="brew3r.r_script.R" />
    </required_files>
    <version_command><![CDATA[
echo $(R --version | grep version | grep -v GNU)", BREW3R.r version" $(R --vanilla --slave -e "library(BREW3R.r); cat(sessionInfo()\$otherPkgs\$BREW3R.r\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
    ]]></version_command>
    <!-- The boolean params no_add and filter_unstranded carry the flag itself
         as their truevalue, so they are emitted verbatim when set. The GTF is
         written to output.gtf and collected through from_work_dir. -->
    <command detect_errors="exit_code"><![CDATA[
Rscript '${__tool_directory__}/brew3r.r_script.R'
    --gtf_to_extend '$gtf_to_extend'
    --gtf_to_overlap '$gtf_to_overlap'
    #if '$sup_output' == 'true':
        --sup_output '$output_table'
    #end if
    #if str($no_add) != '':
        '$no_add'
    #end if
    #if str($exclude_pattern) != '':
        --exclude_pattern '$exclude_pattern'
    #end if
    #if str($filter_unstranded) != '':
        '$filter_unstranded'
    #end if
    -o output.gtf

    ]]></command>
    <inputs>
        <param argument="--gtf_to_extend" type="data" format="gtf" label="Input gtf file to be extended on 3'" help="Usually coming from public resource." />
        <param argument="--gtf_to_overlap" type="data" format="gtf" label="Input gtf file that will be used to extend" help="Coming from StringTie or another public resource." />
        <param argument="--sup_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Get a supplementary output table with resolution of overlaps" />
        <param argument="--no_add" type="boolean" truevalue="--no_add" falsevalue="" checked="false" label="Do not add new exons" />
        <param argument="--exclude_pattern" type="text" value="" label="Do not extend genes with names matching this pattern" help="Leave empty if you want to extend all genes.">
            <!-- Single quotes and backslashes are stripped because the value
                 is placed inside single quotes on the command line. -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                    <remove value="\"/>
                </valid>
            </sanitizer>
        </param>
        <param argument="--filter_unstranded" type="boolean" truevalue="--filter_unstranded" falsevalue="" checked="false" label="Filter unstranded intervals that overlap genes of both strands" help="Recommended if you used StringTie on unstranded libraries." />
    </inputs>
    <outputs>
        <data name="output" format="gtf" from_work_dir="output.gtf" label="${tool.name} on ${gtf_to_extend.name} and ${gtf_to_overlap.name}: GTF" />
        <data name="output_table" format="tabular" label="${tool.name} on ${gtf_to_extend.name} and ${gtf_to_overlap.name}: overlap resolution">
            <filter>sup_output == True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default options -->
        <test expect_num_outputs="1">
            <param name="gtf_to_extend" value="input.gtf"/>
            <param name="gtf_to_overlap" value="second_input.gtf"/>
            <output name="output" value="output.gtf" compare="diff" lines_diff="2"/>
        </test>
        <!-- no_add: no exon with a BREW3R identifier may appear -->
        <test expect_num_outputs="1">
            <param name="gtf_to_extend" value="input.gtf"/>
            <param name="gtf_to_overlap" value="second_input.gtf"/>
            <param name="no_add" value="true"/>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="31"/>
                    <not_has_text text="BREW3R"/>
                </assert_contents>
            </output>
        </test>
        <!-- exclude_pattern matching a gene name: that gene is not extended -->
        <test expect_num_outputs="1">
            <param name="gtf_to_extend" value="input.gtf"/>
            <param name="gtf_to_overlap" value="second_input.gtf"/>
            <param name="exclude_pattern" value="^Gm"/>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="34"/>
                    <not_has_text text="exon111.ext"/>
                </assert_contents>
            </output>
        </test>
        <!-- exclude_pattern containing '$': passed through unchanged, matches nothing -->
        <test expect_num_outputs="1">
            <param name="gtf_to_extend" value="input.gtf"/>
            <param name="gtf_to_overlap" value="second_input.gtf"/>
            <param name="exclude_pattern" value="Gm$"/>
            <output name="output" value="output.gtf" compare="diff" lines_diff="2"/>
            <assert_command>
                <has_text text="--exclude_pattern 'Gm$'"/>
            </assert_command>
        </test>
        <!-- filter_unstranded: the exon overlapping the unstranded interval is not extended -->
        <test expect_num_outputs="1">
            <param name="gtf_to_extend" value="input.gtf"/>
            <param name="gtf_to_overlap" value="second_input.gtf"/>
            <param name="filter_unstranded" value="true"/>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="36"/>
                    <not_has_text text="exon121.ext"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**What it does**

This tool extends the annotations existing in an input GTF file in the 3' end using annotations from another input GTF. During the process, it makes sure that there will not be new overlaps between different genes.

Usage
.....


**Input**

2 GTF files:

- First one to extend usually comes from a public resource.
- Second one that is used as template may come from a public resource or from StringTie.


**Output**

1 GTF file with all exons from the input GTF where some of them have been extended (the exon_id ends with '.ext') and potentially new exons (the exon_id contains BREW3R).

    ]]></help>
    <citations>
        <citation type="bibtex">
        @unpublished{None,
        author = {Lucille Lopez-Delisle},
        title = {None},
        year = {None},
        eprint = {None},
        url = {https://github.com/lldelisle/BREW3R.r}
        }</citation>
    </citations>
</tool>
# File: test-data/generate_test.R (added in this changeset)
#
# Builds the GTF fixtures used by the tool tests:
#   input.gtf        - annotation to extend
#   second_input.gtf - template annotation used to extend
#   output.gtf       - extend_granges() result on the two above
library(GenomicRanges)

# Build exon records on chr1 with the metadata columns gene_id,
# transcript_id, type and exon_id (in that order), optionally shifted by
# `offset` bases.
make_exons <- function(start, end, gene_id, transcript_id, exon_id,
                       strand = "+", offset = 0) {
  exons <- GenomicRanges::GRanges(
    seqnames = "chr1",
    ranges = IRanges::IRanges(
      start = start,
      end = end
    ),
    strand = strand,
    gene_id = gene_id,
    transcript_id = transcript_id,
    type = "exon",
    exon_id = exon_id
  )
  if (offset != 0) {
    exons <- shift(exons, offset)
  }
  exons
}

# Copy a template transcript into the i-th 100-base window and relabel it
# with the i-th upper-case letter.
place_template <- function(i, template, exon_suffix, exon_number) {
  placed <- shift(template, 100 * (i - 1))
  placed$gene_id <- paste0("gene", LETTERS[i])
  placed$transcript_id <- paste0("transcript", LETTERS[i])
  placed$exon_id <- paste0("exon", LETTERS[i], exon_suffix)
  placed$exon_number <- exon_number
  placed
}

## Template annotation (second_input.gtf)
# Single-exon template, used for windows 1-5 and 7-10
single_exon_template <- make_exons(
  start = 3, end = 25,
  gene_id = "geneA", transcript_id = "transcriptA", exon_id = "exonA"
)
# Four-exon template, used for windows 6 and 11
four_exon_template <- make_exons(
  start = c(1, 33, 45, 72), end = c(25, 40, 60, 75),
  gene_id = "geneA", transcript_id = "transcriptA",
  exon_id = c("exonA", "exonB", "exonC", "exonD")
)
single_exon_copies <- lapply(
  c(1:5, 7:10), place_template,
  template = single_exon_template, exon_suffix = "", exon_number = 1
)
four_exon_copies <- lapply(
  c(6, 11), place_template,
  template = four_exon_template, exon_suffix = letters[1:4], exon_number = 1:4
)
# Same ordering as before: all single-exon copies first, then the two
# four-exon copies
big_gr <- unlist(as(c(single_exon_copies, four_exon_copies), "GRangesList"))

## Annotation to extend (input.gtf)
input_gr <- c(
  # 1
  make_exons(
    start = c(5, 20), end = c(10, 30),
    gene_id = c("gene11", "gene12"),
    transcript_id = c("transcript11", "transcript12"),
    exon_id = c("exon11", "exon12")
  ),
  # 2
  make_exons(
    start = c(5, 20), end = c(10, 25),
    gene_id = c("gene21", "gene22"),
    transcript_id = c("transcript21", "transcript22"),
    exon_id = c("exon21", "exon22"),
    offset = 100
  ),
  # 3_5
  make_exons(
    start = c(5, 20), end = c(10, 22),
    gene_id = c("gene31", "gene32"),
    transcript_id = c("transcript31", "transcript32"),
    exon_id = c("exon31", "exon32"),
    offset = 200
  ),
  # 4
  make_exons(
    start = c(5, 5), end = c(10, 22),
    gene_id = c("gene41", "gene42"),
    transcript_id = c("transcript41", "transcript42"),
    exon_id = c("exon41", "exon42"),
    offset = 300
  ),
  # 4bis
  make_exons(
    start = c(5, 5), end = c(10, 25),
    gene_id = c("gene51", "gene52"),
    transcript_id = c("transcript51", "transcript52"),
    exon_id = c("exon51", "exon52"),
    offset = 400
  ),
  # 3_5
  make_exons(
    start = c(5, 20), end = c(10, 22),
    gene_id = c("gene61", "gene62"),
    transcript_id = c("transcript61", "transcript62"),
    exon_id = c("exon61", "exon62"),
    offset = 500
  ),
  # 6
  make_exons(
    start = c(1, 1, 30), end = c(10, 10, 40),
    gene_id = "gene71",
    transcript_id = c("transcript71", "transcript72", "transcript72"),
    exon_id = c("exon71", "exon71", "exon72"),
    offset = 600
  ),
  # 6bis
  make_exons(
    start = c(1, 1, 30), end = c(10, 10, 40),
    gene_id = c("gene81", "gene82", "gene82"),
    transcript_id = c("transcript81", "transcript82", "transcript82"),
    exon_id = c("exon81", "exon82", "exon83"),
    offset = 700
  ),
  # 7
  make_exons(
    start = c(1, 1, 30), end = c(8, 10, 40),
    gene_id = "gene1",
    transcript_id = c("transcript91", "transcript92", "transcript92"),
    exon_id = c("exon91", "exon92", "exon93"),
    offset = 800
  ),
  # 8
  make_exons(
    start = c(1, 1, 30), end = c(8, 10, 40),
    gene_id = c("gene101", "gene102", "gene102"),
    transcript_id = c("transcript101", "transcript102", "transcript102"),
    exon_id = c("exon101", "exon102", "exon103"),
    offset = 900
  ),
  # 9
  make_exons(
    start = c(5, 55), end = c(10, 70),
    gene_id = c("gene111", "gene112"),
    transcript_id = c("transcript111", "transcript112"),
    exon_id = c("exon111", "exon112"),
    offset = 1000
  )
)

## Add convergent genes overlapping a unstranded
input_gr <- c(
  input_gr,
  make_exons(
    start = c(1100, 1110), end = c(1105, 1120),
    gene_id = c("gene121", "gene122"),
    transcript_id = c("transcript121", "transcript122"),
    exon_id = c("exon121", "exon122"),
    strand = c("+", "-")
  )
)
big_gr <- c(
  big_gr,
  make_exons(
    start = 1103, end = 1113,
    gene_id = "geneL", transcript_id = "transcriptL", exon_id = "exonL",
    strand = "*"
  )
)

# gene_name mirrors gene_id, except for one gene that gets a "Gm" name
input_gr$gene_name <- input_gr$gene_id
input_gr$gene_name[input_gr$gene_id == "gene111"] <- "Gm001"

## Compute the expected result and write the three fixtures
library(BREW3R.r)
new.gr <- extend_granges(input_gr, big_gr)
library("rtracklayer")
export.gff(input_gr, "input.gtf")
export.gff(big_gr, "second_input.gtf")
export.gff(sort(new.gr, ignore.strand = TRUE), "output.gtf")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.gtf Tue Jun 11 08:26:37 2024 +0000 @@ -0,0 +1,31 @@ +##gff-version 2 +##source-version rtracklayer 1.64.0 +##date 2024-06-06 +chr1 rtracklayer exon 5 10 . + . gene_id "gene11"; transcript_id "transcript11"; exon_id "exon11"; gene_name "gene11" +chr1 rtracklayer exon 20 30 . + . gene_id "gene12"; transcript_id "transcript12"; exon_id "exon12"; gene_name "gene12" +chr1 rtracklayer exon 105 110 . + . gene_id "gene21"; transcript_id "transcript21"; exon_id "exon21"; gene_name "gene21" +chr1 rtracklayer exon 120 125 . + . gene_id "gene22"; transcript_id "transcript22"; exon_id "exon22"; gene_name "gene22" +chr1 rtracklayer exon 205 210 . + . gene_id "gene31"; transcript_id "transcript31"; exon_id "exon31"; gene_name "gene31" +chr1 rtracklayer exon 220 222 . + . gene_id "gene32"; transcript_id "transcript32"; exon_id "exon32"; gene_name "gene32" +chr1 rtracklayer exon 305 310 . + . gene_id "gene41"; transcript_id "transcript41"; exon_id "exon41"; gene_name "gene41" +chr1 rtracklayer exon 305 322 . + . gene_id "gene42"; transcript_id "transcript42"; exon_id "exon42"; gene_name "gene42" +chr1 rtracklayer exon 405 410 . + . gene_id "gene51"; transcript_id "transcript51"; exon_id "exon51"; gene_name "gene51" +chr1 rtracklayer exon 405 425 . + . gene_id "gene52"; transcript_id "transcript52"; exon_id "exon52"; gene_name "gene52" +chr1 rtracklayer exon 505 510 . + . gene_id "gene61"; transcript_id "transcript61"; exon_id "exon61"; gene_name "gene61" +chr1 rtracklayer exon 520 522 . + . gene_id "gene62"; transcript_id "transcript62"; exon_id "exon62"; gene_name "gene62" +chr1 rtracklayer exon 601 610 . + . gene_id "gene71"; transcript_id "transcript71"; exon_id "exon71"; gene_name "gene71" +chr1 rtracklayer exon 601 610 . + . gene_id "gene71"; transcript_id "transcript72"; exon_id "exon71"; gene_name "gene71" +chr1 rtracklayer exon 630 640 . + . 
gene_id "gene71"; transcript_id "transcript72"; exon_id "exon72"; gene_name "gene71" +chr1 rtracklayer exon 701 710 . + . gene_id "gene81"; transcript_id "transcript81"; exon_id "exon81"; gene_name "gene81" +chr1 rtracklayer exon 701 710 . + . gene_id "gene82"; transcript_id "transcript82"; exon_id "exon82"; gene_name "gene82" +chr1 rtracklayer exon 730 740 . + . gene_id "gene82"; transcript_id "transcript82"; exon_id "exon83"; gene_name "gene82" +chr1 rtracklayer exon 801 808 . + . gene_id "gene1"; transcript_id "transcript91"; exon_id "exon91"; gene_name "gene1" +chr1 rtracklayer exon 801 810 . + . gene_id "gene1"; transcript_id "transcript92"; exon_id "exon92"; gene_name "gene1" +chr1 rtracklayer exon 830 840 . + . gene_id "gene1"; transcript_id "transcript92"; exon_id "exon93"; gene_name "gene1" +chr1 rtracklayer exon 901 908 . + . gene_id "gene101"; transcript_id "transcript101"; exon_id "exon101"; gene_name "gene101" +chr1 rtracklayer exon 901 910 . + . gene_id "gene102"; transcript_id "transcript102"; exon_id "exon102"; gene_name "gene102" +chr1 rtracklayer exon 930 940 . + . gene_id "gene102"; transcript_id "transcript102"; exon_id "exon103"; gene_name "gene102" +chr1 rtracklayer exon 1005 1010 . + . gene_id "gene111"; transcript_id "transcript111"; exon_id "exon111"; gene_name "Gm001" +chr1 rtracklayer exon 1055 1070 . + . gene_id "gene112"; transcript_id "transcript112"; exon_id "exon112"; gene_name "gene112" +chr1 rtracklayer exon 1100 1105 . + . gene_id "gene121"; transcript_id "transcript121"; exon_id "exon121"; gene_name "gene121" +chr1 rtracklayer exon 1110 1120 . - . gene_id "gene122"; transcript_id "transcript122"; exon_id "exon122"; gene_name "gene122"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.gtf Tue Jun 11 08:26:37 2024 +0000 @@ -0,0 +1,36 @@ +##gff-version 2 +##source-version rtracklayer 1.64.0 +##date 2024-06-06 +chr1 rtracklayer exon 5 19 . + . gene_id "gene11"; gene_name "gene11"; transcript_id "transcript11"; exon_id "exon11.ext"; exon_number 1 +chr1 rtracklayer exon 20 30 . + . gene_id "gene12"; gene_name "gene12"; transcript_id "transcript12"; exon_id "exon12"; exon_number 1 +chr1 rtracklayer exon 105 119 . + . gene_id "gene21"; gene_name "gene21"; transcript_id "transcript21"; exon_id "exon21.ext"; exon_number 1 +chr1 rtracklayer exon 120 125 . + . gene_id "gene22"; gene_name "gene22"; transcript_id "transcript22"; exon_id "exon22"; exon_number 1 +chr1 rtracklayer exon 205 219 . + . gene_id "gene31"; gene_name "gene31"; transcript_id "transcript31"; exon_id "exon31.ext"; exon_number 1 +chr1 rtracklayer exon 220 225 . + . gene_id "gene32"; gene_name "gene32"; transcript_id "transcript32"; exon_id "exon32.ext"; exon_number 1 +chr1 rtracklayer exon 305 310 . + . gene_id "gene41"; gene_name "gene41"; transcript_id "transcript41"; exon_id "exon41"; exon_number 1 +chr1 rtracklayer exon 305 325 . + . gene_id "gene42"; gene_name "gene42"; transcript_id "transcript42"; exon_id "exon42.ext"; exon_number 1 +chr1 rtracklayer exon 405 410 . + . gene_id "gene51"; gene_name "gene51"; transcript_id "transcript51"; exon_id "exon51"; exon_number 1 +chr1 rtracklayer exon 405 425 . + . gene_id "gene52"; gene_name "gene52"; transcript_id "transcript52"; exon_id "exon52"; exon_number 1 +chr1 rtracklayer exon 505 519 . + . gene_id "gene61"; gene_name "gene61"; transcript_id "transcript61"; exon_id "exon61.ext"; exon_number 1 +chr1 rtracklayer exon 520 525 . + . gene_id "gene62"; gene_name "gene62"; transcript_id "transcript62"; exon_id "exon62.ext"; exon_number 1 +chr1 rtracklayer exon 533 540 . + . 
gene_id "gene62"; gene_name "gene62"; transcript_id "transcript62"; exon_id "BREW3R0000000003"; exon_number 2 +chr1 rtracklayer exon 545 560 . + . gene_id "gene62"; gene_name "gene62"; transcript_id "transcript62"; exon_id "BREW3R0000000004"; exon_number 3 +chr1 rtracklayer exon 572 575 . + . gene_id "gene62"; gene_name "gene62"; transcript_id "transcript62"; exon_id "BREW3R0000000005"; exon_number 4 +chr1 rtracklayer exon 601 610 . + . gene_id "gene71"; gene_name "gene71"; transcript_id "transcript72"; exon_id "exon71"; exon_number 1 +chr1 rtracklayer exon 601 625 . + . gene_id "gene71"; gene_name "gene71"; transcript_id "transcript71"; exon_id "exon71.ext"; exon_number 1 +chr1 rtracklayer exon 630 640 . + . gene_id "gene71"; gene_name "gene71"; transcript_id "transcript72"; exon_id "exon72"; exon_number 2 +chr1 rtracklayer exon 701 710 . + . gene_id "gene82"; gene_name "gene82"; transcript_id "transcript82"; exon_id "exon82"; exon_number 1 +chr1 rtracklayer exon 701 725 . + . gene_id "gene81"; gene_name "gene81"; transcript_id "transcript81"; exon_id "exon81.ext"; exon_number 1 +chr1 rtracklayer exon 730 740 . + . gene_id "gene82"; gene_name "gene82"; transcript_id "transcript82"; exon_id "exon83"; exon_number 2 +chr1 rtracklayer exon 801 810 . + . gene_id "gene1"; gene_name "gene1"; transcript_id "transcript92"; exon_id "exon92"; exon_number 1 +chr1 rtracklayer exon 801 825 . + . gene_id "gene1"; gene_name "gene1"; transcript_id "transcript91"; exon_id "exon91.ext"; exon_number 1 +chr1 rtracklayer exon 830 840 . + . gene_id "gene1"; gene_name "gene1"; transcript_id "transcript92"; exon_id "exon93"; exon_number 2 +chr1 rtracklayer exon 901 908 . + . gene_id "gene101"; gene_name "gene101"; transcript_id "transcript101"; exon_id "exon101"; exon_number 1 +chr1 rtracklayer exon 901 910 . + . gene_id "gene102"; gene_name "gene102"; transcript_id "transcript102"; exon_id "exon102"; exon_number 1 +chr1 rtracklayer exon 930 940 . + . 
gene_id "gene102"; gene_name "gene102"; transcript_id "transcript102"; exon_id "exon103"; exon_number 2 +chr1 rtracklayer exon 1005 1025 . + . gene_id "gene111"; gene_name "Gm001"; transcript_id "transcript111"; exon_id "exon111.ext"; exon_number 1 +chr1 rtracklayer exon 1033 1040 . + . gene_id "gene111"; gene_name "Gm001"; transcript_id "transcript111"; exon_id "BREW3R0000000001"; exon_number 2 +chr1 rtracklayer exon 1045 1054 . + . gene_id "gene111"; gene_name "Gm001"; transcript_id "transcript111"; exon_id "BREW3R0000000002"; exon_number 3 +chr1 rtracklayer exon 1055 1070 . + . gene_id "gene112"; gene_name "gene112"; transcript_id "transcript112"; exon_id "exon112"; exon_number 1 +chr1 rtracklayer exon 1100 1113 . + . gene_id "gene121"; gene_name "gene121"; transcript_id "transcript121"; exon_id "exon121.ext"; exon_number 1 +chr1 rtracklayer exon 1103 1120 . - . gene_id "gene122"; gene_name "gene122"; transcript_id "transcript122"; exon_id "exon122.ext"; exon_number 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/second_input.gtf Tue Jun 11 08:26:37 2024 +0000 @@ -0,0 +1,21 @@ +##gff-version 2 +##source-version rtracklayer 1.64.0 +##date 2024-06-06 +chr1 rtracklayer exon 3 25 . + . gene_id "geneA"; transcript_id "transcriptA"; exon_id "exonA"; exon_number 1 +chr1 rtracklayer exon 103 125 . + . gene_id "geneB"; transcript_id "transcriptB"; exon_id "exonB"; exon_number 1 +chr1 rtracklayer exon 203 225 . + . gene_id "geneC"; transcript_id "transcriptC"; exon_id "exonC"; exon_number 1 +chr1 rtracklayer exon 303 325 . + . gene_id "geneD"; transcript_id "transcriptD"; exon_id "exonD"; exon_number 1 +chr1 rtracklayer exon 403 425 . + . gene_id "geneE"; transcript_id "transcriptE"; exon_id "exonE"; exon_number 1 +chr1 rtracklayer exon 603 625 . + . gene_id "geneG"; transcript_id "transcriptG"; exon_id "exonG"; exon_number 1 +chr1 rtracklayer exon 703 725 . + . gene_id "geneH"; transcript_id "transcriptH"; exon_id "exonH"; exon_number 1 +chr1 rtracklayer exon 803 825 . + . gene_id "geneI"; transcript_id "transcriptI"; exon_id "exonI"; exon_number 1 +chr1 rtracklayer exon 903 925 . + . gene_id "geneJ"; transcript_id "transcriptJ"; exon_id "exonJ"; exon_number 1 +chr1 rtracklayer exon 501 525 . + . gene_id "geneF"; transcript_id "transcriptF"; exon_id "exonFa"; exon_number 1 +chr1 rtracklayer exon 533 540 . + . gene_id "geneF"; transcript_id "transcriptF"; exon_id "exonFb"; exon_number 2 +chr1 rtracklayer exon 545 560 . + . gene_id "geneF"; transcript_id "transcriptF"; exon_id "exonFc"; exon_number 3 +chr1 rtracklayer exon 572 575 . + . gene_id "geneF"; transcript_id "transcriptF"; exon_id "exonFd"; exon_number 4 +chr1 rtracklayer exon 1001 1025 . + . gene_id "geneK"; transcript_id "transcriptK"; exon_id "exonKa"; exon_number 1 +chr1 rtracklayer exon 1033 1040 . + . gene_id "geneK"; transcript_id "transcriptK"; exon_id "exonKb"; exon_number 2 +chr1 rtracklayer exon 1045 1060 . + . 
gene_id "geneK"; transcript_id "transcriptK"; exon_id "exonKc"; exon_number 3 +chr1 rtracklayer exon 1072 1075 . + . gene_id "geneK"; transcript_id "transcriptK"; exon_id "exonKd"; exon_number 4 +chr1 rtracklayer exon 1103 1113 . . . gene_id "geneL"; transcript_id "transcriptL"; exon_id "exonL";