diff test-data/generate_test.R @ 0:928a52b5c938 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/brew3r_r commit 3e3c47b732510a9ef0b2864b284aa14308e75ab0
author iuc
date Tue, 11 Jun 2024 08:26:37 +0000
parents
children d3b0390f325f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/generate_test.R	Tue Jun 11 08:26:37 2024 +0000
@@ -0,0 +1,262 @@
+library(GenomicRanges)
+input_to_overlap_case1_2_3_4_6_7_8 <- GenomicRanges::GRanges(
+    seqnames = "chr1",
+    ranges = IRanges::IRanges(
+        start = 3,
+        end = 25
+    ),
+    strand = "+",
+    gene_id = "geneA",
+    transcript_id = "transcriptA",
+    type = "exon",
+    exon_id = "exonA"
+)
+big_gr <- NULL
+for (i in c(1:5, 7:10)) {
+    temp.gr <- input_to_overlap_case1_2_3_4_6_7_8
+    temp.gr <- shift(temp.gr, 100 * (i - 1))
+    temp.gr$gene_id <- paste0("gene", LETTERS[i])
+    temp.gr$transcript_id <- paste0("transcript", LETTERS[i])
+    temp.gr$exon_id <- paste0("exon", LETTERS[i])
+    temp.gr$exon_number <- 1
+    big_gr <- c(big_gr, temp.gr)
+}
+input_to_overlap_case5_9 <- GenomicRanges::GRanges(
+    seqnames = "chr1",
+    ranges = IRanges::IRanges(
+        start = c(1, 33, 45, 72),
+        end = c(25, 40, 60, 75)
+    ),
+    strand = "+",
+    gene_id = "geneA",
+    transcript_id = "transcriptA",
+    type = "exon",
+    exon_id = c("exonA", "exonB", "exonC", "exonD")
+)
+for (i in c(6, 11)) {
+    temp.gr <- input_to_overlap_case5_9
+    temp.gr <- shift(temp.gr, 100 * (i - 1))
+    temp.gr$gene_id <- paste0("gene", LETTERS[i])
+    temp.gr$transcript_id <- paste0("transcript", LETTERS[i])
+    temp.gr$exon_id <- paste0("exon", LETTERS[i], letters[1:4])
+    temp.gr$exon_number <- 1:4
+    big_gr <- c(big_gr, temp.gr)
+}
+big_gr <- unlist(as(big_gr, "GRangesList"))
+
+
+
+input_gr <- c(
+    # 1
+    GenomicRanges::GRanges(
+        seqnames = "chr1",
+        ranges = IRanges::IRanges(
+            start = c(5, 20),
+            end = c(10, 30)
+        ),
+        strand = "+",
+        gene_id = c("gene11", "gene12"),
+        transcript_id = c("transcript11", "transcript12"),
+        type = "exon",
+        exon_id = c("exon11", "exon12")
+    ),
+    # 2
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(5, 20),
+                end = c(10, 25)
+            ),
+            strand = "+",
+            gene_id = c("gene21", "gene22"),
+            transcript_id = c("transcript21", "transcript22"),
+            type = "exon",
+            exon_id = c("exon21", "exon22")
+        ),
+        100
+    ),
+    # 3_5
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(5, 20),
+                end = c(10, 22)
+            ),
+            strand = "+",
+            gene_id = c("gene31", "gene32"),
+            transcript_id = c("transcript31", "transcript32"),
+            type = "exon",
+            exon_id = c("exon31", "exon32")
+        ),
+        200
+    ),
+    # 4
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(5, 5),
+                end = c(10, 22)
+            ),
+            strand = "+",
+            gene_id = c("gene41", "gene42"),
+            transcript_id = c("transcript41", "transcript42"),
+            type = "exon",
+            exon_id = c("exon41", "exon42")
+        ),
+        300
+    ),
+    # 4bis
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(5, 5),
+                end = c(10, 25)
+            ),
+            strand = "+",
+            gene_id = c("gene51", "gene52"),
+            transcript_id = c("transcript51", "transcript52"),
+            type = "exon",
+            exon_id = c("exon51", "exon52")
+        ),
+        400
+    ),
+    # 3_5
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(5, 20),
+                end = c(10, 22)
+            ),
+            strand = "+",
+            gene_id = c("gene61", "gene62"),
+            transcript_id = c("transcript61", "transcript62"),
+            type = "exon",
+            exon_id = c("exon61", "exon62")
+        ),
+        500
+    ),
+    # 6
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(1, 1, 30),
+                end = c(10, 10, 40)
+            ),
+            strand = "+",
+            gene_id = "gene71",
+            transcript_id = c("transcript71", "transcript72", "transcript72"),
+            type = "exon",
+            exon_id = c("exon71", "exon71", "exon72")
+        ),
+        600
+    ),
+    # 6bis
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(1, 1, 30),
+                end = c(10, 10, 40)
+            ),
+            strand = "+",
+            gene_id = c("gene81", "gene82", "gene82"),
+            transcript_id = c("transcript81", "transcript82", "transcript82"),
+            type = "exon",
+            exon_id = c("exon81", "exon82", "exon83")
+        ),
+        700
+    ),
+    # 7
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(1, 1, 30),
+                end = c(8, 10, 40)
+            ),
+            strand = "+",
+            gene_id = "gene1",
+            transcript_id = c("transcript91", "transcript92", "transcript92"),
+            type = "exon",
+            exon_id = c("exon91", "exon92", "exon93")
+        ),
+        800
+    ),
+    # 8
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(1, 1, 30),
+                end = c(8, 10, 40)
+            ),
+            strand = "+",
+            gene_id = c("gene101", "gene102", "gene102"),
+            transcript_id = c("transcript101", "transcript102", "transcript102"),
+            type = "exon",
+            exon_id = c("exon101", "exon102", "exon103")
+        ),
+        900
+    ),
+    # 9
+    shift(
+        GenomicRanges::GRanges(
+            seqnames = "chr1",
+            ranges = IRanges::IRanges(
+                start = c(5, 55),
+                end = c(10, 70)
+            ),
+            strand = "+",
+            gene_id = c("gene111", "gene112"),
+            transcript_id = c("transcript111", "transcript112"),
+            type = "exon",
+            exon_id = c("exon111", "exon112")
+        ),
+        1000
+    )
+)
+## Add convergent genes overlapping a unstranded
+input_gr <- c(
+    input_gr,
+    GenomicRanges::GRanges(
+        seqnames = "chr1",
+        ranges = IRanges::IRanges(
+            start = c(1100, 1110),
+            end = c(1105, 1120)
+        ),
+        strand = c("+", "-"),
+        gene_id = c("gene121", "gene122"),
+        transcript_id = c("transcript121", "transcript122"),
+        type = "exon",
+        exon_id = c("exon121", "exon122")
+    )
+)
+big_gr <- c(
+    big_gr,
+    GenomicRanges::GRanges(
+        seqnames = "chr1",
+        ranges = IRanges::IRanges(
+            start = 1103,
+            end = 1113
+        ),
+        strand = "*",
+        gene_id = "geneL",
+        transcript_id = "transcriptL",
+        type = "exon",
+        exon_id = "exonL"
+    )
+)
+input_gr$gene_name <- input_gr$gene_id
+input_gr$gene_name[input_gr$gene_id == "gene111"] <- "Gm001"
+library(BREW3R.r)
+new.gr <- extend_granges(input_gr, big_gr)
+library("rtracklayer")
+export.gff(input_gr, "input.gtf")
+export.gff(big_gr, "second_input.gtf")
+export.gff(sort(new.gr, ignore.strand = TRUE), "output.gtf")