comparison test-data/generate_test.R @ 0:928a52b5c938 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/brew3r_r commit 3e3c47b732510a9ef0b2864b284aa14308e75ab0
author iuc
date Tue, 11 Jun 2024 08:26:37 +0000
parents
children d3b0390f325f
comparison
equal deleted inserted replaced
-1:000000000000 0:928a52b5c938
1 library(GenomicRanges)
2 input_to_overlap_case1_2_3_4_6_7_8 <- GenomicRanges::GRanges(
3 seqnames = "chr1",
4 ranges = IRanges::IRanges(
5 start = 3,
6 end = 25
7 ),
8 strand = "+",
9 gene_id = "geneA",
10 transcript_id = "transcriptA",
11 type = "exon",
12 exon_id = "exonA"
13 )
14 big_gr <- NULL
15 for (i in c(1:5, 7:10)) {
16 temp.gr <- input_to_overlap_case1_2_3_4_6_7_8
17 temp.gr <- shift(temp.gr, 100 * (i - 1))
18 temp.gr$gene_id <- paste0("gene", LETTERS[i])
19 temp.gr$transcript_id <- paste0("transcript", LETTERS[i])
20 temp.gr$exon_id <- paste0("exon", LETTERS[i])
21 temp.gr$exon_number <- 1
22 big_gr <- c(big_gr, temp.gr)
23 }
24 input_to_overlap_case5_9 <- GenomicRanges::GRanges(
25 seqnames = "chr1",
26 ranges = IRanges::IRanges(
27 start = c(1, 33, 45, 72),
28 end = c(25, 40, 60, 75)
29 ),
30 strand = "+",
31 gene_id = "geneA",
32 transcript_id = "transcriptA",
33 type = "exon",
34 exon_id = c("exonA", "exonB", "exonC", "exonD")
35 )
36 for (i in c(6, 11)) {
37 temp.gr <- input_to_overlap_case5_9
38 temp.gr <- shift(temp.gr, 100 * (i - 1))
39 temp.gr$gene_id <- paste0("gene", LETTERS[i])
40 temp.gr$transcript_id <- paste0("transcript", LETTERS[i])
41 temp.gr$exon_id <- paste0("exon", LETTERS[i], letters[1:4])
42 temp.gr$exon_number <- 1:4
43 big_gr <- c(big_gr, temp.gr)
44 }
45 big_gr <- unlist(as(big_gr, "GRangesList"))
46
47
48
49 input_gr <- c(
50 # 1
51 GenomicRanges::GRanges(
52 seqnames = "chr1",
53 ranges = IRanges::IRanges(
54 start = c(5, 20),
55 end = c(10, 30)
56 ),
57 strand = "+",
58 gene_id = c("gene11", "gene12"),
59 transcript_id = c("transcript11", "transcript12"),
60 type = "exon",
61 exon_id = c("exon11", "exon12")
62 ),
63 # 2
64 shift(
65 GenomicRanges::GRanges(
66 seqnames = "chr1",
67 ranges = IRanges::IRanges(
68 start = c(5, 20),
69 end = c(10, 25)
70 ),
71 strand = "+",
72 gene_id = c("gene21", "gene22"),
73 transcript_id = c("transcript21", "transcript22"),
74 type = "exon",
75 exon_id = c("exon21", "exon22")
76 ),
77 100
78 ),
79 # 3_5
80 shift(
81 GenomicRanges::GRanges(
82 seqnames = "chr1",
83 ranges = IRanges::IRanges(
84 start = c(5, 20),
85 end = c(10, 22)
86 ),
87 strand = "+",
88 gene_id = c("gene31", "gene32"),
89 transcript_id = c("transcript31", "transcript32"),
90 type = "exon",
91 exon_id = c("exon31", "exon32")
92 ),
93 200
94 ),
95 # 4
96 shift(
97 GenomicRanges::GRanges(
98 seqnames = "chr1",
99 ranges = IRanges::IRanges(
100 start = c(5, 5),
101 end = c(10, 22)
102 ),
103 strand = "+",
104 gene_id = c("gene41", "gene42"),
105 transcript_id = c("transcript41", "transcript42"),
106 type = "exon",
107 exon_id = c("exon41", "exon42")
108 ),
109 300
110 ),
111 # 4bis
112 shift(
113 GenomicRanges::GRanges(
114 seqnames = "chr1",
115 ranges = IRanges::IRanges(
116 start = c(5, 5),
117 end = c(10, 25)
118 ),
119 strand = "+",
120 gene_id = c("gene51", "gene52"),
121 transcript_id = c("transcript51", "transcript52"),
122 type = "exon",
123 exon_id = c("exon51", "exon52")
124 ),
125 400
126 ),
127 # 3_5
128 shift(
129 GenomicRanges::GRanges(
130 seqnames = "chr1",
131 ranges = IRanges::IRanges(
132 start = c(5, 20),
133 end = c(10, 22)
134 ),
135 strand = "+",
136 gene_id = c("gene61", "gene62"),
137 transcript_id = c("transcript61", "transcript62"),
138 type = "exon",
139 exon_id = c("exon61", "exon62")
140 ),
141 500
142 ),
143 # 6
144 shift(
145 GenomicRanges::GRanges(
146 seqnames = "chr1",
147 ranges = IRanges::IRanges(
148 start = c(1, 1, 30),
149 end = c(10, 10, 40)
150 ),
151 strand = "+",
152 gene_id = "gene71",
153 transcript_id = c("transcript71", "transcript72", "transcript72"),
154 type = "exon",
155 exon_id = c("exon71", "exon71", "exon72")
156 ),
157 600
158 ),
159 # 6bis
160 shift(
161 GenomicRanges::GRanges(
162 seqnames = "chr1",
163 ranges = IRanges::IRanges(
164 start = c(1, 1, 30),
165 end = c(10, 10, 40)
166 ),
167 strand = "+",
168 gene_id = c("gene81", "gene82", "gene82"),
169 transcript_id = c("transcript81", "transcript82", "transcript82"),
170 type = "exon",
171 exon_id = c("exon81", "exon82", "exon83")
172 ),
173 700
174 ),
175 # 7
176 shift(
177 GenomicRanges::GRanges(
178 seqnames = "chr1",
179 ranges = IRanges::IRanges(
180 start = c(1, 1, 30),
181 end = c(8, 10, 40)
182 ),
183 strand = "+",
184 gene_id = "gene1",
185 transcript_id = c("transcript91", "transcript92", "transcript92"),
186 type = "exon",
187 exon_id = c("exon91", "exon92", "exon93")
188 ),
189 800
190 ),
191 # 8
192 shift(
193 GenomicRanges::GRanges(
194 seqnames = "chr1",
195 ranges = IRanges::IRanges(
196 start = c(1, 1, 30),
197 end = c(8, 10, 40)
198 ),
199 strand = "+",
200 gene_id = c("gene101", "gene102", "gene102"),
201 transcript_id = c("transcript101", "transcript102", "transcript102"),
202 type = "exon",
203 exon_id = c("exon101", "exon102", "exon103")
204 ),
205 900
206 ),
207 # 9
208 shift(
209 GenomicRanges::GRanges(
210 seqnames = "chr1",
211 ranges = IRanges::IRanges(
212 start = c(5, 55),
213 end = c(10, 70)
214 ),
215 strand = "+",
216 gene_id = c("gene111", "gene112"),
217 transcript_id = c("transcript111", "transcript112"),
218 type = "exon",
219 exon_id = c("exon111", "exon112")
220 ),
221 1000
222 )
223 )
224 ## Add convergent genes overlapping a unstranded
225 input_gr <- c(
226 input_gr,
227 GenomicRanges::GRanges(
228 seqnames = "chr1",
229 ranges = IRanges::IRanges(
230 start = c(1100, 1110),
231 end = c(1105, 1120)
232 ),
233 strand = c("+", "-"),
234 gene_id = c("gene121", "gene122"),
235 transcript_id = c("transcript121", "transcript122"),
236 type = "exon",
237 exon_id = c("exon121", "exon122")
238 )
239 )
240 big_gr <- c(
241 big_gr,
242 GenomicRanges::GRanges(
243 seqnames = "chr1",
244 ranges = IRanges::IRanges(
245 start = 1103,
246 end = 1113
247 ),
248 strand = "*",
249 gene_id = "geneL",
250 transcript_id = "transcriptL",
251 type = "exon",
252 exon_id = "exonL"
253 )
254 )
255 input_gr$gene_name <- input_gr$gene_id
256 input_gr$gene_name[input_gr$gene_id == "gene111"] <- "Gm001"
257 library(BREW3R.r)
258 new.gr <- extend_granges(input_gr, big_gr)
259 library("rtracklayer")
260 export.gff(input_gr, "input.gtf")
261 export.gff(big_gr, "second_input.gtf")
262 export.gff(sort(new.gr, ignore.strand = TRUE), "output.gtf")