comparison buildHGVS.R @ 0:c12a4d187121 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hgvsparser/ commit f9deb29cdbd2d2a5f2f4fbd470b1078431a36ae0
author iuc
date Fri, 07 Jun 2024 15:21:07 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c12a4d187121
1 # Copyright (C) 2018 Jochen Weile, Roth Lab
2 #
3 # This file is part of hgvsParseR.
4 #
5 # hgvsParseR is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # hgvsParseR is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with hgvsParseR. If not, see <https://www.gnu.org/licenses/>.
17
#' Genomic HGVS Builder
#'
#' A constructor for a genomic-level HGVS builder object. The object contains a
#' collection of functions for building genomic HGVS strings.
#'
#' The resulting object encapsulates the following functions:
#' \itemize{
#'   \item{substitution(pos,ancestral,variant)} Genomic substitution variants.
#'     pos = position (integer); ancestral = ancestral nucleotide [ACGT];
#'     variant = variant nucleotide [ACGT]
#'   \item{deletion(start,stop)} Genomic deletion. start = start position (integer);
#'     stop = stop position (integer)
#'   \item{inversion(start,stop)} Genomic inversion. start = start position (integer);
#'     stop = stop position (integer)
#'   \item{duplication(start,stop)} Genomic duplication. start = start position (integer);
#'     stop = stop position (integer)
#'   \item{insertion(start,seq)} Genomic insertion. start = position immediately preceding
#'     the insertion (integer); seq = inserted nucleotide sequence [ACGT]+
#'   \item{delins(start,stop,seq)} Genomic deletion and insertion. start = start position
#'     (integer); stop = stop position relative to the reference (integer);
#'     seq = inserted nucleotide sequence [ACGT]+
#'   \item{cis(...)} Multi-variant phased in cis. Parameters are genomic HGVS strings for the
#'     corresponding single mutants
#'   \item{trans(...)} Multi-variant phased in trans. Parameters are genomic HGVS strings for the
#'     corresponding single mutants
#'   \item{nophase(...)} Multi-variant with unknown phasing. Parameters are genomic HGVS strings
#'     for the corresponding single mutants
#' }
#'
#' Positions are always written in fixed (non-scientific) notation, so a
#' position such as \code{100000} is rendered as \code{"100000"} rather than
#' \code{"1e+05"}.
#'
#' @return A \code{hgvs.builder.g} object with functions for building genomic HGVS strings.
#'   The individual functions return single-element character vectors containing these strings.
#' @keywords HGVS builder
#' @export
#' @examples
#' builder <- new.hgvs.builder.g()
#' string1 <- builder$substitution(123,"A","G")
#' string2 <- builder$delins(123,129,"ATTG")
#' string3 <- with(builder,cis(substitution(123,"A","C"),substitution(231,"G","A")))

new.hgvs.builder.g <- function() {

  nucleotides <- c("A", "C", "G", "T")

  # Render a coordinate in fixed notation. paste0() on a bare double would
  # turn e.g. 100000 into "1e+05", which is not a valid HGVS position.
  fmtPos <- function(x) format(x, scientific = FALSE, trim = TRUE)

  # Validation shared by all range-based variant types (del/inv/dup/delins).
  checkRange <- function(from, to) {
    if (!is.numeric(from)) stop("start must be an integer")
    if (!is.numeric(to)) stop("stop must be an integer")
    if (from > to) stop("start must be upstream of stop")
  }

  # Validation of an inserted nucleotide sequence.
  checkSeq <- function(s) {
    if (!is.character(s) || regexpr("^[ACGT]+$", s) < 1) {
      stop("variant must be nucleotide sequence")
    }
  }

  # Assemble "g.<from>_<to><suffix>".
  rangeVariant <- function(from, to, suffix) {
    paste0("g.", fmtPos(from), "_", fmtPos(to), suffix)
  }

  # Strip the "g." prefix from each single-variant string and join the
  # remaining bodies with `sep` inside one pair of square brackets.
  joinVariants <- function(strings, sep) {
    if (!all(vapply(strings, is.character, logical(1)))) {
      stop("all arguments must be HGVS strings")
    }
    strings <- unlist(strings)
    if (!all(substr(strings, 1, 2) == "g.")) {
      stop("all arguments must be genomic HGVS strings")
    }
    bodies <- substr(strings, 3, nchar(strings))
    paste0("g.[", paste(bodies, collapse = sep), "]")
  }

  substitution <- function(pos, ancestral, variant) {
    if (!is.numeric(pos) || pos < 1) stop("position must be a positive integer")
    if (!is.character(ancestral) || !(ancestral %in% nucleotides)) {
      stop("ancestral must be single nucleotide")
    }
    if (!is.character(variant) || !(variant %in% nucleotides)) {
      stop("variant must be single nucleotide")
    }
    paste0("g.", fmtPos(pos), ancestral, ">", variant)
  }

  deletion <- function(start, stop) {
    checkRange(start, stop)
    rangeVariant(start, stop, "del")
  }

  inversion <- function(start, stop) {
    checkRange(start, stop)
    rangeVariant(start, stop, "inv")
  }

  duplication <- function(start, stop) {
    checkRange(start, stop)
    rangeVariant(start, stop, "dup")
  }

  insertion <- function(start, seq) {
    if (!is.numeric(start)) stop("start must be an integer")
    checkSeq(seq)
    # An insertion is described by the two positions flanking it.
    rangeVariant(start, start + 1, paste0("ins", seq))
  }

  delins <- function(start, stop, seq) {
    checkRange(start, stop)
    checkSeq(seq)
    rangeVariant(start, stop, paste0("delins", seq))
  }

  cis <- function(...) joinVariants(list(...), ";")

  trans <- function(...) joinVariants(list(...), "];[")

  nophase <- function(...) joinVariants(list(...), "(;)")

  structure(
    list(
      substitution = substitution,
      deletion = deletion,
      inversion = inversion,
      duplication = duplication,
      insertion = insertion,
      delins = delins,
      cis = cis,
      trans = trans,
      nophase = nophase
    ),
    class = "hgvs.builder.g"
  )
}
139
# S3 print method for genomic HGVS builder objects.
#
# x   : the hgvs.builder.g object being printed; returned invisibly.
# ... : ignored; accepted for compatibility with the print() generic.
#
# The method must accept the object as its first argument: the print() generic
# passes it along on dispatch, so a zero-parameter method fails with an
# "unused argument" error.
print.hgvs.builder.g <- function(x = NULL, ...) {
  cat("Genomic HGVS string builder. Use $ operator to access functions.", "\n", sep = "")
  invisible(x)
}
143
144
145
#' Coding Sequence HGVS Builder
#'
#' A constructor for a CDS (=coding sequence) HGVS builder object. The object contains a
#' collection of functions for building CDS HGVS strings.
#'
#' The resulting object encapsulates the following functions:
#' \itemize{
#'   \item{substitution(pos,ancestral,variant,posOffset=0)} CDS substitution variants.
#'     pos = position (integer); ancestral = ancestral nucleotide [ACGT];
#'     variant = variant nucleotide [ACGT]; posOffset = offset from the position when
#'     crossing exon-intron borders (integer, defaults to 0)
#'   \item{deletion(start,stop,startOffset=0,stopOffset=0)} CDS deletion.
#'     start = start position (integer); stop = stop position (integer);
#'     startOffset = offset from the start position when crossing exon-intron borders
#'     (integer, defaults to 0); stopOffset = offset from the stop position when
#'     crossing exon-intron borders (integer, defaults to 0)
#'   \item{inversion(start,stop,startOffset=0,stopOffset=0)} CDS inversion.
#'     Parameters as for deletion; start and stop (including offsets) must differ.
#'   \item{duplication(start,stop,startOffset=0,stopOffset=0)} CDS duplication.
#'     Parameters as for deletion.
#'   \item{insertion(start,seq,startOffset=0)} CDS insertion. start = position immediately
#'     preceding the insertion (integer); seq = inserted nucleotide sequence [ACGT]+ ;
#'     startOffset = offset from the start position when crossing exon-intron borders
#'     (integer, defaults to 0)
#'   \item{delins(start,stop,seq,startOffset=0,stopOffset=0)} CDS deletion and insertion.
#'     start = start position (integer); stop = stop position relative to the reference
#'     (integer); seq = inserted nucleotide sequence [ACGT]+ ; startOffset and stopOffset
#'     as for deletion
#'   \item{cis(...)} Multi-variant phased in cis. Parameters are coding HGVS strings for the
#'     corresponding single mutants
#'   \item{trans(...)} Multi-variant phased in trans. Parameters are coding HGVS strings for the
#'     corresponding single mutants
#'   \item{nophase(...)} Multi-variant with unknown phasing. Parameters are coding HGVS strings
#'     for the corresponding single mutants
#' }
#'
#' Positions and offsets are always written in fixed (non-scientific) notation,
#' so a position such as \code{100000} is rendered as \code{"100000"} rather
#' than \code{"1e+05"}.
#'
#' @return A \code{hgvs.builder.c} object with functions for building coding HGVS strings.
#'   The individual functions return single-element character vectors containing these strings.
#' @keywords HGVS builder
#' @export
#' @examples
#' builder <- new.hgvs.builder.c()
#' string1 <- builder$substitution(123,"A","G",posOffset=2)
#' string2 <- builder$delins(123,129,"ATTG")
#' string3 <- with(builder,cis(substitution(123,"A","C"),substitution(231,"G","A")))

new.hgvs.builder.c <- function() {

  nucleotides <- c("A", "C", "G", "T")

  # Render a number in fixed notation. paste0() on a bare double would turn
  # e.g. 100000 into "1e+05", which is not a valid HGVS position.
  fmtNum <- function(x) format(x, scientific = FALSE, trim = TRUE)

  # Signed offset suffix: "" for 0, "+n" for positive, "-n" for negative.
  offsetStr <- function(offset) {
    if (offset == 0) {
      ""
    } else if (offset > 0) {
      paste0("+", fmtNum(offset))
    } else {
      fmtNum(offset)
    }
  }

  # A position followed by its (possibly empty) offset suffix.
  posStr <- function(pos, offset) paste0(fmtNum(pos), offsetStr(offset))

  # Validation shared by the range-based variant types. With strict = TRUE
  # the two endpoints must also differ (used for inversions).
  checkRange <- function(from, to, fromOffset, toOffset, strict = FALSE) {
    if (!is.numeric(from)) stop("start must be an integer")
    if (!is.numeric(to)) stop("stop must be an integer")
    if (!is.numeric(fromOffset)) stop("offset must be an integer")
    if (!is.numeric(toOffset)) stop("offset must be an integer")
    left <- from + fromOffset
    right <- to + toOffset
    outOfOrder <- if (strict) left >= right else left > right
    if (outOfOrder) stop("start must be before stop")
  }

  # Validation of an inserted nucleotide sequence.
  checkSeq <- function(s) {
    if (!is.character(s) || regexpr("^[ACGT]+$", s) < 1) {
      stop("variant must be nucleotide sequence")
    }
  }

  # Assemble "c.<from><fromOffset>_<to><toOffset><suffix>".
  rangeVariant <- function(from, fromOffset, to, toOffset, suffix) {
    paste0("c.", posStr(from, fromOffset), "_", posStr(to, toOffset), suffix)
  }

  # Strip the "c." prefix from each single-variant string and join the
  # remaining bodies with `sep` inside one pair of square brackets.
  joinVariants <- function(strings, sep) {
    if (!all(vapply(strings, is.character, logical(1)))) {
      stop("all arguments must be HGVS strings")
    }
    strings <- unlist(strings)
    if (!all(substr(strings, 1, 2) == "c.")) {
      stop("all arguments must be coding HGVS strings")
    }
    bodies <- substr(strings, 3, nchar(strings))
    paste0("c.[", paste(bodies, collapse = sep), "]")
  }

  substitution <- function(pos, ancestral, variant, posOffset = 0) {
    if (!is.numeric(pos) || pos < 1) stop("position must be a positive integer")
    if (!is.numeric(posOffset)) stop("offset must be an integer")
    if (!is.character(ancestral) || !(ancestral %in% nucleotides)) {
      stop("ancestral must be single nucleotide")
    }
    if (!is.character(variant) || !(variant %in% nucleotides)) {
      stop("variant must be single nucleotide")
    }
    paste0("c.", posStr(pos, posOffset), ancestral, ">", variant)
  }

  deletion <- function(start, stop, startOffset = 0, stopOffset = 0) {
    checkRange(start, stop, startOffset, stopOffset)
    rangeVariant(start, startOffset, stop, stopOffset, "del")
  }

  inversion <- function(start, stop, startOffset = 0, stopOffset = 0) {
    checkRange(start, stop, startOffset, stopOffset, strict = TRUE)
    rangeVariant(start, startOffset, stop, stopOffset, "inv")
  }

  duplication <- function(start, stop, startOffset = 0, stopOffset = 0) {
    checkRange(start, stop, startOffset, stopOffset)
    rangeVariant(start, startOffset, stop, stopOffset, "dup")
  }

  insertion <- function(start, seq, startOffset = 0) {
    if (!is.numeric(start)) stop("start must be an integer")
    checkSeq(seq)
    if (!is.numeric(startOffset)) stop("offset must be an integer")
    # The right flank is the next base: with a non-zero offset the anchor
    # position stays the same and the offset advances by one; otherwise the
    # position itself advances by one.
    if (startOffset != 0) {
      rightPos <- start
      rightOffset <- startOffset + 1
    } else {
      rightPos <- start + 1
      rightOffset <- startOffset
    }
    rangeVariant(start, startOffset, rightPos, rightOffset, paste0("ins", seq))
  }

  delins <- function(start, stop, seq, startOffset = 0, stopOffset = 0) {
    checkRange(start, stop, startOffset, stopOffset)
    checkSeq(seq)
    rangeVariant(start, startOffset, stop, stopOffset, paste0("delins", seq))
  }

  cis <- function(...) joinVariants(list(...), ";")

  trans <- function(...) joinVariants(list(...), "];[")

  nophase <- function(...) joinVariants(list(...), "(;)")

  structure(
    list(
      substitution = substitution,
      deletion = deletion,
      inversion = inversion,
      duplication = duplication,
      insertion = insertion,
      delins = delins,
      cis = cis,
      trans = trans,
      nophase = nophase
    ),
    class = "hgvs.builder.c"
  )
}
302
# S3 print method for coding-sequence HGVS builder objects.
#
# x   : the hgvs.builder.c object being printed; returned invisibly.
# ... : ignored; accepted for compatibility with the print() generic.
#
# The method must accept the object as its first argument: the print() generic
# passes it along on dispatch, so a zero-parameter method fails with an
# "unused argument" error.
print.hgvs.builder.c <- function(x = NULL, ...) {
  cat("Coding-sequence HGVS string builder. Use $ operator to access functions.", "\n", sep = "")
  invisible(x)
}
306
307
308
#' Protein HGVS Builder
#'
#' A constructor for a protein-level HGVS builder object. The object contains a
#' collection of functions for building protein HGVS strings.
#'
#' The resulting object encapsulates the following functions:
#' \itemize{
#'   \item{synonymous()} A synonymous variant. No parameters required.
#'   \item{synonymous(pos,ancestral)} Unofficial (yet frequently used) version of synonymous
#'     variant syntax. pos = position (integer); ancestral = ancestral amino acid in
#'     one-letter or three-letter code.
#'   \item{substitution(pos,ancestral,variant)} AA substitution variants.
#'     pos = position (integer); ancestral = ancestral amino acid in one-letter or
#'     three-letter code; variant = variant amino acid in one-letter or three-letter code
#'   \item{deletion(startPos,startAA,endPos,endAA)} AA deletion. startPos = start position
#'     (integer); startAA = start amino acid in one-letter or three-letter code;
#'     endPos = stop position (integer); endAA = end amino acid in one-letter or
#'     three-letter code. If startPos equals endPos the single-residue form is emitted.
#'   \item{duplication(startPos,startAA,endPos,endAA)} AA duplication. Parameters as for
#'     deletion. If startPos equals endPos the single-residue form is emitted.
#'   \item{insertion(leftPos,leftAA,rightAA,seq)} AA insertion. leftPos = position immediately
#'     preceding the insertion (integer); leftAA = corresponding amino acid in one-letter or
#'     three-letter code; rightAA = amino acid to the right of the insertion, in one-letter
#'     or three-letter code; seq = inserted amino acid sequence, given as a character vector
#'     containing the individual one-letter or three-letter amino acid codes.
#'   \item{delins(startPos,startAA,endPos,endAA,seq)} AA deletion and insertion.
#'     startPos = start position (integer);
#'     startAA = start amino acid in one-letter or three-letter code;
#'     endPos = stop position (integer);
#'     endAA = end amino acid in one-letter or three-letter code;
#'     seq = inserted amino acid sequence, given as a character vector containing the
#'     individual one-letter or three-letter amino acid codes.
#'   \item{frameshift(startPos,startAA,variantAA=NA,newStop=NA)} Frameshift variant.
#'     startPos = start position (integer);
#'     startAA = start amino acid in one-letter or three-letter code;
#'     variantAA = amino acid replacing the start position in the frameshift sequence,
#'     given in one-letter or three-letter code, or \code{NA} to omit (default);
#'     newStop = the position of the nearest stop codon resulting from the frameshift
#'     (written after \code{fs*}), or \code{NA} to omit (default).
#'   \item{cis(...)} Multi-variant phased in cis. Parameters are protein HGVS strings for the
#'     corresponding single mutants. As phasing in trans would be nonsensical in a protein
#'     context, the \code{trans()} and \code{nophase()} methods are not provided here.
#' }
#'
#' @param aacode The amino acid code used in the output: \code{1} for one-letter
#'   codes or \code{3} for three-letter codes. Only the first element is used,
#'   so the default \code{c(1,3)} selects one-letter codes.
#' @return A \code{hgvs.builder.p} object with functions for building protein HGVS strings.
#'   The individual functions return single-element character vectors containing these strings.
#' @keywords HGVS builder
#' @export
#' @examples
#' builder <- new.hgvs.builder.p(aacode=3)
#' string1 <- builder$substitution(123,"R","K")
#' string2 <- builder$delins(123,"Arg",152,"Leu",c("Lys","Trp","Ser"))
#' string3 <- with(builder,cis(substitution(123,"R","K"),deletion(125,"S",152,"L")))


new.hgvs.builder.p <- function(aacode = c(1, 3)) {

  aacode <- aacode[[1]]
  # Reject everything that is not 1 or 3. The previous check combined its two
  # conditions with `&&`, so any other numeric value (e.g. 2) slipped through.
  if (!isTRUE(aacode %in% c(1, 3))) {
    stop("Invalid aacode parameter, only 1 or 3 allowed!")
  }

  # Names are one-letter codes, values are three-letter codes.
  one2three <- c(A = "Ala", C = "Cys", D = "Asp", E = "Glu", F = "Phe", G = "Gly",
    H = "His", I = "Ile", K = "Lys", L = "Leu", M = "Met", N = "Asn", P = "Pro",
    Q = "Gln", R = "Arg", S = "Ser", T = "Thr", V = "Val", W = "Trp", Y = "Tyr",
    `*` = "Ter")
  # Names are three-letter codes, values are one-letter codes.
  three2one <- c(Ala = "A", Arg = "R", Asn = "N", Asp = "D", Cys = "C", Gln = "Q",
    Glu = "E", Gly = "G", His = "H", Ile = "I", Leu = "L", Lys = "K", Met = "M",
    Phe = "F", Pro = "P", Ser = "S", Thr = "T", Trp = "W", Tyr = "Y", Val = "V",
    Ter = "*")
  # Every accepted spelling of an amino acid (both alphabets).
  validAA <- c(one2three, three2one)

  # Render a position in fixed notation (paste0() on a bare double may
  # switch to scientific notation for large values).
  fmtPos <- function(x) format(x, scientific = FALSE, trim = TRUE)

  # Translate a single amino acid into the alphabet selected by `aacode`.
  enforceCode <- function(aa) {
    if (aa %in% one2three) {
      # input is a three-letter code
      if (aacode == 1) three2one[[aa]] else aa
    } else if (aa %in% three2one) {
      # input is a one-letter code
      if (aacode == 1) aa else one2three[[aa]]
    } else {
      stop("Invalid AA code")
    }
  }

  # Translate a vector of amino acids and concatenate them.
  encodeSeq <- function(aas) {
    paste(vapply(aas, enforceCode, character(1), USE.NAMES = FALSE), collapse = "")
  }

  checkPos <- function(pos) {
    if (!is.numeric(pos) || pos < 1) stop("position must be a positive integer")
  }

  # `label` is the argument name reported in the error message.
  checkAA <- function(aa, label) {
    if (!is.character(aa) || !(aa %in% validAA)) {
      stop(label, " must be single amino acid")
    }
  }

  checkAASeq <- function(aas) {
    if (!is.character(aas) || !all(aas %in% validAA)) {
      stop("seq must be a vector of amino acids")
    }
  }

  synonymous <- function(pos = NULL, ancestral = NULL) {
    if (is.null(pos) || is.null(ancestral)) {
      return("p.=")
    }
    checkPos(pos)
    checkAA(ancestral, "ancestral")
    paste0("p.", enforceCode(ancestral), fmtPos(pos), "=")
  }

  substitution <- function(pos, ancestral, variant) {
    checkPos(pos)
    checkAA(ancestral, "ancestral")
    checkAA(variant, "variant")
    paste0("p.", enforceCode(ancestral), fmtPos(pos), enforceCode(variant))
  }

  # Shared implementation of deletion and duplication: identical validation
  # and layout, differing only in the trailing keyword. A single-residue
  # range is written without the "_<end>" part.
  rangeVariant <- function(startPos, startAA, endPos, endAA, suffix) {
    checkPos(startPos)
    checkPos(endPos)
    if (startPos > endPos) stop("start must be upstream of stop")
    checkAA(startAA, "startAA")
    checkAA(endAA, "endAA")
    startAA <- enforceCode(startAA)
    endAA <- enforceCode(endAA)
    if (startPos == endPos) {
      paste0("p.", startAA, fmtPos(startPos), suffix)
    } else {
      paste0("p.", startAA, fmtPos(startPos), "_", endAA, fmtPos(endPos), suffix)
    }
  }

  deletion <- function(startPos, startAA, endPos, endAA) {
    rangeVariant(startPos, startAA, endPos, endAA, "del")
  }

  duplication <- function(startPos, startAA, endPos, endAA) {
    rangeVariant(startPos, startAA, endPos, endAA, "dup")
  }

  insertion <- function(leftPos, leftAA, rightAA, seq) {
    checkPos(leftPos)
    checkAA(leftAA, "leftAA")
    checkAA(rightAA, "rightAA")
    checkAASeq(seq)
    rightPos <- leftPos + 1
    paste0(
      "p.", enforceCode(leftAA), fmtPos(leftPos), "_",
      enforceCode(rightAA), fmtPos(rightPos), "ins", encodeSeq(seq)
    )
  }

  delins <- function(startPos, startAA, endPos, endAA, seq) {
    checkPos(startPos)
    checkPos(endPos)
    if (startPos > endPos) stop("start must be upstream of stop")
    checkAA(startAA, "startAA")
    checkAA(endAA, "endAA")
    checkAASeq(seq)
    paste0(
      "p.", enforceCode(startAA), fmtPos(startPos), "_",
      enforceCode(endAA), fmtPos(endPos), "delins", encodeSeq(seq)
    )
  }

  frameshift <- function(startPos, startAA, variantAA = NA, newStop = NA) {
    checkPos(startPos)
    if (!is.na(newStop) && (!is.numeric(newStop) || newStop < 1)) {
      stop("position must be a positive integer")
    }
    checkAA(startAA, "startAA")
    # Validate variantAA itself (the previous check re-tested startAA here).
    if (!is.na(variantAA) && (!is.character(variantAA) || !(variantAA %in% validAA))) {
      stop("variantAA must be single amino acid or NA")
    }
    variantStr <- if (is.na(variantAA)) "" else enforceCode(variantAA)
    stopStr <- if (is.na(newStop)) "" else paste0("*", fmtPos(newStop))
    paste0("p.", enforceCode(startAA), fmtPos(startPos), variantStr, "fs", stopStr)
  }

  cis <- function(...) {
    strings <- list(...)
    if (!all(vapply(strings, is.character, logical(1)))) {
      stop("all arguments must be HGVS strings")
    }
    strings <- unlist(strings)
    if (!all(substr(strings, 1, 2) == "p.")) {
      stop("all arguments must be protein HGVS strings")
    }
    bodies <- substr(strings, 3, nchar(strings))
    paste0("p.[", paste(bodies, collapse = ";"), "]")
  }

  structure(
    list(
      synonymous = synonymous,
      substitution = substitution,
      deletion = deletion,
      duplication = duplication,
      insertion = insertion,
      delins = delins,
      frameshift = frameshift,
      cis = cis
    ),
    class = "hgvs.builder.p"
  )
}