annotate isolab/pkgs/all_ecipex.R @ 3:63b35931859a draft default tip

Uploaded
author metaboflow_cam
date Tue, 22 Oct 2019 04:25:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
1
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
2 #' ======================================================================
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
3 #' wl-26-03-2019, Tue: taken from R package 'ecipex'. This is the only
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
4 #' function in the package. https://cran.r-project.org/package=ecipex
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
5 ecipex <- function(formulas, isoinfo = nistiso, limit = 1e-12, id = FALSE, sortby = "abundance") {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
6 sortby <- match.arg(sortby, c("abundance", "mass"))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
7
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
8 # determine which elements are present in each molecule and the corresponding copy number
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
9 #' elementCount <- lapply(formulas, CHNOSZ::count.elements)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
10 elementCount <- lapply(formulas, count.elements)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
11
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
12 # these lists will be used as indices later
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
13 elementList <- lapply(elementCount, names)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
14 copyList <- lapply(elementCount, format, scientific = FALSE, trim = TRUE)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
15
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
16 # treat them as vectors for now
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
17 elements <- unlist(lapply(elementCount, names))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
18 copies <- as.numeric(unlist(elementCount))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
19
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
20 uniqueElements <- unique(unlist(elements))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
21
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
22 # define a list with an entry for each unique element, stating the unique set of copy numbers that will be required for the formulas provided
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
23 compositionList <- lapply(uniqueElements, function(x) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
24 sort(unique(copies[elements == x]))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
25 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
26 names(compositionList) <- uniqueElements
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
27
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
28 # determine the maximum copy number for each element in all of the formulas provided. Needed to determine dimensions of isofft array.
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
29 maxElements <- sapply(compositionList, max)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
30
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
31 # define a list with an entry for each unique element containing the isotopic expansions for all copy numbers required by the formulas
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
32 pureIsos <- lapply(seq(uniqueElements), function(x) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
33 abundance <- isoinfo$abundance[isoinfo$element == uniqueElements[x]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
34 mass <- isoinfo$mass[isoinfo$element == uniqueElements[x]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
35 nucleons <- isoinfo$nucleons[isoinfo$element == uniqueElements[x]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
36
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
37 # in case abundances of zero were included
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
38 mass <- mass[abundance > 0]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
39 abundance <- abundance[abundance > 0]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
40
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
41 # dimensions of isofft array will be isoLength^isoDimension
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
42 isoLength <- nextn(maxElements[x] + 1) # may increase length of isofft but makes it more "composite" so that fft can more efficiently be taken
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
43 isoDimension <- length(abundance) - 1
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
44
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
45 # for elements with only one isotopic variant
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
46 if (isoDimension == 0) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
47 abundanceOut <- lapply(seq(length(compositionList[[x]])), function(y) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
48 1
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
49 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
50 massOut <- lapply(seq(length(compositionList[[x]])), function(y) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
51 compositionList[[x]][y] * mass
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
52 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
53
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
54 names(abundanceOut) <- compositionList[[x]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
55 names(massOut) <- compositionList[[x]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
56
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
57 if (!id) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
58 return(list("mass" = massOut, "abundance" = abundanceOut))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
59 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
60 if (id) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
61 speciesOut <- lapply(seq(length(compositionList[[x]])), function(y) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
62 s <- matrix(compositionList[[x]][y])
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
63 colnames(s) <- paste(nucleons, uniqueElements[x], sep = "")
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
64 return(s)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
65 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
66 names(speciesOut) <- compositionList[[x]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
67 return(list("mass" = massOut, "abundance" = abundanceOut, "isoSpecies" = speciesOut))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
68 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
69 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
70
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
71 # define array containing abundances for a single atom of the current element. Its dimensions must be able to accomodate the appropriate maxElements-fold convolution without confounding overlap
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
72 abundanceIn <- array(rep.int(0, isoLength^isoDimension), dim = rep(isoLength, isoDimension))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
73 abundanceIn[1] <- abundance[1]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
74 abundanceIn[1 + isoLength^seq(0, isoDimension - 1)] <- abundance[seq(2, isoDimension + 1)]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
75
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
76 # The output of the fft is stored in a hypercube with isoLength^isoDimension entries. We require only the entries in a series of the simplexes it contains. We therefore define an array of the same dimensions as abundanceIn (isoLength^isoDimension) which is enumerated in such a way that it is easy to extract the simplexes and save memory
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
77 simplexIndex <- seq(0, isoLength - 1, by = 1)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
78 i <- 1
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
79 while (i < isoDimension) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
80 simplexIndex <- outer(simplexIndex, seq(0, isoLength - 1, by = 1), FUN = "+")
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
81 i <- i + 1
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
82 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
83
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
84 # perform the actual multidimensional fft convolution to obtain the desired abundances. Also remove abundance-elements lower than than the specified limit to save memory.
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
85 fftIn <- fft(abundanceIn)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
86 convolOut <- lapply(compositionList[[x]], function(n) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
87 arrayOut <- Re(fft(fftIn^n, inverse = TRUE)) / (isoLength)^isoDimension
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
88 species <- which((arrayOut >= limit) & (simplexIndex <= n), arr.ind = TRUE) # these indices enable us to determine which isotopic species we are dealing with
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
89 return(list("abundance" = arrayOut[species], "species" = species))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
90 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
91
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
92 # obtain the corresponding masses and remove abundance-elements lower than than the specified limit to save memory.
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
93 massDiffs <- outer(mass[-1] - mass[1], seq(0, isoLength - 1))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
94 output <- massDiffs[1, ]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
95 if (length(mass) > 2) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
96 for (i in 2:nrow(massDiffs)) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
97 output <- outer(output, massDiffs[i, ], FUN = "+")
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
98 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
99 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
100 massOut <- lapply(seq_along(compositionList[[x]]), function(n) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
101 mass[1] * compositionList[[x]][n] + output[convolOut[[n]]$species]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
102 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
103
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
104 # sort entries in order of descending abundance
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
105 descendingOrder <- lapply(seq_along(compositionList[[x]]), function(y) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
106 order(convolOut[[y]]$abundance, decreasing = TRUE)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
107 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
108 abundanceOut <- lapply(seq_along(compositionList[[x]]), function(y) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
109 convolOut[[y]]$abundance[descendingOrder[[y]]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
110 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
111 massOut <- lapply(seq_along(compositionList[[x]]), function(y) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
112 massOut[[y]][descendingOrder[[y]]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
113 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
114
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
115 names(abundanceOut) <- format(compositionList[[x]], scientific = FALSE, trim = TRUE)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
116 names(massOut) <- format(compositionList[[x]], scientific = FALSE, trim = TRUE)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
117
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
118 if (!id) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
119 return(list("mass" = massOut, "abundance" = abundanceOut))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
120 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
121 if (id) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
122 speciesOut <- lapply(seq_along(compositionList[[x]]), function(y) { # determine the isotopic species of the values listed in massOut and abundanceOut
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
123 specMat <- cbind(compositionList[[x]][y] - apply(as.matrix(convolOut[[y]]$species[descendingOrder[[y]], ]) - 1, 1, sum), convolOut[[y]]$species[descendingOrder[[y]], ] - 1)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
124 colnames(specMat) <- paste(nucleons, uniqueElements[x], sep = "")
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
125 return(specMat)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
126 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
127 names(speciesOut) <- format(compositionList[[x]], scientific = FALSE, trim = TRUE)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
128
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
129 return(list("mass" = massOut, "abundance" = abundanceOut, "isoSpecies" = speciesOut))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
130 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
131 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
132 names(pureIsos) <- uniqueElements
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
133
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
134
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
135 # list that will contain the full isotopic expansion for all formulas
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
136 fullIsos <- lapply(seq(formulas), function(x) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
137
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
138 # extract highest abundance for each atomic species in current formula
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
139 maxProbs <- sapply(seq(elementList[[x]]), function(i) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
140 pureIsos[[elementList[[x]][i]]]$abundance[[copyList[[x]][i]]][1]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
141 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
142
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
143 # for each atomic species we immediately discard abundances which are less than the limit after multiplication by highest abundances in all other species. Similarly for masses, and isotopic species
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
144 ma <- lapply(seq(elementList[[x]]), function(i) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
145 z <- pureIsos[[elementList[[x]][i]]]$mass[[copyList[[x]][i]]][pureIsos[[elementList[[x]][i]]]$abundance[[copyList[[x]][i]]] * prod(maxProbs[-i]) > limit]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
146 if (length(z) == 0) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
147 return(pureIsos[[elementList[[x]][i]]]$mass[[copyList[[x]][i]]][which.max(pureIsos[[elementList[[x]][i]]]$abundance[[copyList[[x]][i]]])])
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
148 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
149 return(z)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
150 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
151 ab <- lapply(seq(elementList[[x]]), function(i) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
152 z <- pureIsos[[elementList[[x]][i]]]$abundance[[copyList[[x]][i]]][pureIsos[[elementList[[x]][i]]]$abundance[[copyList[[x]][i]]] * prod(maxProbs[-i]) > limit]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
153 if (length(z) == 0) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
154 return(max(pureIsos[[elementList[[x]][i]]]$abundance[[copyList[[x]][i]]]))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
155 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
156 return(z)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
157 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
158 if (id) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
159 is <- lapply(seq(elementList[[x]]), function(i) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
160 if (ncol(pureIsos[[elementList[[x]][i]]]$isoSpecies[[copyList[[x]][i]]]) == 1) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
161 return(pureIsos[[elementList[[x]][i]]]$isoSpecies[[copyList[[x]][i]]])
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
162 } # if there is only 1 isotope
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
163 z <- as.matrix(pureIsos[[elementList[[x]][i]]]$isoSpecies[[copyList[[x]][i]]][pureIsos[[elementList[[x]][i]]]$abundance[[copyList[[x]][i]]] * prod(maxProbs[-i]) > limit, ])
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
164 if (ncol(z) <= 1) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
165 return(t(as.matrix(pureIsos[[elementList[[x]][i]]]$isoSpecies[[copyList[[x]][i]]][which.max(pureIsos[[elementList[[x]][i]]]$abundance[[copyList[[x]][i]]]), ])))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
166 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
167 return(z)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
168 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
169 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
170
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
171 # these vectors will contain full isotopic expansions, but initially they contain the first elemental expansion
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
172 moleculeMass <- ma[[1]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
173 moleculeAbundance <- ab[[1]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
174 if (id) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
175 moleculeSpecies <- is[[1]]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
176 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
177
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
178 # fold them an appropriate number of times
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
179 # each time we fold, a number of abundances will fall below limit and these are excluded. Furthermore, we can exclude abundances that are less than the limit after they have been multiplied by the product of the highest abundances in the remaining atomic species
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
180 remainingMaxAbundance <- c(cumprod(maxProbs[c(length(maxProbs):1)])[c(length(maxProbs):1)], 1) # compare with e.g. prod(maxProbs[c(3:5)])
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
181 i <- 2
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
182 while (i <= length(elementList[[x]])) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
183 moleculeMass <- outer(moleculeMass, pureIsos[[elementList[[x]][i]]]$mass[[copyList[[x]][i]]], FUN = "+")
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
184 moleculeAbundance <- outer(moleculeAbundance, pureIsos[[elementList[[x]][i]]]$abundance[[copyList[[x]][i]]])
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
185 keepers <- which(moleculeAbundance > limit / remainingMaxAbundance[i + 1], arr.ind = TRUE)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
186
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
187 moleculeMass <- moleculeMass[keepers]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
188 moleculeAbundance <- moleculeAbundance[keepers]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
189
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
190 if (id) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
191 moleculeSpecies <- cbind(matrix(moleculeSpecies[keepers[, 1], ], ncol = ncol(moleculeSpecies), dimnames = list(NULL, colnames(moleculeSpecies))), matrix(pureIsos[[elementList[[x]][i]]]$isoSpecies[[copyList[[x]][i]]][keepers[, 2], ], ncol = ncol(pureIsos[[elementList[[x]][i]]]$isoSpecies[[copyList[[x]][i]]]), dimnames = list(NULL, colnames(pureIsos[[elementList[[x]][i]]]$isoSpecies[[copyList[[x]][i]]]))))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
192 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
193 i <- i + 1
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
194 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
195
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
196 if (sortby == "abundance") {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
197 dex <- order(moleculeAbundance, decreasing = TRUE)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
198 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
199 if (sortby == "mass") {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
200 dex <- order(moleculeMass, decreasing = FALSE)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
201 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
202
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
203 if (id) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
204 return(cbind(data.frame("mass" = moleculeMass[dex], "abundance" = moleculeAbundance[dex]), matrix(moleculeSpecies[dex, ], ncol = ncol(moleculeSpecies), dimnames = list(NULL, colnames(moleculeSpecies)))))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
205 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
206 if (!id) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
207 return(data.frame("mass" = moleculeMass[dex], "abundance" = moleculeAbundance[dex]))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
208 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
209 })
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
210 names(fullIsos) <- formulas
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
211
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
212 return(fullIsos)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
213 }
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
214
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
215 #' ======================================================================
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
216 #' wl-26-03-2019, Tue: From function 'makeup.R' of R package 'CHNOSZ'
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
217 #' https://cran.r-project.org/package=CHNOSZ
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
218 count.elements <- function(formula) {
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
219 # count the elements in a chemical formula 20120111 jmd
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
220 # this function expects a simple formula,
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
221 # no charge or parenthetical or suffixed subformulas
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
222 # regular expressions inspired by an answer on
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
223 # http://stackoverflow.com/questions/4116786/parsing-a-chemical-formula-from-a-string-in-c
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
224 #elementRegex <- "([A-Z][a-z]*)([0-9]*)"
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
225 elementSymbol <- "([A-Z][a-z]*)"
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
226 # here, element coefficients can be signed (+ or -) and have a decimal point
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
227 elementCoeff <- "((\\+|-|\\.|[0-9])*)"
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
228 elementRegex <- paste(elementSymbol, elementCoeff, sep="")
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
229 # stop if it doesn't look like a chemical formula
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
230 validateRegex <- paste("^(", elementRegex, ")+$", sep="")
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
231 if(length(grep(validateRegex, formula)) == 0)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
232 stop(paste("'",formula,"' is not a simple chemical formula", sep="", collapse="\n"))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
233 # where to put the output
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
234 element <- character()
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
235 count <- numeric()
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
236 # from now use "f" for formula to make writing the code easier
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
237 f <- formula
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
238 # we want to find the starting positions of all the elemental symbols
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
239 # make substrings, starting at every position in the formula
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
240 fsub <- sapply(1:nchar(f), function(i) substr(f, i, nchar(f)))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
241 # get the numbers (positions) that start with an elemental symbol
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
242 # i.e. an uppercase letter
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
243 ielem <- grep("^[A-Z]", fsub)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
244 # for each elemental symbol, j is the position before the start of the next
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
245 # symbol (or the position of the last character of the formula)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
246 jelem <- c(tail(ielem - 1, -1), nchar(f))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
247 # assemble the stuff: each symbol-coefficient combination
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
248 ec <- sapply(seq_along(ielem), function(i) substr(f, ielem[i], jelem[i]))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
249 # get the individual element symbols and coefficients
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
250 myelement <- gsub(elementCoeff, "", ec)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
251 mycount <- as.numeric(gsub(elementSymbol, "", ec))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
252 # any missing coefficients are unity
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
253 mycount[is.na(mycount)] <- 1
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
254 # append to the output
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
255 element <- c(element, myelement)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
256 count <- c(count, mycount)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
257 # in case there are repeated elements, sum all of their counts
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
258 # (tapply hint from https://stat.ethz.ch/pipermail/r-help/2011-January/265341.html)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
259 # use simplify=FALSE followed by unlist to get a vector, not array 20171005
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
260 out <- unlist(tapply(count, element, sum, simplify=FALSE))
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
261 # tapply returns alphabetical sorted list. keep the order appearing in the formula
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
262 out <- out[match(unique(element), names(out))]
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
263 return(out)
63b35931859a Uploaded
metaboflow_cam
parents:
diff changeset
264 }