comparison utils.R @ 0:57c644d3f24c draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 19de0924a65bc65cbbf7c1fc17e9b5348305f95c
author recetox
date Fri, 10 Jun 2022 10:17:46 +0000
parents
children abe783e0daca
comparison
equal deleted inserted replaced
-1:000000000000 0:57c644d3f24c
1 library(recetox.aplcms)
2
3 align_features <- function(sample_names, ...) {
4 aligned <- feature.align(...)
5 feature_names <- seq_len(nrow(aligned$pk.times))
6
7 list(
8 mz_tolerance = as.numeric(aligned$mz.tol),
9 rt_tolerance = as.numeric(aligned$chr.tol),
10 rt_crosstab = as_feature_crosstab(feature_names, sample_names, aligned$pk.times),
11 int_crosstab = as_feature_crosstab(feature_names, sample_names, aligned$aligned.ftrs)
12 )
13 }
14
15 get_sample_name <- function(filename) {
16 tools::file_path_sans_ext(basename(filename))
17 }
18
19 as_feature_crosstab <- function(feature_names, sample_names, data) {
20 colnames(data) <- c("mz", "rt", "mz_min", "mz_max", sample_names)
21 rownames(data) <- feature_names
22 as.data.frame(data)
23 }
24
25 as_feature_sample_table <- function(rt_crosstab, int_crosstab) {
26 feature_names <- rownames(rt_crosstab)
27 sample_names <- colnames(rt_crosstab)[- (1:4)]
28
29 feature_table <- data.frame(
30 feature = feature_names,
31 mz = rt_crosstab[, 1],
32 rt = rt_crosstab[, 2]
33 )
34
35 # series of conversions to produce a table type from data.frame
36 rt_crosstab <- as.table(as.matrix(rt_crosstab[, - (1:4)]))
37 int_crosstab <- as.table(as.matrix(int_crosstab[, - (1:4)]))
38
39 crosstab_axes <- list(feature = feature_names, sample = sample_names)
40 dimnames(rt_crosstab) <- dimnames(int_crosstab) <- crosstab_axes
41
42 x <- as.data.frame(rt_crosstab, responseName = "sample_rt")
43 y <- as.data.frame(int_crosstab, responseName = "sample_intensity")
44
45 data <- merge(x, y, by = c("feature", "sample"))
46 data <- merge(feature_table, data, by = "feature")
47 data
48 }
49
50 load_features <- function(files) {
51 files_list <- sort_samples_by_acquisition_number(files)
52 features <- lapply(files_list, arrow::read_parquet)
53 features <- lapply(features, as.matrix)
54 return(features)
55 }
56
57 save_data_as_parquet_files <- function(data, subdir) {
58 dir.create(subdir)
59 for (i in 0:(length(data) - 1)) {
60 filename <- file.path(subdir, paste0(subdir, "_features_", i, ".parquet"))
61 arrow::write_parquet(as.data.frame(data[i + 1]), filename)
62 }
63 }
64
65 save_aligned_features <- function(aligned, rt_file, int_file, tol_file) {
66 arrow::write_parquet(as.data.frame(aligned$rt_crosstab), rt_file)
67 arrow::write_parquet(as.data.frame(aligned$int_crosstab), int_file)
68
69 mz_tolerance <- c(aligned$mz_tolerance)
70 rt_tolerance <- c(aligned$rt_tolerance)
71 arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file)
72 }
73
74 load_aligned_features <- function(rt_file, int_file, tol_file) {
75 rt_cross_table <- arrow::read_parquet(rt_file)
76 int_cross_table <- arrow::read_parquet(int_file)
77 tolerances_table <- arrow::read_parquet(tol_file)
78
79 result <- list()
80 result$mz_tolerance <- tolerances_table$mz_tolerance
81 result$rt_tolerance <- tolerances_table$rt_tolerance
82 result$rt_crosstab <- rt_cross_table
83 result$int_crosstab <- int_cross_table
84 return(result)
85 }
86
87 recover_signals <- function(cluster,
88 filenames,
89 extracted,
90 corrected,
91 aligned,
92 mz_tol = 1e-05,
93 mz_range = NA,
94 rt_range = NA,
95 use_observed_range = TRUE,
96 min_bandwidth = NA,
97 max_bandwidth = NA,
98 recover_min_count = 3) {
99 if (!is(cluster, "cluster")) {
100 cluster <- parallel::makeCluster(cluster)
101 on.exit(parallel::stopCluster(cluster))
102 }
103
104 clusterExport(cluster, c("extracted", "corrected", "aligned", "recover.weaker"))
105 clusterEvalQ(cluster, library("splines"))
106
107 recovered <- parLapply(cluster, seq_along(filenames), function(i) {
108 recover.weaker(
109 loc = i,
110 filename = filenames[[i]],
111 this.f1 = extracted[[i]],
112 this.f2 = corrected[[i]],
113 pk.times = aligned$rt_crosstab,
114 aligned.ftrs = aligned$int_crosstab,
115 orig.tol = mz_tol,
116 align.mz.tol = aligned$mz_tolerance,
117 align.chr.tol = aligned$rt_tolerance,
118 mz.range = mz_range,
119 chr.range = rt_range,
120 use.observed.range = use_observed_range,
121 bandwidth = 0.5,
122 min.bw = min_bandwidth,
123 max.bw = max_bandwidth,
124 recover.min.count = recover_min_count
125 )
126 })
127
128 feature_table <- aligned$rt_crosstab[, 1:4]
129 rt_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.times))
130 int_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.ftrs))
131
132 feature_names <- rownames(feature_table)
133 sample_names <- colnames(aligned$rt_crosstab[, - (1:4)])
134
135 list(
136 extracted_features = lapply(recovered, function(x) x$this.f1),
137 corrected_features = lapply(recovered, function(x) x$this.f2),
138 rt_crosstab = as_feature_crosstab(feature_names, sample_names, rt_crosstab),
139 int_crosstab = as_feature_crosstab(feature_names, sample_names, int_crosstab)
140 )
141 }
142
143 create_feature_sample_table <- function(features) {
144 table <- as_feature_sample_table(
145 rt_crosstab = features$rt_crosstab,
146 int_crosstab = features$int_crosstab
147 )
148 return(table)
149 }