Mercurial > repos > recetox > recetox_aplcms_align_features
comparison utils.R @ 0:57c644d3f24c draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 19de0924a65bc65cbbf7c1fc17e9b5348305f95c
author | recetox |
---|---|
date | Fri, 10 Jun 2022 10:17:46 +0000 |
parents | |
children | abe783e0daca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:57c644d3f24c |
---|---|
1 library(recetox.aplcms) | |
2 | |
3 align_features <- function(sample_names, ...) { | |
4 aligned <- feature.align(...) | |
5 feature_names <- seq_len(nrow(aligned$pk.times)) | |
6 | |
7 list( | |
8 mz_tolerance = as.numeric(aligned$mz.tol), | |
9 rt_tolerance = as.numeric(aligned$chr.tol), | |
10 rt_crosstab = as_feature_crosstab(feature_names, sample_names, aligned$pk.times), | |
11 int_crosstab = as_feature_crosstab(feature_names, sample_names, aligned$aligned.ftrs) | |
12 ) | |
13 } | |
14 | |
15 get_sample_name <- function(filename) { | |
16 tools::file_path_sans_ext(basename(filename)) | |
17 } | |
18 | |
19 as_feature_crosstab <- function(feature_names, sample_names, data) { | |
20 colnames(data) <- c("mz", "rt", "mz_min", "mz_max", sample_names) | |
21 rownames(data) <- feature_names | |
22 as.data.frame(data) | |
23 } | |
24 | |
25 as_feature_sample_table <- function(rt_crosstab, int_crosstab) { | |
26 feature_names <- rownames(rt_crosstab) | |
27 sample_names <- colnames(rt_crosstab)[- (1:4)] | |
28 | |
29 feature_table <- data.frame( | |
30 feature = feature_names, | |
31 mz = rt_crosstab[, 1], | |
32 rt = rt_crosstab[, 2] | |
33 ) | |
34 | |
35 # series of conversions to produce a table type from data.frame | |
36 rt_crosstab <- as.table(as.matrix(rt_crosstab[, - (1:4)])) | |
37 int_crosstab <- as.table(as.matrix(int_crosstab[, - (1:4)])) | |
38 | |
39 crosstab_axes <- list(feature = feature_names, sample = sample_names) | |
40 dimnames(rt_crosstab) <- dimnames(int_crosstab) <- crosstab_axes | |
41 | |
42 x <- as.data.frame(rt_crosstab, responseName = "sample_rt") | |
43 y <- as.data.frame(int_crosstab, responseName = "sample_intensity") | |
44 | |
45 data <- merge(x, y, by = c("feature", "sample")) | |
46 data <- merge(feature_table, data, by = "feature") | |
47 data | |
48 } | |
49 | |
50 load_features <- function(files) { | |
51 files_list <- sort_samples_by_acquisition_number(files) | |
52 features <- lapply(files_list, arrow::read_parquet) | |
53 features <- lapply(features, as.matrix) | |
54 return(features) | |
55 } | |
56 | |
57 save_data_as_parquet_files <- function(data, subdir) { | |
58 dir.create(subdir) | |
59 for (i in 0:(length(data) - 1)) { | |
60 filename <- file.path(subdir, paste0(subdir, "_features_", i, ".parquet")) | |
61 arrow::write_parquet(as.data.frame(data[i + 1]), filename) | |
62 } | |
63 } | |
64 | |
65 save_aligned_features <- function(aligned, rt_file, int_file, tol_file) { | |
66 arrow::write_parquet(as.data.frame(aligned$rt_crosstab), rt_file) | |
67 arrow::write_parquet(as.data.frame(aligned$int_crosstab), int_file) | |
68 | |
69 mz_tolerance <- c(aligned$mz_tolerance) | |
70 rt_tolerance <- c(aligned$rt_tolerance) | |
71 arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file) | |
72 } | |
73 | |
74 load_aligned_features <- function(rt_file, int_file, tol_file) { | |
75 rt_cross_table <- arrow::read_parquet(rt_file) | |
76 int_cross_table <- arrow::read_parquet(int_file) | |
77 tolerances_table <- arrow::read_parquet(tol_file) | |
78 | |
79 result <- list() | |
80 result$mz_tolerance <- tolerances_table$mz_tolerance | |
81 result$rt_tolerance <- tolerances_table$rt_tolerance | |
82 result$rt_crosstab <- rt_cross_table | |
83 result$int_crosstab <- int_cross_table | |
84 return(result) | |
85 } | |
86 | |
87 recover_signals <- function(cluster, | |
88 filenames, | |
89 extracted, | |
90 corrected, | |
91 aligned, | |
92 mz_tol = 1e-05, | |
93 mz_range = NA, | |
94 rt_range = NA, | |
95 use_observed_range = TRUE, | |
96 min_bandwidth = NA, | |
97 max_bandwidth = NA, | |
98 recover_min_count = 3) { | |
99 if (!is(cluster, "cluster")) { | |
100 cluster <- parallel::makeCluster(cluster) | |
101 on.exit(parallel::stopCluster(cluster)) | |
102 } | |
103 | |
104 clusterExport(cluster, c("extracted", "corrected", "aligned", "recover.weaker")) | |
105 clusterEvalQ(cluster, library("splines")) | |
106 | |
107 recovered <- parLapply(cluster, seq_along(filenames), function(i) { | |
108 recover.weaker( | |
109 loc = i, | |
110 filename = filenames[[i]], | |
111 this.f1 = extracted[[i]], | |
112 this.f2 = corrected[[i]], | |
113 pk.times = aligned$rt_crosstab, | |
114 aligned.ftrs = aligned$int_crosstab, | |
115 orig.tol = mz_tol, | |
116 align.mz.tol = aligned$mz_tolerance, | |
117 align.chr.tol = aligned$rt_tolerance, | |
118 mz.range = mz_range, | |
119 chr.range = rt_range, | |
120 use.observed.range = use_observed_range, | |
121 bandwidth = 0.5, | |
122 min.bw = min_bandwidth, | |
123 max.bw = max_bandwidth, | |
124 recover.min.count = recover_min_count | |
125 ) | |
126 }) | |
127 | |
128 feature_table <- aligned$rt_crosstab[, 1:4] | |
129 rt_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.times)) | |
130 int_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.ftrs)) | |
131 | |
132 feature_names <- rownames(feature_table) | |
133 sample_names <- colnames(aligned$rt_crosstab[, - (1:4)]) | |
134 | |
135 list( | |
136 extracted_features = lapply(recovered, function(x) x$this.f1), | |
137 corrected_features = lapply(recovered, function(x) x$this.f2), | |
138 rt_crosstab = as_feature_crosstab(feature_names, sample_names, rt_crosstab), | |
139 int_crosstab = as_feature_crosstab(feature_names, sample_names, int_crosstab) | |
140 ) | |
141 } | |
142 | |
143 create_feature_sample_table <- function(features) { | |
144 table <- as_feature_sample_table( | |
145 rt_crosstab = features$rt_crosstab, | |
146 int_crosstab = features$int_crosstab | |
147 ) | |
148 return(table) | |
149 } |