annotate ionflow/ionflow.R @ 0:3b461dc9568b draft default tip

Uploaded
author metaboflow_cam
date Mon, 09 Aug 2021 09:41:22 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
1 #' wl-07-06-2021, Mon: The fourth version: based on Jacopo's new changes in
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
2 #' 'ionflow_funcs.R' and new pipeline 'tutorial_galaxy_ionflow.R'
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
3 #' wl-08-06-2021, Tue: finalise
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
4
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
5 ## ==== General settings ====
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
6 rm(list = ls(all.names = TRUE)) #' clear the workspace, including hidden (dot-prefixed) objects
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
7
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
8 #' Flag for command-line use. If FALSE, the script runs interactively for debugging only.
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
9 com_f <- T
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
10
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
11 #' galaxy will stop even if R has warning message
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
12 options(warn = -1) #' disable R warning. Turn back: options(warn=0)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
13
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
14 #' ------------------------------------------------------------------------
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
15 #' Setup R error handling to go to stderr
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
16 #' options( show.error.messages=F, error = function () {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
17 #' cat( geterrmessage(), file=stderr() )
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
18 #' q( "no", 1, F )
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
19 #' })
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
20
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
21 #' we need that to not crash galaxy with an UTF8 error on German LC settings.
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
22 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
23
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
24 #' wl-28-08-2018, Tue: Convert a string separated by comma into character vector
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
25 str_vec <- function(x) {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
26 x <- unlist(strsplit(x, ","))
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
27 x <- gsub("^[ \t]+|[ \t]+$", "", x) #' trim white spaces
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
28 }
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
29
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
30 pkgs <- c("optparse", "reshape2", "plyr", "dplyr", "tidyr", "ggplot2",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
31 "ggrepel", "corrplot", "gplots", "network", "sna", "GGally",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
32 "org.Sc.sgd.db","org.Hs.eg.db","GO.db", "GOstats", "KEGG.db",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
33 "pheatmap")
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
34 suppressPackageStartupMessages(invisible(lapply(pkgs, library,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
35 character.only = TRUE)))
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
36
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
37 ## ==== Command line or interactive setting ====
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
38 if (com_f) {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
39
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
40 func <- function() {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
41 argv <- commandArgs(trailingOnly = FALSE)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
42 path <- sub("--file=", "", argv[grep("--file=", argv)])
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
43 }
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
44 #' prog_name <- basename(func())
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
45 tool_dir <- paste0(dirname(func()), "/")
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
46
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
47 option_list <-
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
48 list(
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
49 make_option(c("-v", "--verbose"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
50 action = "store_true", default = TRUE,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
51 help = "Print extra output [default]"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
52 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
53 make_option(c("-q", "--quietly"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
54 action = "store_false",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
55 dest = "verbose", help = "Print little output"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
56 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
57
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
58 #' Data pre-processing
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
59 make_option("--ion_file", type = "character",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
60 help = "ion concentration file in tabular format"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
61 make_option("--var_id", type = "integer", default = 1,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
62 help = "Column index of variable"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
63 make_option("--batch_id", type = "integer", default = 3,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
64 help = "Column index of batch ID"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
65 make_option("--data_id", type = "integer", default = 5,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
66 help = "Start column index of data matrix"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
67 make_option("--method_norm", type = "character", default = "median",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
68 help = "Batch correction methods. Support: median,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
69 median+std and none"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
70 make_option("--batch_control", type = "character", default = "yes",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
71 help = "Use control lines for batch correction or not"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
72 make_option("--control_lines", type = "character", default = "BY4741",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
73 help = "Batch control lines"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
74 make_option("--control_use", type = "character", default = "all",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
75 help = "Select lines used for batch correction control.
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
76 Three selection: control, all and control.out"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
77 make_option("--method_outliers", type = "character",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
78 default = "log.FC.dist",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
79 help = "Outlier detection method. Currently support:
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
80 mad, IQR, log.FC.dist and none."),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
81 make_option("--thres_outl", type = "double", default = 3.0,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
82 help = "Outlier detection threshold"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
83 make_option("--stand_method", type = "character", default = "std",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
84 help = "Standardisation method. Currently support:
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
85 std, mad and custom."),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
86 make_option("--std_file", type = "character",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
87 help = "user predifined std file with respect to ions"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
88 make_option("--thres_symb", type = "double", default = 2.0,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
89 help = "Symbolisation threshold"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
90
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
91 #' Exploratory analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
92 make_option("--thres_ion_corr", type = "double", default = 0.15,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
93 help = "Threshold for Ion correlation (0 - 1)"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
94
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
95 #' Clustering analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
96 make_option("--min_clust_size", type = "double", default = 10.0,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
97 help = "Minimal cluster size."),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
98 make_option("--h_tree", type = "double", default = 0.0,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
99 help = "Cutting height for hierarchical clustering."),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
100 make_option("--filter_zero_string", type = "logical", default = TRUE,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
101 help = "Filter the zero string or not"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
102
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
103 #' Enrichment analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
104 make_option("--pval", type = "double", default = 0.05,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
105 help = "P-values for enrichment analysis."),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
106 make_option("--min_count", type = "double", default = 3.0,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
107 help = "Minimal count number for enrichment analysis."),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
108 make_option("--ont", type = "character", default = "BP",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
109 help = "Ontology method: BP, MF and CC."),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
110 make_option("--annot_pkg", type = "character", default = "org.Sc.sgd.db",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
111 help = "Annotation package"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
112
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
113 #' Network analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
114 make_option("--method_corr", type = "character", default = "cosine",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
115 help = "Similarity measure method. Currently support:
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
116 pearson, spearman, kendall, cosine, mahal_cosine,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
117 hybrid_mahal_cosine"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
118 make_option("--thres_corr", type = "double", default = 0.70,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
119 help = "Similarity threshold for network analysis (0 - 1).
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
120 Features large than threshold will be kept."),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
121
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
122 #' output: pre-processing
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
123 make_option("--pre_proc_pdf",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
124 type = "character", default = "pre_proc.pdf",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
125 help = "Save plots from pre-processing"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
126 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
127 make_option("--df_stats_out",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
128 type = "character", default = "df_stats.tsv",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
129 help = "Save stats summary of raw, batch corrected and
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
130 standardised data"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
131 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
132 make_option("--outl_out",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
133 type = "character", default = "outl.tsv",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
134 help = "Save outliers summary"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
135 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
136 make_option("--data_wide_out",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
137 type = "character", default = "data_wide.tsv",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
138 help = "Save pre-processed data in wide format"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
139 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
140 make_option("--data_wide_symb_out",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
141 type = "character", default = "data_wide_symb.tsv",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
142 help = "Save pre-processed data Symbolization in wide format"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
143 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
144
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
145 #' output: exploratory analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
146 make_option("--expl_anal_pdf",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
147 type = "character", default = "expl_anal.pdf",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
148 help = "Save plots from exploratory analysis"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
149 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
150
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
151 #' output: clustering analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
152 make_option("--clus_anal_pdf",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
153 type = "character", default = "clus_anal.pdf",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
154 help = "Save plots from clustering analysis"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
155 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
156
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
157 #' output: enrichment analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
158 make_option("--go_en_out",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
159 type = "character", default = "go_en.tsv",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
160 help = "Save GO enrichment table"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
161 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
162
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
163 #' output: network analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
164 make_option("--gene_net_pdf",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
165 type = "character", default = "gene_net.pdf",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
166 help = "Save plots from gene network"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
167 ),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
168 make_option("--imbe_out",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
169 type = "character", default = "impact_betweenness.tsv",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
170 help = "Save impact and betweenness table"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
171 )
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
172 )
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
173
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
174 opt <- parse_args(
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
175 object = OptionParser(option_list = option_list),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
176 args = commandArgs(trailingOnly = TRUE)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
177 )
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
178 } else {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
179 #' tool_dir <- "C:/R_lwc/my_galaxy/ionflow/"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
180 tool_dir <- "~/my_galaxy/ionflow/"
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
181
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
182 opt <- list(
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
183
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
184 #' Input
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
185 ion_file = paste0(tool_dir, "test-data/Dataset_IonFlow_Ionome_KO_short.csv"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
186 var_id = 1,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
187 batch_id = 3,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
188 data_id = 5,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
189 method_norm = "median",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
190 batch_control = "yes",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
191 control_lines = "BY4741",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
192 control_use = "all",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
193 method_outliers = "log.FC.dist",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
194 thres_outl = 3.0,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
195 stand_method = "std",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
196 thres_symb = 2,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
197
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
198 #' Exploratory analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
199 thres_ion_corr = 0.15,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
200
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
201 #' Clustering analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
202 min_clust_size = 10.0,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
203 h_tree = 0.0,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
204 filter_zero_string = TRUE,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
205
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
206 #' Enrichment analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
207 pval = 0.05,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
208 min_count = 3,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
209 ont = "BP",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
210 annot_pkg = "org.Sc.sgd.db",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
211
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
212 #' Network analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
213 method_corr = "cosine",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
214 thres_corr = 0.7,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
215
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
216 #' output: pre-processing
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
217 pre_proc_pdf = paste0(tool_dir, "test-data/res/pre_proc.pdf"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
218 df_stats_out = paste0(tool_dir, "test-data/res/df_stats.tsv"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
219 outl_out = paste0(tool_dir, "test-data/res/outl.tsv"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
220 data_wide_out = paste0(tool_dir, "test-data/res/data_wide.tsv"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
221 data_wide_symb_out = paste0(tool_dir, "test-data/res/data_wide_symb.tsv"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
222
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
223 #' output: exploratory analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
224 expl_anal_pdf = paste0(tool_dir, "test-data/res/expl_anal.pdf"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
225
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
226 #' output: clustering analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
227 clus_anal_pdf = paste0(tool_dir, "test-data/res/clus_anal.pdf"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
228
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
229 #' output: enrichment analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
230 go_en_out = paste0(tool_dir, "test-data/res/go_en.tsv"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
231
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
232 #' output: network analysis
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
233 gene_net_pdf = paste0(tool_dir, "test-data/res/gene_net.pdf"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
234 imbe_out = paste0(tool_dir, "test-data/res/impact_betweenness.tsv")
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
235 )
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
236 }
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
237 #' print(opt)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
238
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
239 suppressPackageStartupMessages({
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
240 source(paste0(tool_dir, "ionflow_funcs.R"))
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
241 })
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
242
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
243 ## ==== Data preparation ====
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
244 ion_data <- read.table(opt$ion_file, header = T, sep = ",")
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
245
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
246 if (opt$batch_control == "yes") {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
247 control_lines <- opt$control_lines #' use the exact option name; do not rely on `$` partial matching
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
248 } else {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
249 control_lines <- NULL
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
250 }
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
251
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
252 if (opt$stand_method == "custom") { #' if (length(opt$std_file) > 0) {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
253 stdev <- read.table(opt$std_file, header = T, sep = "\t")
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
254 } else {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
255 stdev <- NULL
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
256 }
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
257
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
258 ## ==== Pre-processing ====
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
259 pre <- PreProcessing(data = ion_data,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
260 var_id = opt$var_id,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
261 batch_id = opt$batch_id,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
262 data_id = opt$data_id,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
263 method_norm = opt$method_norm,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
264 control_lines = control_lines,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
265 control_use = opt$control_use,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
266 method_outliers = opt$method_outliers,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
267 thres_outl = opt$thres_outl,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
268 stand_method = opt$stand_method,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
269 stdev = stdev,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
270 thres_symb = opt$thres_symb)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
271
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
272 #' save plot in pdf
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
273 pdf(file = opt$pre_proc_pdf, onefile = T) # width = 15, height = 10
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
274 plot(pre$plot.hist)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
275 plot(pre$plot.overview)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
276 plot(pre$plot.medians)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
277 plot(pre$plot.CV)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
278 plot(pre$plot.change.stat)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
279 plot(pre$plot.change.dir)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
280 dev.off()
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
281
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
282 #' combine stats
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
283 df_stats <- list(raw_data = pre$stats.raw.data,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
284 bat_data = pre$stats.batches)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
285 df_stats <- dplyr::bind_rows(df_stats, .id = "Data_Set")
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
286 row.names(df_stats) = NULL
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
287
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
288 #' save tables
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
289 write.table(df_stats, file = opt$df_stats_out, sep = "\t", row.names = F)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
290 write.table(pre$stats.outliers, file = opt$outl_out, sep = "\t",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
291 row.names = F)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
292 write.table(pre$data.line.zscores, file = opt$data_wide_out, sep = "\t",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
293 row.names = F)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
294 write.table(pre$data.line.symb, file = opt$data_wide_symb_out,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
295 sep = "\t", row.names = F)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
296
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
297 ## ==== Exploratory analysis ====
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
298
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
299 pdf(file = opt$expl_anal_pdf, onefile = T)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
300 expl <- IonAnalysis(data = pre$data.line.zscores,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
301 thres_ion_corr = opt$thres_ion_corr)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
302 plot(expl$plot.pca)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
303 plot(expl$plot.net)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
304 dev.off()
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
305
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
306 ## ==== Clustering analysis ====
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
307
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
308 gcl <- ProfileClustering(pre$data.line.symb,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
309 min_clust_size = opt$min_clust_size,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
310 h_tree = opt$h_tree,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
311 filter_zero_string = opt$filter_zero_string)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
312
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
313 #' select larger clusters
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
314 cluster_vector <-
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
315 gcl$clusters.vector[gcl$clusters.vector$Cluster %in%
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
316 gcl$tab.clusters.subset$Cluster, ]
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
317
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
318 #' extract symbolic and z-score profiles for lines in selected clusters
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
319 symbol_profiles <- pre$data.line.symb
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
320 symbol_profiles$Cluster <-
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
321 cluster_vector$Cluster[match(symbol_profiles$Line, cluster_vector$Line)]
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
322
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
323 zscore_profiles <- pre$data.line.zscores
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
324 zscore_profiles$Cluster <-
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
325 cluster_vector$Cluster[match(zscore_profiles$Line, cluster_vector$Line)]
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
326
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
327 #' remove lines showing no phenotype
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
328 symbol_profiles <- symbol_profiles[!is.na(symbol_profiles$Cluster),]
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
329 zscore_profiles <- zscore_profiles[!is.na(zscore_profiles$Cluster),]
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
330
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
331 mat_long <- reshape2::melt(zscore_profiles, id = c("Line", "Cluster"),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
332 variable.name = "Ion", value.name = "zscore")
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
333
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
334 mat_long$n.genes <-
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
335 gcl$tab.clusters.subset$Number.of.genes[match(mat_long$Cluster,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
336 gcl$tab.clusters.subset$Cluster)]
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
337 mat_long$title <- paste0('Cluster ', mat_long$Cluster,' (',
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
338 mat_long$n.genes, ' genes)')
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
339
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
340 p_gcl <-
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
341 ggplot(data = mat_long, aes(x = Ion, y = zscore, group = Line), color = "gray") +
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
342 geom_line() +
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
343 stat_summary(fun.data = "mean_se", color = "red", geom = "line", group = 1) +
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
344 labs(x = "", y = "z-score") +
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
345 coord_cartesian(ylim = c(-8, 8)) +
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
346 facet_wrap(~title) +
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
347 theme(legend.position = "none",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
348 axis.text.x = element_text(angle = 90, hjust = 1),
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
349 axis.text = element_text(size = 10))
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
350
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
351 pdf(file = opt$clus_anal_pdf, onefile = T)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
352 plot(p_gcl)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
353 dev.off()
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
354
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
355 ## ==== Enrichment analysis ====
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
356
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
357 ge <- GOEnricher(cluster_vector,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
358 pval = opt$pval,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
359 min_count = opt$min_count,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
360 annot_pkg = opt$annot_pkg,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
361 ont = opt$ont,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
362 gene_uni = as.character(pre$data.line.zscores$Line))
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
363
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
364 if (nrow(ge$enrichment.summary) > 0) {
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
365 write.table(ge$enrichment.summary, file = opt$go_en_out, sep = "\t",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
366 row.names = FALSE)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
367 }
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
368
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
369 ## ==== Network analysis ====
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
370
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
371 gn <- GeneticNetwork(data = zscore_profiles,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
372 method_corr = opt$method_corr,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
373 thres_corr = opt$thres_corr,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
374 network_modules = "input",
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
375 cluster_vector = cluster_vector,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
376 cluster_label_vector = NULL)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
377
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
378 pdf(file = opt$gene_net_pdf, onefile = T) # width = 15, height = 10
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
379 plot(gn$plot.network)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
380 plot(gn$plot.impact_betweenness)
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
381 dev.off()
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
382
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
383 write.table(gn$stats.impact_betweenness, file = opt$imbe_out,
3b461dc9568b Uploaded
metaboflow_cam
parents:
diff changeset
384 sep = "\t", row.names = FALSE)