comparison mykrobe_parser.R @ 4:8529045f0fdf draft

planemo upload for repository https://github.com/phac-nml/mykrobe-parser commit 34d9c47b9451e5f7843028dba22b96d125fb09f5
author nml
date Thu, 25 Apr 2019 10:13:46 -0400
parents f2608dccd3e0
children deebc6410d13
comparison
equal deleted inserted replaced
3:0dcfdec76c3c 4:8529045f0fdf
150 "", # Lims_INTComment 150 "", # Lims_INTComment
151 opt$version, # Mykrobe_Workflow_Version 151 opt$version, # Mykrobe_Workflow_Version
152 opt$depth, # Mykrobe_min_depth_default_5 152 opt$depth, # Mykrobe_min_depth_default_5
153 opt$conf, # Mykrobe_min_conf_default_10 153 opt$conf, # Mykrobe_min_conf_default_10
154 "", # LIMS_file - empty as it's an upload field in LIMS 154 "", # LIMS_file - empty as it's an upload field in LIMS
155 opt$name) # LIMS_filename 155 opt$name) # Mutation_set_version
156 156
157 names(params) <- c("Lims_Comment", 157 names(params) <- c("Lims_Comment",
158 "Lims_INTComment", 158 "Lims_INTComment",
159 "Mykrobe_Workflow_Version", 159 "Mykrobe_Workflow_Version",
160 "Mykrobe_min_depth_default_5", 160 "Mykrobe_min_depth_default_5",
161 "Mykrobe_min_conf_default_10", 161 "Mykrobe_min_conf_default_10",
162 "LIMS_file", 162 "LIMS_file",
163 "LIMS_filename") 163 "Mutation_set_version")
164 164
165 165
166 # A default report in the order our LIMS requires 166 # A default report in the order our LIMS requires
167 167
168 # Make a default dataframe to combine the rest into and enforce column order / fill missing ones with NAs 168 # Make a default dataframe to combine the rest into and enforce column order / fill missing ones with NAs
183 "Ethambutol_Prediction", 183 "Ethambutol_Prediction",
184 "Mykrobe_pncA", 184 "Mykrobe_pncA",
185 "Mykrobe_rpsA", 185 "Mykrobe_rpsA",
186 "Pyrazinamide_R_mutations", 186 "Pyrazinamide_R_mutations",
187 "Pyrazinamide_Prediction", 187 "Pyrazinamide_Prediction",
188 "Mykrobe_gyrA", 188 "Mykrobe_Ofloxacin_gyrA",
189 "Quinolones_R_mutations", 189 "Ofloxacin_R_mutations",
190 "Quinolones_Prediction", 190 "Ofloxacin_Prediction",
191 "Mykrobe_Moxifloxacin_gyrA",
192 "Moxifloxacin_R_mutations",
193 "Moxifloxacin_Prediction",
191 "Mykrobe_rpsL", 194 "Mykrobe_rpsL",
192 "Mykrobe_Streptomycin_rrs", 195 "Mykrobe_Streptomycin_rrs",
193 "Mykrobe_Streptomycin_gid", 196 "Mykrobe_Streptomycin_gid",
194 "Streptomycin_R_mutations", 197 "Streptomycin_R_mutations",
195 "Streptomycin_Prediction", 198 "Streptomycin_Prediction",
210 "mykrobe_version", 213 "mykrobe_version",
211 "Mykrobe_Resistance_probe_set", 214 "Mykrobe_Resistance_probe_set",
212 "Mykrobe_min_depth_default_5", 215 "Mykrobe_min_depth_default_5",
213 "Mykrobe_min_conf_default_10", 216 "Mykrobe_min_conf_default_10",
214 "LIMS_file", 217 "LIMS_file",
215 "LIMS_filename") 218 "Mutation_set_version")
216 219
217 report <- setNames(data.frame(matrix("", ncol = length(columns), nrow = 1), stringsAsFactors = F), columns) 220 report <- setNames(data.frame(matrix("", ncol = length(columns), nrow = 1), stringsAsFactors = F), columns)
218 221
219 222
220 # List of drugs that are tested 223 # List of drugs that are tested
221 all_drugs <- c("Isoniazid", 224 all_drugs <- c("Isoniazid",
222 "Rifampicin", 225 "Rifampicin",
223 "Ethambutol", 226 "Ethambutol",
224 "Pyrazinamide", 227 "Pyrazinamide",
225 "Moxifloxacin_Ofloxacin", 228 "Moxifloxacin",
229 "Ofloxacin",
226 "Streptomycin", 230 "Streptomycin",
227 "Amikacin", 231 "Amikacin",
228 "Capreomycin", 232 "Capreomycin",
229 "Kanamycin") 233 "Kanamycin")
230 234
285 mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>% 289 mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>%
286 mutate(R_mutations = variants) %>% 290 mutate(R_mutations = variants) %>%
287 mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows) 291 mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows)
288 unnest(variants) %>% 292 unnest(variants) %>%
289 separate(variants, c("gene", "mutation"), "_") %>% 293 separate(variants, c("gene", "mutation"), "_") %>%
290 mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "gid"), # Check for columns that include the drug name or not and paste accordingly 294 mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "eis", "gid"), # Check for columns that include the drug name or not and paste accordingly
291 paste("Mykrobe", drug, gene, sep = "_"), 295 paste("Mykrobe", drug, gene, sep = "_"),
292 paste("Mykrobe", gene, sep = "_"))) %>% 296 paste("Mykrobe", gene, sep = "_"))) %>%
293 # Extract out the mutation information with a regex that covers all potential genes 297 # Extract out the mutation information with a regex that covers all potential genes
294 # This regex looks for whatever is ahead of the first colon and after the last hyphen 298 # This regex looks for whatever is ahead of the first colon and after the last hyphen
295 mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>% 299 mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>%
330 report <- 334 report <-
331 report %>% 335 report %>%
332 filter_at(vars(ends_with("_Prediction")), any_vars(. != "failed")) %>% 336 filter_at(vars(ends_with("_Prediction")), any_vars(. != "failed")) %>%
333 mutate_at(vars(starts_with("Mykrobe_")), funs(replace(., is.na(.), "No Mutation"))) %>% 337 mutate_at(vars(starts_with("Mykrobe_")), funs(replace(., is.na(.), "No Mutation"))) %>%
334 full_join(anti_join(report, ., by = "file")) %>% 338 full_join(anti_join(report, ., by = "file")) %>%
335 select(columns) %>% 339 select(columns)
336 rename(Moxifloxacin_Ofloxacin_R_mutations = Quinolones_R_mutations,
337 Moxifloxacin_Ofloxacin_Prediction = Quinolones_Prediction)
338 340
339 341
340 # Add in the parameters fed from Galaxy using named character vector 342 # Add in the parameters fed from Galaxy using named character vector
341 report <- 343 report <-
342 report %>% 344 report %>%
345 Lims_INTComment = params["Lims_INTComment"], 347 Lims_INTComment = params["Lims_INTComment"],
346 Mykrobe_Workflow_Version = params["Mykrobe_Workflow_Version"], 348 Mykrobe_Workflow_Version = params["Mykrobe_Workflow_Version"],
347 Mykrobe_min_depth_default_5 = params["Mykrobe_min_depth_default_5"], 349 Mykrobe_min_depth_default_5 = params["Mykrobe_min_depth_default_5"],
348 Mykrobe_min_conf_default_10 = params["Mykrobe_min_conf_default_10"], 350 Mykrobe_min_conf_default_10 = params["Mykrobe_min_conf_default_10"],
349 LIMS_file = params["LIMS_file"], 351 LIMS_file = params["LIMS_file"],
350 LIMS_filename = params["LIMS_filename"] 352 Mutation_set_version = params["Mutation_set_version"]
351 ) 353 )
352 354
353 355
354 #View(report) 356 #View(report)
355 357