Mercurial > repos > nml > mykrobe_parser
comparison mykrobe_parser.R @ 4:8529045f0fdf draft
planemo upload for repository https://github.com/phac-nml/mykrobe-parser commit 34d9c47b9451e5f7843028dba22b96d125fb09f5
author | nml |
---|---|
date | Thu, 25 Apr 2019 10:13:46 -0400 |
parents | f2608dccd3e0 |
children | deebc6410d13 |
comparison
equal
deleted
inserted
replaced
3:0dcfdec76c3c | 4:8529045f0fdf |
---|---|
150 "", # Lims_INTComment | 150 "", # Lims_INTComment |
151 opt$version, # Mykrobe_Workflow_Version | 151 opt$version, # Mykrobe_Workflow_Version |
152 opt$depth, # Mykrobe_min_depth_default_5 | 152 opt$depth, # Mykrobe_min_depth_default_5 |
153 opt$conf, # Mykrobe_min_conf_default_10 | 153 opt$conf, # Mykrobe_min_conf_default_10 |
154 "", # LIMS_file - empty as it's an upload field in LIMS | 154 "", # LIMS_file - empty as it's an upload field in LIMS |
155 opt$name) # LIMS_filename | 155 opt$name) # Mutation_set_version |
156 | 156 |
157 names(params) <- c("Lims_Comment", | 157 names(params) <- c("Lims_Comment", |
158 "Lims_INTComment", | 158 "Lims_INTComment", |
159 "Mykrobe_Workflow_Version", | 159 "Mykrobe_Workflow_Version", |
160 "Mykrobe_min_depth_default_5", | 160 "Mykrobe_min_depth_default_5", |
161 "Mykrobe_min_conf_default_10", | 161 "Mykrobe_min_conf_default_10", |
162 "LIMS_file", | 162 "LIMS_file", |
163 "LIMS_filename") | 163 "Mutation_set_version") |
164 | 164 |
165 | 165 |
166 # A default report in the order our LIMS requires | 166 # A default report in the order our LIMS requires |
167 | 167 |
168 # Make a default dataframe to combine the rest into and enforce column order / fill missing ones with NAs | 168 # Make a default dataframe to combine the rest into and enforce column order / fill missing ones with NAs |
183 "Ethambutol_Prediction", | 183 "Ethambutol_Prediction", |
184 "Mykrobe_pncA", | 184 "Mykrobe_pncA", |
185 "Mykrobe_rpsA", | 185 "Mykrobe_rpsA", |
186 "Pyrazinamide_R_mutations", | 186 "Pyrazinamide_R_mutations", |
187 "Pyrazinamide_Prediction", | 187 "Pyrazinamide_Prediction", |
188 "Mykrobe_gyrA", | 188 "Mykrobe_Ofloxacin_gyrA", |
189 "Quinolones_R_mutations", | 189 "Ofloxacin_R_mutations", |
190 "Quinolones_Prediction", | 190 "Ofloxacin_Prediction", |
191 "Mykrobe_Moxifloxacin_gyrA", | |
192 "Moxifloxacin_R_mutations", | |
193 "Moxifloxacin_Prediction", | |
191 "Mykrobe_rpsL", | 194 "Mykrobe_rpsL", |
192 "Mykrobe_Streptomycin_rrs", | 195 "Mykrobe_Streptomycin_rrs", |
193 "Mykrobe_Streptomycin_gid", | 196 "Mykrobe_Streptomycin_gid", |
194 "Streptomycin_R_mutations", | 197 "Streptomycin_R_mutations", |
195 "Streptomycin_Prediction", | 198 "Streptomycin_Prediction", |
210 "mykrobe_version", | 213 "mykrobe_version", |
211 "Mykrobe_Resistance_probe_set", | 214 "Mykrobe_Resistance_probe_set", |
212 "Mykrobe_min_depth_default_5", | 215 "Mykrobe_min_depth_default_5", |
213 "Mykrobe_min_conf_default_10", | 216 "Mykrobe_min_conf_default_10", |
214 "LIMS_file", | 217 "LIMS_file", |
215 "LIMS_filename") | 218 "Mutation_set_version") |
216 | 219 |
217 report <- setNames(data.frame(matrix("", ncol = length(columns), nrow = 1), stringsAsFactors = F), columns) | 220 report <- setNames(data.frame(matrix("", ncol = length(columns), nrow = 1), stringsAsFactors = F), columns) |
218 | 221 |
219 | 222 |
220 # List of drugs that are tested | 223 # List of drugs that are tested |
221 all_drugs <- c("Isoniazid", | 224 all_drugs <- c("Isoniazid", |
222 "Rifampicin", | 225 "Rifampicin", |
223 "Ethambutol", | 226 "Ethambutol", |
224 "Pyrazinamide", | 227 "Pyrazinamide", |
225 "Moxifloxacin_Ofloxacin", | 228 "Moxifloxacin", |
229 "Ofloxacin", | |
226 "Streptomycin", | 230 "Streptomycin", |
227 "Amikacin", | 231 "Amikacin", |
228 "Capreomycin", | 232 "Capreomycin", |
229 "Kanamycin") | 233 "Kanamycin") |
230 | 234 |
285 mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>% | 289 mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>% |
286 mutate(R_mutations = variants) %>% | 290 mutate(R_mutations = variants) %>% |
287 mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows) | 291 mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows) |
288 unnest(variants) %>% | 292 unnest(variants) %>% |
289 separate(variants, c("gene", "mutation"), "_") %>% | 293 separate(variants, c("gene", "mutation"), "_") %>% |
290 mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "gid"), # Check for columns that include the drug name or not and paste accordingly | 294 mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "eis", "gid"), # Check for columns that include the drug name or not and paste accordingly |
291 paste("Mykrobe", drug, gene, sep = "_"), | 295 paste("Mykrobe", drug, gene, sep = "_"), |
292 paste("Mykrobe", gene, sep = "_"))) %>% | 296 paste("Mykrobe", gene, sep = "_"))) %>% |
293 # Extract out the mutation information with a regex that covers all potential genes | 297 # Extract out the mutation information with a regex that covers all potential genes |
294 # This regex looks for whatever is ahead of the first colon and after the last hyphen | 298 # This regex looks for whatever is ahead of the first colon and after the last hyphen |
295 mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>% | 299 mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>% |
330 report <- | 334 report <- |
331 report %>% | 335 report %>% |
332 filter_at(vars(ends_with("_Prediction")), any_vars(. != "failed")) %>% | 336 filter_at(vars(ends_with("_Prediction")), any_vars(. != "failed")) %>% |
333 mutate_at(vars(starts_with("Mykrobe_")), funs(replace(., is.na(.), "No Mutation"))) %>% | 337 mutate_at(vars(starts_with("Mykrobe_")), funs(replace(., is.na(.), "No Mutation"))) %>% |
334 full_join(anti_join(report, ., by = "file")) %>% | 338 full_join(anti_join(report, ., by = "file")) %>% |
335 select(columns) %>% | 339 select(columns) |
336 rename(Moxifloxacin_Ofloxacin_R_mutations = Quinolones_R_mutations, | |
337 Moxifloxacin_Ofloxacin_Prediction = Quinolones_Prediction) | |
338 | 340 |
339 | 341 |
340 # Add in the parameters fed from Galaxy using named character vector | 342 # Add in the parameters fed from Galaxy using named character vector |
341 report <- | 343 report <- |
342 report %>% | 344 report %>% |
345 Lims_INTComment = params["Lims_INTComment"], | 347 Lims_INTComment = params["Lims_INTComment"], |
346 Mykrobe_Workflow_Version = params["Mykrobe_Workflow_Version"], | 348 Mykrobe_Workflow_Version = params["Mykrobe_Workflow_Version"], |
347 Mykrobe_min_depth_default_5 = params["Mykrobe_min_depth_default_5"], | 349 Mykrobe_min_depth_default_5 = params["Mykrobe_min_depth_default_5"], |
348 Mykrobe_min_conf_default_10 = params["Mykrobe_min_conf_default_10"], | 350 Mykrobe_min_conf_default_10 = params["Mykrobe_min_conf_default_10"], |
349 LIMS_file = params["LIMS_file"], | 351 LIMS_file = params["LIMS_file"], |
350 LIMS_filename = params["LIMS_filename"] | 352 Mutation_set_version = params["Mutation_set_version"] |
351 ) | 353 ) |
352 | 354 |
353 | 355 |
354 #View(report) | 356 #View(report) |
355 | 357 |