Mercurial > repos > nml > mykrobe_parser

--- a/mykrobe_parser.R	Mon Oct 01 14:54:34 2018 -0400
+++ b/mykrobe_parser.R	Tue Oct 23 08:54:51 2018 -0400
@@ -20,13 +20,18 @@
 # August 2018

 # Libraries ####
-library(jsonlite, quietly = T)
-library(here, quietly = T)
-suppressMessages(library(dplyr, quietly = T))
-suppressMessages(library(purrr, quietly = T))
-library(tidyr, quietly = T)
-library(stringr, quietly = T)
-library(optparse, quietly = T)
+
+sink(stdout(), type = "message")
+
+suppressPackageStartupMessages({
+  library(jsonlite)
+  library(here)
+  library(dplyr)
+  library(purrr)
+  library(tidyr)
+  library(stringr)
+  library(optparse)
+})

 # Define custom functions, variables, and paths. Collect and use CL arguments ####

@@ -98,18 +103,6 @@
   as_tibble(temp)
 }

-sink(stdout(), type = "message")
-
-suppressPackageStartupMessages({
-  library(jsonlite)
-  library(here)
-  library(dplyr)
-  library(purrr)
-  library(tidyr)
-  library(stringr)
-  library(optparse)
-})
-
 # Get command line arguments with optparse
 option_list = list(
   make_option(c("-f", "--file"),
@@ -274,43 +267,53 @@
   stop("No susceptibility results in files specified. Did the testing fail?", call.=FALSE)
 }

-# Variants
-# Multiple resistance mutations and confidence per drug in the X_R_mutations column
-# Actual protein changes in Mykrobe_X columns
+# Variants, parsed only if any *_Prediction value matches "[R,r]" (NOTE(review): this class also matches a literal comma)
+if (0 < predictions.table %>%
+    select(ends_with("_Prediction")) %>%
+    unlist(use.names = F) %>%
+    str_count("[R,r]") %>%
+    sum()){

-variants.temp <-
-  temp %>%
-  select(file, drug, variants = `variants (gene:alt_depth:wt_depth:conf)`) %>%
-  mutate(variants = replace(variants, variants == "", NA)) %>% # Make missing data consistent...
-  filter(!is.na(variants)) %>% # ...Then get rid of it
-  mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>%
-  mutate(R_mutations = variants) %>%
-  mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows)
-  unnest(variants) %>%
-  separate(variants, c("gene", "mutation"), "_") %>%
-  mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "gid"), # Check for columns that include the drug name or not and paste accordingly
-                             paste("Mykrobe", drug, gene, sep = "_"),
-                             paste("Mykrobe", gene, sep = "_"))) %>%
-  # Extract out the mutation information with a regex that covers all potential genes
-  # This regex looks for whatever is ahead of the first colon and after the last hyphen
-  mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>%
-  select(file, tempcols, R_mutations, columnname, mutation)
+      # Multiple resistance mutations and confidence per drug in the X_R_mutations column
+      # Actual protein changes in Mykrobe_X columns
+
+      variants.temp <-
+        temp %>%
+        select(file, drug, variants = `variants (gene:alt_depth:wt_depth:conf)`) %>%
+        mutate(variants = replace(variants, variants == "", NA)) %>% # Make missing data consistent...
+        filter(!is.na(variants)) %>% # ...Then get rid of it
+        mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>%
+        mutate(R_mutations = variants) %>%
+        mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows)
+        unnest(variants) %>%
+        separate(variants, c("gene", "mutation"), "_") %>%
+        mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "gid"), # Check for columns that include the drug name or not and paste accordingly
+                                   paste("Mykrobe", drug, gene, sep = "_"),
+                                   paste("Mykrobe", gene, sep = "_"))) %>%
+        # Extract out the mutation information with a regex that covers all potential genes
+        # The greedy capture group keeps everything before the last hyphen that is followed by a colon
+        mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>%
+        select(file, tempcols, R_mutations, columnname, mutation)
+
+      # Split each kind of variants into its own temp table then merge
+      variants.1 <-
+        variants.temp %>%
+        select(file, tempcols, R_mutations) %>%
+        distinct() %>%
+        spread(tempcols, R_mutations)
+
+      variants.2 <-
+        variants.temp %>%
+        select(file, columnname, mutation) %>%
+        group_by(file, columnname) %>%
+        summarise(mutation = paste(mutation, collapse = ";")) %>%
+        spread(columnname, mutation)
+
+      variants.table <- full_join(variants.1, variants.2, by = "file")
+}else{
+  variants.table <- data.frame(file=predictions.table$file, stringsAsFactors = F)
+}

-# Split each kind of variants into its own temp table then merge
-variants.1 <-
-  variants.temp %>%
-  select(file, tempcols, R_mutations) %>%
-  distinct() %>%
-  spread(tempcols, R_mutations)
-
-variants.2 <-
-  variants.temp %>%
-  select(file, columnname, mutation) %>%
-  group_by(file, columnname) %>%
-  summarise(mutation = paste(mutation, collapse = ";")) %>%
-  spread(columnname, mutation)
-
-variants.table <- full_join(variants.1, variants.2, by = "file")

 # Make a report ####

@@ -369,7 +372,7 @@
          lineage_depth) %>%
   distinct() %>%
   write.csv("output-jsondata.csv", row.names = F)
-print("Writing JSON data to CSV as output-jsondata.txt")
+print("Writing JSON data to CSV as output-jsondata.csv")
 sink(NULL, type="message") # close the sink

 quit()
\ No newline at end of file
--- a/mykrobe_parser.xml	Mon Oct 01 14:54:34 2018 -0400
+++ b/mykrobe_parser.xml	Tue Oct 23 08:54:51 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="mykrobe_parseR" name="mykrobe_parseR" version="0.1.1">
+<tool id="mykrobe_parseR" name="mykrobe_parseR" version="0.1.2">
     <requirements>
         <requirement type="package" version="3.4.1">r-base</requirement>
         <requirement type="package" version="1.5.0">r-jsonlite</requirement>