Mercurial > repos > iuc > snpfreqplot
comparison heatmap_for_variants.R @ 1:e362b3143cde draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
| author | iuc |
|---|---|
| date | Thu, 10 Dec 2020 13:41:29 +0000 |
| parents | 1062d6ad6503 |
| children | dc51db22310c |
comparison
equal
deleted
inserted
replaced
| 0:1062d6ad6503 | 1:e362b3143cde |
|---|---|
| 16 variant_files <- fapply(samples$ids, read_and_process) # nolint | 16 variant_files <- fapply(samples$ids, read_and_process) # nolint |
| 17 | 17 |
| 18 extractall_data <- function(id) { | 18 extractall_data <- function(id) { |
| 19 variants <- variant_files[[id]] | 19 variants <- variant_files[[id]] |
| 20 tmp <- variants %>% | 20 tmp <- variants %>% |
| 21 mutate(posalt = uni_select) %>% | 21 mutate(unique_selectors = group_select) %>% |
| 22 select(posalt, AF) | 22 select(unique_selectors, AF) |
| 23 colnames(tmp) <- c("Mutation", id) | 23 colnames(tmp) <- c("Mutation", id) |
| 24 return(tmp) | 24 return(tmp) |
| 25 } | 25 } |
| 26 | 26 |
| 27 extractall_annots <- function(id) { | 27 extractall_annots <- function(id) { |
| 28 variants <- variant_files[[id]] | 28 variants <- variant_files[[id]] |
| 29 tmp <- variants %>% | 29 tmp <- variants %>% |
| 30 mutate(posalt = uni_select, | 30 mutate(unique_selectors = group_select, |
| 31 effect = EFF....EFFECT, gene = EFF....GENE) %>% | 31 effect = EFF....EFFECT, gene = EFF....GENE) %>% |
| 32 select(posalt, effect, gene) | 32 select(unique_selectors, effect, gene) |
| 33 # allow "." as an alternative missing value in EFF.EFFECT and EFF.GENE | |
| 34 tmp$effect <- sub("^\\.$", "", tmp$effect) | |
| 35 tmp$gene <- sub("^\\.$", "", tmp$gene) | |
| 33 return(tmp) | 36 return(tmp) |
| 34 } | 37 } |
| 35 | 38 |
| 36 # process allele frequencies | 39 # process allele frequencies |
| 37 processed_files <- fapply(samples$ids, extractall_data) | 40 processed_files <- fapply(samples$ids, extractall_data) |
| 51 ## readout annotations | 54 ## readout annotations |
| 52 processed_annots <- fapply(samples$ids, extractall_annots) | 55 processed_annots <- fapply(samples$ids, extractall_annots) |
| 53 ann_final <- processed_annots %>% | 56 ann_final <- processed_annots %>% |
| 54 reduce(function(x, y) { | 57 reduce(function(x, y) { |
| 55 unique(rbind(x, y))}) %>% | 58 unique(rbind(x, y))}) %>% |
| 56 filter(posalt %in% colnames(final)) ## apply frequency filter | 59 ## apply frequency filter |
| 60 filter(unique_selectors %in% colnames(final)) | |
| 57 ann_final <- as_tibble(ann_final[str_order( | 61 ann_final <- as_tibble(ann_final[str_order( |
| 58 ann_final$posalt, numeric = T), ]) %>% | 62 ann_final$unique_selectors, numeric = T), ]) %>% |
| 59 column_to_rownames("posalt") ## sort | 63 column_to_rownames("unique_selectors") ## sort |
| 60 | 64 |
| 61 # rename annotations | 65 # rename annotations |
| 62 trans <- function(x, mapping, replace_missing=NULL) { | 66 trans <- function(x, mapping, replace_missing=NULL) { |
| 63 # helper function for translating effects | 67 # helper function for translating effects |
| 64 mapped <- mapping[[x]] | 68 mapped <- mapping[[x]] |
| 144 pheat_number_of_clusters <- length(samples$ids) | 148 pheat_number_of_clusters <- length(samples$ids) |
| 145 print(paste0("[INFO] Number of clusters: now set to ", | 149 print(paste0("[INFO] Number of clusters: now set to ", |
| 146 pheat_number_of_clusters)) | 150 pheat_number_of_clusters)) |
| 147 } | 151 } |
| 148 | 152 |
| 153 | |
| 154 # Fix Labels | |
| 155 ## Prettify names, check for label parity between final and ann_final | |
| 156 fix_label <- function(name) { | |
| 157 ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A | |
| 158 ##' to: 424 AGT… > A | |
| 159 cols <- unlist(str_split(name, " ")) | |
| 160 ## first 3 are POS REF ALT, and the rest are optional differences | |
| 161 pos_ref_alt <- cols[1:3] | |
| 162 rest <- "" | |
| 163 if (length(cols) > 3) { | |
| 164 rest <- paste0(" :: ", paste(cols[4:length(cols)], sep = " ")) | |
| 165 } | |
| 166 ## Trim the REF or ALT if too long | |
| 167 if (str_length(pos_ref_alt[2]) > 3) { | |
| 168 pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…") | |
| 169 } | |
| 170 if (str_length(pos_ref_alt[3]) > 3) { | |
| 171 pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…") | |
| 172 } | |
| 173 ## Join required | |
| 174 new_name <- paste0(pos_ref_alt[1], " ", | |
| 175 pos_ref_alt[2], " > ", | |
| 176 pos_ref_alt[3]) | |
| 177 ## Join rest | |
| 178 new_name <- paste0(new_name, " ", paste(rest)) | |
| 179 } | |
| 180 | |
| 181 colnames(final) <- sapply(colnames(final), fix_label) | |
| 182 rownames(ann_final) <- sapply(rownames(ann_final), fix_label) | |
| 183 ## sanity test | |
| 184 stopifnot(all(colnames(final) %in% rownames(ann_final))) | |
| 185 | |
| 186 | |
| 187 # Perform Plotting | |
| 149 get_plot_dims <- function(heat_map) { | 188 get_plot_dims <- function(heat_map) { |
| 150 ## get the dimensions of a pheatmap object | 189 ## get the dimensions of a pheatmap object |
| 151 ## useful for plot formats that can't be written to a file directly, but | 190 ## useful for plot formats that can't be written to a file directly, but |
| 152 ## for which we need to set up a plotting device | 191 ## for which we need to set up a plotting device |
| 153 ## source: https://stackoverflow.com/a/61876386 | 192 ## source: https://stackoverflow.com/a/61876386 |
