# HG changeset patch # User artbio # Date 1507561629 14400 # Node ID a3be3601bcb3fdf2c1b3e913f89832a4b69f645d # Parent 12c14642e6ac8db494db906cfd60b4543cae1a60 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 9f9c64aff0d225881bedb97bd5035ccbca945d9d diff -r 12c14642e6ac -r a3be3601bcb3 small_rna_maps.r --- a/small_rna_maps.r Sun Oct 08 17:56:13 2017 -0400 +++ b/small_rna_maps.r Mon Oct 09 11:07:09 2017 -0400 @@ -12,6 +12,7 @@ option_list <- list( make_option(c("-f", "--first_dataframe"), type="character", help="path to first dataframe"), make_option(c("-e", "--extra_dataframe"), type="character", help="path to additional dataframe"), + make_option(c("-n", "--normalization"), type="character", help="space-separated normalization/size factors"), make_option("--first_plot_method", type = "character", help="How additional data should be plotted"), make_option("--extra_plot_method", type = "character", help="How additional data should be plotted"), make_option("--output_pdf", type = "character", help="path to the pdf file with plots") @@ -27,6 +28,21 @@ Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1)) } n_samples=length(unique(Table$Dataset)) +samples = unique(Table$Dataset) +if (args$normalization != "") { + norm_factors = as.numeric(unlist(strsplit(args$normalization, " "))) +} else { + norm_factors = rep(1, n_samples) +} +if (args$first_plot_method == "Counts" | args$first_plot_method == "Size" | args$first_plot_method == "Coverage") { + i = 1 + for (sample in samples) { + print(norm_factors[i]) + Table[, length(Table)][Table$Dataset==sample] <- Table[, length(Table)][Table$Dataset==sample]*norm_factors[i] + i = i + 1 + } + print(tail(Table)) +} genes=unique(levels(Table$Chromosome)) per_gene_readmap=lapply(genes, function(x) subset(Table, Chromosome==x)) per_gene_limit=lapply(genes, function(x) c(1, unique(subset(Table, Chromosome==x)$Chrom_length)) ) @@ -36,9 +52,16 @@ 
ExtraTable=read.delim(args$extra_dataframe, header=T, row.names=NULL) if (args$extra_plot_method == "Counts" | args$extra_plot_method=='Size') { ExtraTable <- within(ExtraTable, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1)) + } + if (args$extra_plot_method == "Counts" | args$extra_plot_method == "Size" | args$extra_plot_method == "Coverage") { + i = 1 + for (sample in samples) { + ExtraTable[, length(ExtraTable)][ExtraTable$Dataset==sample] <- ExtraTable[, length(ExtraTable)][ExtraTable$Dataset==sample]*norm_factors[i] + i = i + 1 } + } per_gene_size=lapply(genes, function(x) subset(ExtraTable, Chromosome==x)) - } +} ## functions diff -r 12c14642e6ac -r a3be3601bcb3 small_rna_maps.xml --- a/small_rna_maps.xml Sun Oct 08 17:56:13 2017 -0400 +++ b/small_rna_maps.xml Mon Oct 09 11:07:09 2017 -0400 @@ -1,4 +1,4 @@ - + numpy @@ -33,6 +33,7 @@ Rscript '$__tool_directory__'/small_rna_maps.r --first_dataframe '$output_tab' --extra_dataframe '$extra_output_tab' + --normalization '$normalization' #if str($plots_options.plots_options_selector ) == "two_plot": --first_plot_method '${plots_options.first_plot}' --extra_plot_method '${plots_options.extra_plot}' @@ -44,20 +45,22 @@ ]]> + - + - + @@ -92,6 +95,7 @@ + @@ -101,6 +105,7 @@ + @@ -110,6 +115,7 @@ + @@ -119,6 +125,7 @@ + @@ -128,6 +135,7 @@ + @@ -137,13 +145,25 @@ + + + + + + + + + + + + @@ -152,6 +172,7 @@ + @@ -159,9 +180,18 @@ + + + + + + + + + @@ -169,9 +199,19 @@ **What it does** -Generate read count maps from alignment BAM files, using pysam and lattice. +Plots maps of (1) read counts, (2) mean sizes, (3) median sizes, (4) coverage depth or (5) +size read distribution along chromosome references. + +Mean sizes and median sizes are the mean and the median sizes, respectively, of all reads +whose 5' end map to a given coordinate in a chromosome reference. +Coverage depths are computed from the input bam alignment files using the python pysam module. 
-In addition to the read counts (lower graphs), median size, mean size and coverage depth of reads(lower graphs) mapping at a given position are plotted. +The variables mentioned above (1-5) can be plotted either separately or in all possible +pairwise combinations. + +For comparison purposes, values from bam alignment files can be normalized by a size factor +before plotting. If the normalization field is left blank, a default normalization factor of 1 +is assumed. **Inputs** @@ -180,6 +220,9 @@ - single-read - sorted - mapping to the same reference + +Optionally, a space-separated list of normalization/size factors may be added before plotting. +This list maps to the selected bam alignments from bottom to top. **Output** diff -r 12c14642e6ac -r a3be3601bcb3 test-data/coverage_normed.pdf Binary file test-data/coverage_normed.pdf has changed diff -r 12c14642e6ac -r a3be3601bcb3 test-data/size-count_normed.pdf Binary file test-data/size-count_normed.pdf has changed