Mercurial > repos > iuc > ribowaltz_plot
changeset 0:8e903cb3f919 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ribowaltz commit ff002df702f544829d1b500ac4b517c1e70ad14d
author | iuc |
---|---|
date | Thu, 22 Sep 2022 20:30:20 +0000 |
parents | |
children | e25d81465c23 |
files | macros.xml ribowaltz.R ribowaltz_plot.R ribowaltz_plot.xml test-data/rep1.bam test-data/rep1.rdata test-data/rep1_annot.gtf.gz |
diffstat | 7 files changed, 564 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Sep 22 20:30:20 2022 +0000 @@ -0,0 +1,28 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="1.2.0">ribowaltz</requirement> + <requirement type="package" version="1.20.3">r-getopt</requirement> + </requirements> + </xml> + <token name="@TOOL_VERSION@">1.2.0</token> + <token name="@PROFILE@">21.05</token> + <xml name="edam_ontology"> + <edam_topics> + <edam_topic>topic_4027</edam_topic> + </edam_topics> + <edam_operations> + <edam_operation>operation_0439</edam_operation> + </edam_operations> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1006169</citation> + </citations> + </xml> + <xml name="xrefs"> + <xrefs> + <xref type='bio.tools'>riboWaltz</xref> + </xrefs> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ribowaltz.R Thu Sep 22 20:30:20 2022 +0000 @@ -0,0 +1,131 @@ +# setup R error handling to go to stderr +options(show.error.messages = FALSE, error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) +}) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") +library("getopt") +options(stringAsFactors = FALSE, useFancyQuotes = FALSE) +args <- commandArgs(trailingOnly = TRUE) + +# get options, using the spec as defined by the enclosed list. +# we read the options from the default: commandArgs(TRUE). +spec <- matrix(c( + "quiet", "q", 0, "logical", + "help", "h", 0, "logical", + "bamdir", "b", 1, "character", + "gtffile", "g", 1, "character", + "codon_coverage_info", "Y", 1, "character", + "cds_coverage_info", "Z", 1, "character", + "psite_info_rdata", "O", 0, "character", + "refseq_sep", "s", 0, "character", + "params_duplicate_filterting", "d", 0, "character", + "params_peridiocity_filterting", "l", 0, "character", + "params_custom_filterting", "c", 0, "character", + "params_psite_additional", "p", 0, "character", + "params_coverage_additional", "C", 0, "character" +), byrow = TRUE, ncol = 4) +opt <- getopt(spec) + +# if help was asked for print a friendly message +# and exit with a non-zero error code +if (!is.null(opt$help)) { + cat(getopt(spec, usage = TRUE)) + q(status = 1) +} + +verbose <- is.null(opt$quiet) + +library("riboWaltz") + +# create annotation data table +annotation_dt <- create_annotation(opt$gtffile) + +sep <- opt$refseq_sep +if (opt$refseq_sep == "") { + sep <- NULL +} +# convert alignments in BAM files into list of data tables +reads_list <- bamtolist(bamfolder = opt$bamdir, annotation = annotation_dt, refseq_sep = sep) + +library("jsonlite") +# remove duplicate reads +if (!is.null(opt$params_duplicate_filterting)) { + json_duplicate_filterting <- fromJSON(opt$params_duplicate_filterting) + reads_list <- duplicates_filter( + data = reads_list, + extremity = json_duplicate_filterting$extremity, + keep = json_duplicate_filterting$keep + ) +} + +# selection of read lengths - periodicity filtering +if (!is.null(opt$params_peridiocity_filterting)) { + json_peridiocity_filterting <- fromJSON(opt$params_peridiocity_filterting) + reads_list <- length_filter( + data = reads_list, + length_filter_mode = "periodicity", + periodicity_threshold = json_peridiocity_filterting$periodicity_threshold + ) +} +# selection of read lengths - length range filtering +if (!is.null(opt$params_custom_filterting)) { + json_custom_filterting <- fromJSON(opt$params_custom_filterting) + reads_list <- length_filter( + data = reads_list, + length_filter_mode = "custom", + length_range = json_custom_filterting$length_range + ) +} + +# compute P-site offset +json_psite_additional <- fromJSON(opt$params_psite_additional) +psite_offset <- psite( + reads_list, + start = json_psite_additional$use_start, + flanking = json_psite_additional$flanking, + extremity = json_psite_additional$psite_extrimity, + plot = TRUE, + cl = json_psite_additional$cl, + plot_format = "pdf", + plot_dir = "plots" +) +psite_offset + +reads_psite_list <- psite_info(reads_list, psite_offset) +reads_psite_list +# write a separate P-site offset info table for each sample +for (sample in names(reads_psite_list)) { + write.table( + reads_psite_list[[sample]], + file = paste(sample, "psite_info.tsv", sep = "_"), + sep = "\t", row.names = FALSE, quote = FALSE + ) + print(paste(sample, "psite_info.tsv", sep = "_")) +} + +# write R object to a file +if (!is.null(opt$psite_info_rdata)) { + save(reads_psite_list, annotation_dt, file = opt$psite_info_rdata) +} + +json_coverage_additional <- fromJSON(opt$params_coverage_additional) +# codon coverage +codon_coverage_out <- codon_coverage( + reads_psite_list, + annotation_dt, + psite = json_coverage_additional$psites_per_region, + min_overlap = json_coverage_additional$min_overlap +) +write.table(codon_coverage_out, file = opt$codon_coverage_info, sep = "\t", row.names = FALSE, quote = FALSE) + +# CDS coverage +cds_coverage_out <- cds_coverage( + reads_psite_list, + annotation_dt, + start_nts = json_coverage_additional$start_nts, + stop_nts = json_coverage_additional$stop_nts +) +write.table(cds_coverage_out, file = opt$cds_coverage_info, sep = "\t", row.names = FALSE, quote = FALSE)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ribowaltz_plot.R Thu Sep 22 20:30:20 2022 +0000 @@ -0,0 +1,152 @@ +# setup R error handling to go to stderr +options(show.error.messages = FALSE, error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) +}) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") +library("getopt") +options(stringAsFactors = FALSE, useFancyQuotes = FALSE) +args <- commandArgs(trailingOnly = TRUE) + +# get options, using the spec as defined by the enclosed list. +# we read the options from the default: commandArgs(TRUE). +spec <- matrix(c( + "quiet", "q", 0, "logical", + "help", "h", 0, "logical", + "input_rdata", "i", 1, "character", + "params_rlength_distr", "r", 0, "character", + "params_rends_heat", "e", 0, "character", + "region_psite_plot", "R", 0, "logical", + "params_trint_periodicity", "t", 0, "character", + "params_metaplots", "m", 0, "character", + "params_codon_usage_psite", "u", 0, "character" +), byrow = TRUE, ncol = 4) +opt <- getopt(spec) + +# if help was asked for print a friendly message +# and exit with a non-zero error code +if (!is.null(opt$help)) { + cat(getopt(spec, usage = TRUE)) + q(status = 1) +} + +verbose <- is.null(opt$quiet) + +library("riboWaltz") +library("jsonlite") + +load(opt$input_rdata) + +if (!is.null(opt$params_rlength_distr)) { + pdf("read_lengths.pdf") + json_rlength_distr <- fromJSON(opt$params_rlength_distr) + length_dist <- rlength_distr( + reads_psite_list, + sample = names(reads_psite_list), + cl = json_rlength_distr$cl, + multisamples = json_rlength_distr$multisamples, + plot_style = json_rlength_distr$plot_style + ) + print(length_dist) + dev.off() +} + +if (!is.null(opt$params_rends_heat)) { + pdf("read_ends_heatmap.pdf", height = 5 * length(reads_psite_list), width = 15) + json_rends_heat <- fromJSON(opt$params_rends_heat) + for (sample_name in names(reads_psite_list)) { + ends_heatmap <- rends_heat( + reads_psite_list, + annotation_dt, + sample = sample_name, + cl = json_rends_heat$cl, + utr5l = json_rends_heat$utr5l, + cdsl = json_rends_heat$cdsl, + utr3l = json_rends_heat$utr3l + ) + print(ends_heatmap[["plot"]]) + } + dev.off() +} + +if (!is.null(opt$region_psite_plot)) { + pdf("psites_per_region.pdf", height = 12, width = 7 * length(reads_psite_list)) + psite_region <- region_psite(reads_psite_list, annotation_dt, sample = names(reads_psite_list)) + print(psite_region[["plot"]]) + dev.off() +} + +if (!is.null(opt$params_trint_periodicity)) { + pdf("trinucleotide_periodicity.pdf", height = 6 * length(reads_psite_list), width = 10) + json_trint_periodicity <- fromJSON(opt$params_trint_periodicity) + frames_stratified <- frame_psite_length( + reads_psite_list, + sample = names(reads_psite_list), + cl = json_trint_periodicity$cl, + region = json_trint_periodicity$region, + length_range = json_trint_periodicity$length_range + ) + frames_stratified[["plot"]] + frames <- frame_psite_length( + reads_psite_list, + sample = names(reads_psite_list), + region = json_trint_periodicity$region, + length_range = json_trint_periodicity$length_range + ) + print(frames[["plot"]]) + dev.off() +} + +if (!is.null(opt$params_metaplots)) { + pdf("metaplots.pdf", height = 5 * length(reads_psite_list), width = 24) + json_metaplots <- fromJSON(opt$params_metaplots) + metaprofile <- metaprofile_psite( + reads_psite_list, + annotation_dt, + sample = names(reads_psite_list), + multisamples = json_metaplots$multisamples, + plot_style = json_metaplots$plot_style, + length_range = json_metaplots$length_range, + frequency = json_metaplots$frequency, + utr5l = json_metaplots$utr5l, + cdsl = json_metaplots$cdsl, + utr3l = json_metaplots$utr3l, + plot_title = "sample.transcript.length_range" + ) + print(metaprofile) + sample_list <- list() + for (sample_name in names(reads_psite_list)) { + sample_list[[sample_name]] <- c(sample_name) + } + metaheatmap <- metaheatmap_psite( + reads_psite_list, + annotation_dt, + sample = sample_list, + length_range = json_metaplots$length_range, + utr5l = json_metaplots$utr5l, + cdsl = json_metaplots$cdsl, + utr3l = json_metaplots$utr3l, + plot_title = "Comparison metaheatmap" + ) + print(metaheatmap[["plot"]]) + dev.off() +} + +if (!is.null(opt$params_codon_usage_psite)) { + pdf("codon_usage.pdf", height = 6, width = 16) + json_codon_usage_psite <- fromJSON(opt$params_codon_usage_psite) + for (sample_name in names(reads_psite_list)) { + cu_barplot <- codon_usage_psite( + reads_psite_list, + annotation_dt, + sample = sample_name, + fastapath = json_codon_usage_psite$fastapath, + fasta_genome = FALSE, + frequency_normalization = json_codon_usage_psite$frequency + ) + print(cu_barplot[["plot"]]) + } + dev.off() +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ribowaltz_plot.xml Thu Sep 22 20:30:20 2022 +0000 @@ -0,0 +1,253 @@ +<tool id="ribowaltz_plot" name="riboWaltz-plot" version="@VERSION@" profile="@PROFILE@"> + <description>visual inspection of ribosome profiling data</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro='requirements'/> + <expand macro='edam_ontology' /> + <expand macro='xrefs'/> + <command detect_errors="exit_code"><![CDATA[ + Rscript '${__tool_directory__}/ribowaltz_plot.R' -i '$input_rdata' + #import json + #if $rlength_distr.plot == 'yes': + #set params_rlength_distr = [] + #silent $params_rlength_distr.append( + {"cl": int($rlength_distr.plot_options.cl), "multisamples": str($rlength_distr.plot_options.multisamples), + "plot_style": str($rlength_distr.plot_options.plot_style)}) + --params_rlength_distr '#echo json.dumps($params_rlength_distr)#' + #end if + #if $rends_heat.plot == 'yes': + #set params_rends_heat = [] + #silent $params_rends_heat.append( + {"cl": int($rends_heat.plot_options.cl), "utr5l": int($rends_heat.plot_options.utr5l), + "cdsl": int($rends_heat.plot_options.cdsl), "utr3l": int($rends_heat.plot_options.utr3l)}) + --params_rends_heat '#echo json.dumps($params_rends_heat)#' + #end if + #if $region_psite: + --region_psite_plot + #end if + #if $trint_periodicity.plot == 'yes': + #set length_range = 'all' + #if $trint_periodicity.plot_options.length_range.filter == 'range': + #$length_range = str($trint_periodicity.plot_options.length_range.length_range_min) + ':' + str($trint_periodicity.plot_options.length_range.length_range_max) + #end if + #set params_trint_periodicity = [] + #silent $params_trint_periodicity.append( + {"cl": int($trint_periodicity.plot_options.cl), "region": str($trint_periodicity.plot_options.region), + "length_range": $length_range}) + --params_trint_periodicity '#echo json.dumps($params_trint_periodicity)#' + #end if + #if $metaplots.plot == 'yes': + #set length_range = 'all' + #if $metaplots.plot_options.length_range.filter == 'range': + #$length_range = str($metaplots.plot_options.length_range.length_range_min) + ':' + str($metaplots.plot_options.length_range.length_range_max) + #end if + #set params_metaplots = [] + #silent $params_metaplots.append( + {"multisamples": str($metaplots.plot_options.multisamples), "plot_style": str($metaplots.plot_options.plot_style), + "length_range": $length_range, "frequency": bool($metaplots.plot_options.frequency), + "utr5l": int($metaplots.plot_options.utr5l), "cdsl": int($metaplots.plot_options.cdsl), + "utr3l": int($metaplots.plot_options.utr3l)}) + --params_metaplots '#echo json.dumps($params_metaplots)#' + #end if + #if $codon_usage_psite.plot == 'yes': + #set params_codon_usage_psite = [] + #silent $params_codon_usage_psite.append( + {"fastapath": str($codon_usage_psite.plot_options.fastapath), "frequency": bool($codon_usage_psite.plot_options.frequency), + "label_scatter": bool($codon_usage_psite.plot_options.label_scatter), + "label_number": int($codon_usage_psite.plot_options.label_number)}) + --params_codon_usage_psite '#echo json.dumps($params_codon_usage_psite)#' + #end if + ]]></command> + <inputs> + <param name="input_rdata" type="data" format="rdata" label="RDATA file generated by riboWaltz tool"/> + <conditional name="rlength_distr"> + <param name="plot" type="select" label="Include read length distribution plots?"> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <when value="yes"> + <section name="plot_options" title="Plot options"> + <param name="cl" type="integer" value="100" min="1" max="100" label="Confidence level"/> + <param name="multisamples" type="select" label="How to handle multiple samples and replicates?"> + <option value="separated">Separate plots</option> + <option value="average">Sinlge plot with mean signal</option> + </param> + <param name="plot_style" type="select" label="How to organize and display multiple bar plots??"> + <option value="split">Separate plots</option> + <option value="dodged">Sinlge plot with mean signal</option> + </param> + </section> + </when> + <when value="no"/> + </conditional> + <conditional name="rends_heat"> + <param name="plot" type="select" label="Inlcude plots about abundance reads extremities around start and stop codons?"> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <when value="yes"> + <section name="plot_options" title="Plot options"> + <param name="cl" type="integer" value="95" min="1" max="100" label="Confidence level"/> + <param name="utr5l" type="integer" value="50" min="0" label="5' UTR region flanking the start codon"/> + <param name="cdsl" type="integer" value="50" min="0" label="CDS region flanking both the start and stop codon"/> + <param name="utr3l" type="integer" value="50" min="0" label="3' UTR region flanking the stop codon"/> + </section> + </when> + <when value="no"/> + </conditional> + <param name="region_psite" type="boolean" truevalue="1" falsevalue="0" checked="false" + label="Inlcude plots that show P-sites per region?"/> + <conditional name="trint_periodicity"> + <param name="plot" type="select" label="Inlcude plots with Trinucleotide periodicity?"> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <when value="yes"> + <section name="plot_options" title="Plot options"> + <param name="cl" type="integer" value="95" min="1" max="100" label="Confidence level"/> + <param name="region" type="select" label="Region(s) of the apartment to be analyzed"> + <option value="all" selected="true">All</option> + <option value="5end">5' UTR</option> + <option value="3end">3' UTR</option> + </param> + <conditional name="length_range"> + <param name="filter" type="select" label="Read length(s) to be included in the analysis"> + <option value="all" selected="true">All</option> + <option value="range">based on read length ranges</option> + </param> + <when value="all" /> + <when value="range"> + <param name="length_range_min" value="1" type="integer" min="1" + label="Read lengths ranging from"/> + <param name="length_range_max" value="100" type="integer" min="1" + label="Read lengths ranging to"/> + </when> + </conditional> + </section> + </when> + <when value="no"/> + </conditional> + <conditional name="metaplots"> + <param name="plot" type="select" label="Inlcude metaplots?"> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <when value="yes"> + <section name="plot_options" title="Plot options"> + <param name="multisamples" type="select" label="How to handle multiple samples and replicates?"> + <option value="separated">Separate plots</option> + <option value="average">Sinlge plot with mean signal</option> + <option value="sum">Sinlge plot with sum of the signal</option> + </param> + <param name="plot_style" type="select" label="How to organize and display multiple bar plots??"> + <option value="split">Separate plots</option> + <option value="dodged">Sinlge plot with mean signal</option> + </param> + <conditional name="length_range"> + <param name="filter" type="select" label="Read length(s) to be included in the analysis"> + <option value="all" selected="true">All</option> + <option value="range">based on read length ranges</option> + </param> + <when value="all" /> + <when value="range"> + <param name="length_range_min" value="1" type="integer" min="1" + label="Read lengths ranging from"/> + <param name="length_range_max" value="100" type="integer" min="1" + label="Read lengths ranging to"/> + </when> + </conditional> + <param name="frequency" type="boolean" truevalue="1" falsevalue="0" checked="false" + label="Normalize the metaprofile(s) such that the area under the curve(s) is 1?" + help="If checked and if multisamples is set to average or sum, normalization is performed + before combining the signal from multiple samples"/> + <param name="utr5l" type="integer" value="50" min="0" label="5' UTR region flanking the start codon"/> + <param name="cdsl" type="integer" value="50" min="0" label="CDS region flanking both the start and stop codon"/> + <param name="utr3l" type="integer" value="25" min="0" label="3' UTR region flanking the stop codon"/> + </section> + </when> + <when value="no"/> + </conditional> + <conditional name="codon_usage_psite"> + <param name="plot" type="select" label="Inlcude plots with codon usage?"> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <when value="yes"> + <section name="plot_options" title="Plot options"> + <param name="fastapath" type="data" format="fasta" /> + <param name="frequency" type="boolean" truevalue="1" falsevalue="0" checked="true" + label="normalize the 64 codon usage indexes for the corresponding codon frequencies in coding sequences?"/> + <param name="label_scatter" type="boolean" truevalue="1" falsevalue="0" checked="false" + label="Label the dots in the scatter plot?" + help=" This parameter is considered only if there exatcly two input samples"/> + <param name="label_number" type="integer" value="64" min="1" max="64" label="how many dots in the scatter plot should be labeled?"/> + </section> + </when> + <when value="no"/> + </conditional> + </inputs> + <outputs> + <collection name="out_plots" type="list" label="riboWaltz plots on ${on_string}"> + <discover_datasets pattern="(?P<designation>.+)\.pdf" format="pdf" directory="." visible="false"/> + </collection> + </outputs> + <tests> + <test expect_num_outputs="1"> + <param name="input_rdata" value="rep1.rdata"/> + <param name="region_psite" value="1"/> + <conditional name="rlength_distr"> + <param name="plot" value="yes"/> + </conditional> + <conditional name="rends_heat"> + <param name="plot" value="yes"/> + </conditional> + <conditional name="trint_periodicity"> + <param name="plot" value="yes"/> + </conditional> + <conditional name="metaplots"> + <param name="plot" value="yes"/> + </conditional> + <output_collection name="out_plots" type="list"> + <element name="metaplots"> + <assert_contents> + <has_size value="9180" delta="100"/> + </assert_contents> + </element> + <element name="psites_per_region"> + <assert_contents> + <has_size value="5187" delta="100"/> + </assert_contents> + </element> + <element name="read_ends_heatmap"> + <assert_contents> + <has_size value="26327" delta="500"/> + </assert_contents> + </element> + <element name="read_lengths"> + <assert_contents> + <has_size value="4877" delta="100"/> + </assert_contents> + </element> + <element name="trinucleotide_periodicity"> + <assert_contents> + <has_size value="7730" delta="100"/> + </assert_contents> + </element> + </output_collection> + </test> + </tests> + <help><![CDATA[ +Visual inspection of ribosome profiling data. More information can be found here: https://github.com/LabTranslationalArchitectomics/riboWaltz + +**Inputs** + +RDATA file generated by rioWaltz tool. + +**Outputs** + +Generates various plots to visualize P-site offsets, codon usage etc. + + ]]></help> + <expand macro="citations" /> +</tool>