changeset 0:f00479673d47 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mapping_quality_stats commit e4b37874b820a2ac48732667128a08e5755b7c4b
author artbio
date Wed, 15 Jun 2022 10:43:07 +0000
parents
children 7883d97fa479
files mapping_quality_stats.py mapping_quality_stats.r mapping_quality_stats.xml test-data/distribution.pdf test-data/distribution.tab test-data/sample.bam
diffstat 6 files changed, 161 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mapping_quality_stats.py	Wed Jun 15 10:43:07 2022 +0000
@@ -0,0 +1,36 @@
+import argparse
+from collections import defaultdict
+
+import pysam
+
+
+def Parser():
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument('-bam', '--bam', dest='bam', required=True,
+                            help='input BAM file')
+    the_parser.add_argument('-o', '--output', dest='distribution',
+                            required=True,
+                            help='tabular output for mapq distribution')
+    args = the_parser.parse_args()
+    return args
+
+
+def collect_mapq(bam, out):
+    samfile = pysam.AlignmentFile(bam, "rb")
+    mapq_dict = defaultdict(int)
+    for read in samfile:
+        mapq_dict[read.mapping_quality] += 1
+    with open(out, 'w') as out:
+        out.write('mapq\tnumber_of_alignments\n')
+        for quality in sorted(mapq_dict):
+            out.write(f"{quality}\t{mapq_dict[quality]}\n")
+    return mapq_dict
+
+
+def main(bam, out):
+    collect_mapq(bam, out)
+
+
+if __name__ == "__main__":
+    args = Parser()
+    main(args.bam, args.distribution)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mapping_quality_stats.r	Wed Jun 15 10:43:07 2022 +0000
@@ -0,0 +1,31 @@
+## Setup R error handling to go to stderr
+options(show.error.messages = FALSE,
+        error = function() {
+            cat(geterrmessage(), file = stderr())
+            q("no", 1, FALSE)
+        }
+)
+
+
+warnings()
+library(optparse)
+library(ggplot2)
+
+option_list <- list(
+    make_option(c("-i", "--input"), type = "character", help = "Path to tabular file"),
+    make_option(c("-o", "--output"), type = "character", help = "path to the pdf plot")
+    )
+
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser)
+
+# data frame implementation
+table <- read.delim(args$input, header = TRUE)
+colnames(table) <- c("MAPQ", "Counts")
+
+
+# Barplot
+pdf(file = args$output)
+ggplot(table, aes(x = MAPQ, y = Counts)) +
+    geom_bar(stat = "identity")
+devname <- dev.off()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mapping_quality_stats.xml	Wed Jun 15 10:43:07 2022 +0000
@@ -0,0 +1,62 @@
+<tool id="mapqstatistics" name="Mapping Quality Stats" version="0.16.0.1+galaxy0">
+  <description></description>
+  <requirements>
+        <requirement type="package" version="1.6.4=r36h6115d3f_0">r-optparse</requirement>
+        <requirement type="package" version="3.2.1=r36h6115d3f_0">r-ggplot2</requirement>
+        <requirement type="package" version="0.16.0.1">pysam</requirement>
+  </requirements>
+  <stdio>
+      <exit_code range="1:" level="fatal" description="Tool exception" />
+  </stdio>
+  <command detect_errors="exit_code"><![CDATA[
+    ln -f -s $input.metadata.bam_index input.bam.bai &&
+    ln -s $input input.bam &&
+    python $__tool_directory__/mapping_quality_stats.py
+        -bam input.bam
+        -o $table &&
+    Rscript $__tool_directory__/mapping_quality_stats.r
+        -i '$table'
+        -o $plot
+  ]]></command>
+<inputs>
+    <param name="input" type="data" format="bam" label="Select a bam file to analyze"/>
+</inputs>
+
+ <outputs>
+   <data format="tabular" name="table" label="Distribution table" />
+   <data format="pdf" name="plot" label="Distribution of MAPQs" />
+</outputs>
+
+    <tests>
+        <test>
+            <param name="input" value="sample.bam" ftype="bam"/>
+            <output file="distribution.tab" name="table" />
+            <output file="distribution.pdf" name="plot" />
+        </test>
+    </tests>
+
+
+<help>
+
+**What it does**
+
+Collects the values of mapping quality (MAPQ) in a BAM files
+Shows the data as a table and a barplot
+
+**Inputs**
+
+A bam alignment files which must be sorted
+
+**Output**
+
+A data frame of MAPQ counts
+
+A pdf barplot generated by R and ggplot2
+
+</help>
+
+<citations>
+    <citation type="doi">10.1093/bioinformatics/btp352</citation>
+</citations>
+</tool>
+
Binary file test-data/distribution.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/distribution.tab	Wed Jun 15 10:43:07 2022 +0000
@@ -0,0 +1,32 @@
+mapq	number_of_alignments
+0	54
+6	1
+8	2
+12	1
+14	2
+16	1
+17	1
+21	3
+23	1
+24	1
+25	1
+30	2
+33	6
+40	9
+44	2
+45	9
+46	6
+47	180
+48	196
+49	60
+50	17
+51	7
+52	11
+53	4
+54	6
+55	1
+56	8
+57	9
+58	10
+59	10
+60	10496
Binary file test-data/sample.bam has changed