Mercurial > repos > artbio > mapping_quality_stats
changeset 0:f00479673d47 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mapping_quality_stats commit e4b37874b820a2ac48732667128a08e5755b7c4b
author | artbio |
---|---|
date | Wed, 15 Jun 2022 10:43:07 +0000 |
parents | |
children | 7883d97fa479 |
files | mapping_quality_stats.py mapping_quality_stats.r mapping_quality_stats.xml test-data/distribution.pdf test-data/distribution.tab test-data/sample.bam |
diffstat | 6 files changed, 161 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapping_quality_stats.py Wed Jun 15 10:43:07 2022 +0000 @@ -0,0 +1,36 @@ +import argparse +from collections import defaultdict + +import pysam + + +def Parser(): + the_parser = argparse.ArgumentParser() + the_parser.add_argument('-bam', '--bam', dest='bam', required=True, + help='input BAM file') + the_parser.add_argument('-o', '--output', dest='distribution', + required=True, + help='tabular output for mapq distribution') + args = the_parser.parse_args() + return args + + +def collect_mapq(bam, out): + samfile = pysam.AlignmentFile(bam, "rb") + mapq_dict = defaultdict(int) + for read in samfile: + mapq_dict[read.mapping_quality] += 1 + with open(out, 'w') as out: + out.write('mapq\tnumber_of_alignments\n') + for quality in sorted(mapq_dict): + out.write(f"{quality}\t{mapq_dict[quality]}\n") + return mapq_dict + + +def main(bam, out): + collect_mapq(bam, out) + + +if __name__ == "__main__": + args = Parser() + main(args.bam, args.distribution)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapping_quality_stats.r Wed Jun 15 10:43:07 2022 +0000 @@ -0,0 +1,31 @@ +## Setup R error handling to go to stderr +options(show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } +) + + +warnings() +library(optparse) +library(ggplot2) + +option_list <- list( + make_option(c("-i", "--input"), type = "character", help = "Path to tabular file"), + make_option(c("-o", "--output"), type = "character", help = "path to the pdf plot") + ) + +parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) +args <- parse_args(parser) + +# data frame implementation +table <- read.delim(args$input, header = TRUE) +colnames(table) <- c("MAPQ", "Counts") + + +# Barplot +pdf(file = args$output) +ggplot(table, aes(x = MAPQ, y = Counts)) + + geom_bar(stat = "identity") +devname <- dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapping_quality_stats.xml Wed Jun 15 10:43:07 2022 +0000 @@ -0,0 +1,62 @@ +<tool id="mapqstatistics" name="Mapping Quality Stats" version="0.16.0.1+galaxy0"> + <description></description> + <requirements> + <requirement type="package" version="1.6.4=r36h6115d3f_0">r-optparse</requirement> + <requirement type="package" version="3.2.1=r36h6115d3f_0">r-ggplot2</requirement> + <requirement type="package" version="0.16.0.1">pysam</requirement> + </requirements> + <stdio> + <exit_code range="1:" level="fatal" description="Tool exception" /> + </stdio> + <command detect_errors="exit_code"><![CDATA[ + ln -f -s $input.metadata.bam_index input.bam.bai && + ln -s $input input.bam && + python $__tool_directory__/mapping_quality_stats.py + -bam input.bam + -o $table && + Rscript $__tool_directory__/mapping_quality_stats.r + -i '$table' + -o $plot + ]]></command> +<inputs> + <param name="input" type="data" format="bam" label="Select a bam file to analyze"/> +</inputs> + + <outputs> + <data format="tabular" name="table" label="Distribution table" /> + <data format="pdf" name="plot" label="Distribution of MAPQs" /> +</outputs> + + <tests> + <test> + <param name="input" value="sample.bam" ftype="bam"/> + <output file="distribution.tab" name="table" /> + <output file="distribution.pdf" name="plot" /> + </test> + </tests> + + +<help> + +**What it does** + +Collects the values of mapping quality (MAPQ) in a BAM files +Shows the data as a table and a barplot + +**Inputs** + +A bam alignment files which must be sorted + +**Output** + +A data frame of MAPQ counts + +A pdf barplot generated by R and ggplot2 + +</help> + +<citations> + <citation type="doi">10.1093/bioinformatics/btp352</citation> +</citations> +</tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/distribution.tab Wed Jun 15 10:43:07 2022 +0000 @@ -0,0 +1,32 @@ +mapq number_of_alignments +0 54 +6 1 +8 2 +12 1 +14 2 +16 1 +17 1 +21 3 +23 1 +24 1 +25 1 +30 2 +33 6 +40 9 +44 2 +45 9 +46 6 +47 180 +48 196 +49 60 +50 17 +51 7 +52 11 +53 4 +54 6 +55 1 +56 8 +57 9 +58 10 +59 10 +60 10496