Mercurial > repos > youngkim > ezbamqc
changeset 9:6610eedd9fae
Uploaded
line wrap: on
line diff
--- a/BAMqc.sh Wed Mar 30 12:03:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,122 +0,0 @@ -#!/bin/sh - -### Galaxy Wrapper for BAMqc ### - -alignment_files="" -refgene="" -attrID="" -rRNA="" -outputHTML="" -stranded="" -mapq="30" -lowBound="-250" -upperBound="250" -stepSize="5" -labels="" -cores="1" - -ARGS=$(getopt -o "r:f:R:o:s:p:q:" -- "$@") - -if [ $? -ne 0 ]; then - echo "Invalid command-line parameters. Do not use this script outside of Galaxy" >&2 - exit 1 -fi - -eval set -- "$ARGS" - -while [ $# -gt 0 ]; do - case "$1" in - -r) - refgene="$2" - shift 2 - ;; - -f) - attrID="$2" - shift 2 - ;; - -R) - rRNA="$2" - shift 2 - ;; - -o) - outputHTML=$2 - shift 2 - ;; - -s) - stranded=$2 - shift 2 - ;; - -q) - mapq=$2 - shift 2 - ;; - - -p) - cores=$2 - shift 2 - ;; - --) - shift - break - ;; - esac -done - -if [ "$cores" -gt 10 ];then - cores="10" -fi - -outputDir=`echo $outputHTML | sed 's/\.dat$/_files/'` -if [ ! -d "$outputDir" ]; then - mkdir $outputDir -fi - -touch bamqc.log - -while [ "$#" -ne 0 ]; -do - FILE="$1" - LABEL=`echo $2 | sed 's/ /-/g; s/\[//; s/\]//;'` - shift 2 - QNAME_SORTED=`samtools view -H ${FILE} | grep "SO:queryname"` - if [ $? -ne 0 ]; then - BASE=`basename ${FILE} \.dat` - echo "Sorting BAM file (${LABEL}." >>samtools.log - samtools sort -@ 5 -n ${FILE} ${BASE} 2>>samtools.log - if [ $? -ne 0 ]; then - echo "Error with samtools sorting for BAM file (${LABEL})." >&2 - cat samtools.log >&2 - exit 1 - fi - echo "BAM file (${LABEL}) was re-sorted by query name." >>bamqc.log - FILELIST="$FILELIST ${BASE}.bam" - else - FILELIST="$FILELIST $FILE" - fi - LABELLIST="$LABELLIST $LABEL" -done - -CMD="ezBAMQC -i $FILELIST -l $LABELLIST -f $attrID -r $refgene -o Galaxy_BAMqc_output --stranded $stranded -q $mapq --rRNA $rRNA -t $cores" - -echo "BAMqc command: $CMD" >> bamqc.log -echo >> bamqc.log - -$CMD 2>> bamqc.log - -if [ $? 
-ne 0 ]; then - echo "BAMqc ran with errors" >&2 - cat bamqc.log >&2 - exit 1 -fi - -sed -i "s/\.\.\/Galaxy_BAMqc_output\///g;" Galaxy_BAMqc_output/bamqc_output.html - -cp -r Galaxy_BAMqc_output/data "$outputDir" -cp -r Galaxy_BAMqc_output/figs "$outputDir" -cp Galaxy_BAMqc_output/bamqc_output.html "$outputHTML" - -if [ $? -ne 0 ]; then - echo "Copying BAMqc results failed" >&2 -fi - -echo "BAMqc results copied to $outputDir" >>bamqc.log
--- a/BAMqc.xml Wed Mar 30 12:03:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,109 +0,0 @@ -<tool id="cshl_BAMqc" name="ezBAMQC" version="0.6.7" > - - <description> - performs QC on BAM files for gene abundances and sample correlation - </description> - - <requirements> - <requirement type="package">BAMqc</requirement> - <requirement type="package">samtools</requirement> - <requirement type="package">R</requirement> - </requirements> - - <command interpreter="sh"> - - BAMqc.sh - - -r '${refdb}' - - -f '${attrID}' - - -R '${rRNAdb}' - - -s '$stranded' - - -o "$output" - - #set $core = len($files) - - -p $core - - #if str($cond_adv_options.adv_options) == 'yes': - -q '$cond_adv_options.mapq' - #end if - - #for $file in $files - '$file.input' - '$file.input.tag' - #end for - - </command> - - <inputs> - <repeat name="files" title="BAM files" min="1"> - <param format="bam" name="input" type="data" label="Files for QC" /> - </repeat> - - <param name="refdb" type="select" label="Reference gene model (GTF)"> - <options from_data_table="gene_GTF_database" /> - </param> - - <param name="attrID" type="text" size="50" value="gene_id" label="Feature ID name" help="Summing reads based on gene (gene_id) or transcript (transcript_id."> - <sanitizer> - <valid initial="none"> - <add value="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890_-." /> - </valid> - </sanitizer> - </param> - - <param name="rRNAdb" type="select" label="Ribosomal RNA locations (BED)"> - <options from_data_table="rRNA_BED_database" /> - </param> - - <param name="stranded" type="select" label="Strandedness"> - <option value="yes">Yes - Read from stranded library</option> - <option value="no">No - Reads from unstranded library</option> - <option value="reverse">Reverse - reverse-stranded library (e.g. 
NSR)</option> - </param> - - <conditional name="cond_adv_options"> - <param name="adv_options" type="select" label="Set advanced options"> - <option value="no" selected="true">No</option> - <option value="yes">Yes</option> - </param> - - <when value="yes"> - <param name="mapq" type="integer" value="30" label="Minimum mapping quality for an alignment to be called uniquely mapped" /> - </when> - - <when value="no" /> - </conditional> - - </inputs> - - <outputs> - <data format="html" name="output" label="BAM QC on $on_tag_string" /> - <data format="txt" name="log" from_work_dir="bamqc.log" label="ezBAMQC log output" hidden="True" /> - </outputs> - - <help> - -**What it does** - -This tool takes the mapping results from RNA-Seq libraries (BAM), and performs rapid gene abundance quantification. If multiple files are provided, it will calculate and display correlation between each sample. - ------ - -The Galaxy wrapper for this tool is written by the `Cold Spring Harbor Laboratory`_ `Bioinformatics Shared Resources`_. - -ezBAMQC_ is written by the `Molly Hammell Laboratory`_ and the `Bioinformatics Shared Resources`_ at CSHL_. - -.. _CSHL: `Cold Spring Harbor Laboratory`_ -.. _ezBAMQC: http://hammelllab.labsites.cshl.edu/software#ezBAMQC -.. _`Molly Hammell Laboratory`: http://hammelllab.labsites.cshl.edu/ -.. _`Cold Spring Harbor Laboratory`: http://www.cshl.edu/ -.. _`Bioinformatics Shared Resources`: http://bioinfo.cshl.edu/index.html - - </help> -</tool> -
--- a/cshl_geneGTF.loc.sample Wed Mar 30 12:03:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -## -## Location of gene annotations GTF -## -## Format is: -## value<TAB>Name<TAB>dbkey<TAB>ID -## -## Name can contain spaces - -/localdata1/annotations/GTF/latest/hg19_refGene.gtf hg19 UCSC refSeq hg19 hg19_refgene -/localdata1/annotations/GTF/latest/hg18_refGene.gtf hg18 UCSC refSeq hg18 hg18_refgene -/localdata1/annotations/GTF/latest/mm10_refGene.gtf mm10 UCSC refSeq mm10 mm10_refgene -/localdata1/annotations/GTF/latest/mm9_refGene.gtf mm9 UCSC refSeq mm9 mm9_refgene -/localdata1/annotations/GTF/latest/dm3_refGene.gtf dm3 UCSC refSeq dm3 dm3_refgene -/localdata1/annotations/GTF/refseq_2015_01_20/rn5_refGene.gtf rn5 UCSC refSeq rn5 rn5_refgene -#/localdata1/annotations/GTF/latest/rn4_refGene.gtf rn4 UCSC refSeq rn4 rn4_refgene -#/localdata1/annotations/GTF/latest/ce6_refGene.gtf ce6 UCSC refSeq ce6 ce6_refgene -#/localdata1/annotations/GTF/latest/ce10_refGene.gtf ce10 UCSC refSeq ce10 ce10_refgene -#/localdata1/annotations/GTF/igenomes_2014_08_28/ZmAGPv2_iGenome_genes.gtf Maize (AGPv2) iGenomes gene info ZmAGPv2 ZmAGPv2_genes -#/localdata1/annotations/GTF/igenomes_2014_08_28/ZmAGPv3_iGenome_genes.gtf Maize (AGPv3) iGenomes gene info ZmAGPv3 ZmAGPv3_genes -#/localdata1/annotations/GTF/igenomes_2014_08_28/tair10_iGenome_genes.gtf Arabidopsis (TAIR10) iGenomes gene info
--- a/cshl_rRNA_BED.loc.sample Wed Mar 30 12:03:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ -## -## Location of rRNA BED files -## -## Format is: -## value<TAB>Name<TAB>dbkey<TAB>ID -## -## Name can contain spaces - -#/localdata1/annotations/BED_files/rRNA/hg19_rmsk_rRNA.bed hg38 repeatMasker rRNA hg38 hg38_rRNA -/localdata1/annotations/BED_files/rRNA/hg19_rmsk_rRNA.bed hg19 repeatMasker rRNA hg19 hg19_rRNA -/localdata1/annotations/BED_files/rRNA/hg18_rmsk_rRNA.bed hg18 repeatMasker rRNA hg18 hg18_rRNA -/localdata1/annotations/BED_files/rRNA/mm10_rmsk_rRNA.bed mm10 repeatMasker rRNA mm10 mm10_rRNA -/localdata1/annotations/BED_files/rRNA/mm9_rmsk_rRNA.bed mm9 repeatMasker rRNA mm9 mm9_rRNA -#/localdata1/annotations/BED_files/rRNA/dm6_rmsk_rRNA.bed dm6 repeatMasker rRNA dm6 dm6_rRNA -/localdata1/annotations/BED_files/rRNA/dm3_rmsk_rRNA.bed dm3 repeatMasker rRNA dm3 dm3_rRNA -/localdata1/annotations/BED_files/rRNA/rn5_rmsk_rRNA.bed rn5 repeatMasker rRNA rn5 rn5_rRNA -#/localdata1/annotations/BED_files/rRNA/rn4_rmsk_rRNA.bed rn4 repeatMasker rRNA rn4 rn4_rRNA -#/localdata1/annotations/BED_files/rRNA/ce6_rmsk_rRNA.bed ce6 repeatMasker rRNA ce6 ce6_rRNA -#/localdata1/annotations/BED_files/rRNA/ce10_rmsk_rRNA.bed ce10 repeatMasker rRNA ce10 ce10_rRNA \ No newline at end of file
--- a/ezBAMQC/Makefile Wed Mar 30 12:03:10 2016 -0400 +++ b/ezBAMQC/Makefile Wed Mar 30 12:11:46 2016 -0400 @@ -1,6 +1,6 @@ # Makefile for ezBAMQC, utilities for the Sequence Alignment/Map format. # -# Version 0.6.5 +# Version 0.6.7 # # Copyright (C) 2015 Bioinformatics Shared Resource, CSHL. # Portions copyright (C) 2015 Cold Spring Harbor Laboratory.
--- a/ezBAMQC/README.rst Wed Mar 30 12:03:10 2016 -0400 +++ b/ezBAMQC/README.rst Wed Mar 30 12:11:46 2016 -0400 @@ -1,184 +1,186 @@ -.. image:: https://raw.githubusercontent.com/mhammell-laboratory/bamqc/master/doc/bamqc-icon.png - :width: 200 px - :alt: generated at codeology.braintreepayments.com/mhammell-laboratory/bamqc - :align: right - :target: http://codeology.braintreepayments.com/mhammell-laboratory/bamqc - -===== -ezBAMQC -===== -*"ezBAMQC, a tool to check the quality of mapped next generation sequencing files."* - -:Description: - - ezBAMQC is a tool to check the quality of either one or many mapped next-generation-sequencing datasets. It conducts comprehensive evaluations of aligned sequencing data from multiple aspects including: clipping profile, mapping quality distribution, mapped read length distribution, genomic/transcriptomic mapping distribution, inner distance distribution (for paired-end reads), ribosomal RNA contamination, transcript 5’ and 3’ end bias, transcription dropout rate, sample correlations, sample reproducibility, sample variations. It outputs a set of tables and plots and one HTML page that contains a summary of the results. Many metrics are designed for RNA-seq data specifically, but ezBAMQC can be applied to any mapped sequencing dataset such as RNA-seq, CLIP-seq, GRO-seq, ChIP-seq, DNA-seq and so on. :: - -:Links: - - `Github Page <https://github.com/mhammell-laboratory/bamqc>`_ - - `Pypi Page <https://pypi.python.org/pypi/ezBAMQC>`_ - - `MHammell Lab <http://hammelllab.labsites.cshl.edu/software>`_ - -:Authors: - Ying Jin, David Molik, and Molly Hammell - -:Version: 0.6.5 - -:Contact: - Ying Jin (yjin@cshl.edu) - -Installation guide for ezBAMQC for from source installs -===================================================== - -When installing ezBAMQC there are several options, but the main point is: since ezBAMQC uses C++ STD 11 you'll need a version of GCC that can support that, this usually means 4.8 or 4.9. 
Beyond that, you'll need Python, R and Corrplot for interfacing with the C code. - -:Installation: - `Source Code <https://github.com/mhammell-laboratory/ezBAMQC/releases>`_ - - `Pypi <https://pypi.python.org/pypi?:action=display&name=ezBAMQC>`_ - -:Prerequisites: - * `python2.7 <https://www.python.org/download/releases/2.7/>`_ - * `R <https://www.r-project.org/>`_ - * `corrplot <https://cran.r-project.org/web/packages/corrplot/>`_ - * `GCC 4.8.1 or greater <https://gcc.gnu.org/gcc-4.8/>`_ GCC 4.9.1 or greater is recommended for PyPi install - -:Notes: - * While there are multiple methods of installing the prerequisites it may help to look at (if using a yum based linux distro):* - * `Devtoolset-3 <https://access.redhat.com/documentation/en-US/Red_Hat_Developer_Toolset/3/html/User_Guide/sect-Red_Hat_Developer_Toolset-Install.html>`_ for GCC compilers - * `IUS <https://ius.io/>`_ for Python2.7 - * `Software Collections <https://www.softwarecollections.org/>`_ for collections of software (like devtoolset 3 or python) - * `rpmfinder <https://www.rpmfind.net/>`_ for searching rpms across multiple systems - -Setup -===== - -1) Make sure that the GCC compiler is in your PATH: - -:: - - export PATH=/path/to/gcc:$PATH - -2) Make sure that python2.7 is in your PYTHONPATH: - -:: - - export PYTHONPATH=/path/to/python2.7/site-packages:$PYTHONPATH - -3) There are three methods of installation of ezBAMQC, from source, from setup.py, and from pypi, once prerequisites are set up. - -From Source -~~~~~~~~~~~ - -1) Download source - -2) Unpack tarball and go to the directory of the package: - -:: - - tar xvfz bamqc-0.6.6.tar.gz - - cd bamqc-0.6.6 - -3) Run make: - -:: - - make - -From Setup.py -~~~~~~~~~~~~~ - -:: - - python2.7 setup.py install - -From Pypi -~~~~~~~~~ - -:: - - pip2.7 install BAMqc - -Usage -===== - -:: - - ezBAMQC [-h] -i alignment_files [alignment_files ...] 
-r [refgene] - [-f [attrID]] [--rRNA [rRNA]] -o [dir] [--stranded [stranded]] - [-q [mapq]] [-l labels [labels ...]] [-t NUMTHREADS] - -optional arguments: - -:: - - -h, --help show this help message and exit. - -i, --inputFile alignment files. Could be multiple SAM/BAM files separated by space. Required. - -r, --refgene gene annotation file in GTF format. Required - -f the read summation at which feature level in the GTF file. DEFAULT: gene_id. - --rRNA rRNA coordinates in BED format. - -o, --outputDir output directory. Required. - --stranded strandness of the library? - yes : sense stranded - reverse : reverse stranded - no : not stranded - DEFAULT: yes. - -q, --mapq Minimum mapping quality (phred scaled) for an alignment to be called uniquely mapped. DEFAULT:30 - -l, --label Labels of input files. DEFAULT:smp1 smp2 ... - -t, --threads Number of threads to use. DEFAULT:1 - -Example: - -:: - - ezBAMQC -i test-data/exp_data/treat1.bam test-data/exp_data/treat2.bam test-data/exp_data/treat3.bam -r test-data/exp_data/hg9_refGene.gtf -q 30 --rRNA test-data/exp_data/hg19_rRNA.bed -o exp_output2 - - Please find the example output from folder test-data. - -FAQ -==== -Q: Why use ezBAMQC? - -A: ezBAMQC is efficient and easy to use. With one command line, it reports a comprehensive evaluation of the data with a set of plots and tables. The ability to assess multiple samples together with high efficiency makes it especially useful in cases where there are a large number of samples from the same condition, genotype, or treatment. ezBAMQC was written in C++ and supports multithreading. A mouse RNA-seq sample with 120M alignments can be done in 8 minutes with 5 threads. - -Q: Why does the total number of reads reported by ezBAMQC not match samtools flagstat? - -A: The difference is because of non-uniquely mapped reads or multiply aligned reads (multi-reads). 
Samtools flagstat counts each multiple alignment as a different read, but ezBAMQC counts reads according to the read ID, i.e., each individual read will be counted once regardless of whether it is a uniquely mapped read or multi-read. - -Q: What is "Low Quality Reads" ? - -A: Reads marked as qc fail according to SAM format or reads with mapping quality lower than the value set by the option -q will be considered as "Low Quality Reads". - -Q: How does the setting of option -q alter the results? - -A: Reads with low quality, i.e., did not pass -q cutoff, are only counted in Total Reads, Mapped Reads, and Mappability by mapping quality plot. The rest of the report does not include low quality reads. - -Q: Are multi-reads (non-uniquely mapped reads) considered in Read distribution and gene quantification? - -A: No. Only uniquely mapped reads were counted. - - -Acknowledgements -================ - -#) Samtools contributors -#) Users' valuable feedback - -Copying & Distribution -====================== - -ezBAMQC is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but *WITHOUT ANY WARRANTY*; without even the implied warranty of -*MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE*. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with ezBAMQC. If not, see `this website <http://www.gnu.org/licenses/>`_ +======= +ezBAMQC +======= + +*"ezBAMQC, a tool to check the quality of mapped next generation sequencing files."* + +:Codeology Icon: + + .. 
image:: https://raw.githubusercontent.com/mhammell-laboratory/bamqc/master/doc/bamqc-icon.gif + :alt: generated at codeology.braintreepayments.com/mhammell-laboratory/bamqc + :align: right + :target: http://codeology.braintreepayments.com/mhammell-laboratory/bamqc + +:Description: + + ezBAMQC is a tool to check the quality of either one or many mapped next-generation-sequencing datasets. It conducts comprehensive evaluations of aligned sequencing data from multiple aspects including: clipping profile, mapping quality distribution, mapped read length distribution, genomic/transcriptomic mapping distribution, inner distance distribution (for paired-end reads), ribosomal RNA contamination, transcript 5’ and 3’ end bias, transcription dropout rate, sample correlations, sample reproducibility, sample variations. It outputs a set of tables and plots and one HTML page that contains a summary of the results. Many metrics are designed for RNA-seq data specifically, but ezBAMQC can be applied to any mapped sequencing dataset such as RNA-seq, CLIP-seq, GRO-seq, ChIP-seq, DNA-seq and so on. + +:Links: + + `Github Page <https://github.com/mhammell-laboratory/bamqc>`_ + + `Pypi Page <https://pypi.python.org/pypi/ezBAMQC>`_ + + `MHammell Lab <http://hammelllab.labsites.cshl.edu/software>`_ + +:Authors: + Ying Jin, David Molik, and Molly Hammell + +:Version: 0.6.7 + +:Contact: + Ying Jin (yjin@cshl.edu) + +Installation guide for ezBAMQC for from source installs +======================================================= + +When installing ezBAMQC there are several options, but the main point is: since ezBAMQC uses C++ STD 11 you'll need a version of GCC that can support that, this usually means 4.8 or 4.9. beyond that, you'll need Python, R and Corrplot for interfacing with the C code. 
+ +:Installation: + `Source Code <https://github.com/mhammell-laboratory/ezBAMQC/releases>`_ + + `Pypi <https://pypi.python.org/pypi?:action=display&name=ezBAMQC>`_ + +:Prerequisites: + * `python2.7 <https://www.python.org/download/releases/2.7/>`_ + * `R <https://www.r-project.org/>`_ + * `corrplot <https://cran.r-project.org/web/packages/corrplot/>`_ + * `GCC 4.8.1 or greater <https://gcc.gnu.org/gcc-4.8/>`_ GCC 4.9.1 or greater is recommended for PyPi install + +:Notes: + * While there are multiple methods of installing the prerequisites it may help to look at (if using a yum based linux distro):* + * `Devtoolset-3 <https://access.redhat.com/documentation/en-US/Red_Hat_Developer_Toolset/3/html/User_Guide/sect-Red_Hat_Developer_Toolset-Install.html>`_ for GCC compilers + * `IUS <https://ius.io/>`_ for Python2.7 + * `Software Collections <https://www.softwarecollections.org/>`_ for collections of software (like devtoolset 3 or python) + * `rpmfinder <https://www.rpmfind.net/>`_ for searching rpms across multiple systems + +Setup +===== + +1) Make sure that the GCC compiler is in your PATH: + +:: + + export PATH=/path/to/gcc:$PATH + +2) Make sure that python2.7 is in your PYTHONPATH: + +:: + + export PYTHONPATH=/path/to/python2.7/site-packages:$PYTHONPATH + +3) There are three methods of installation of ezBAMQC, from source, from setup.py, and from pypi, once prerequisites are set up. + +From Source +~~~~~~~~~~~ + +1) Download source + +2) Unpack tarball and go to the directory of the package: + +:: + + tar xvfz bamqc-0.6.7.tar.gz + + cd bamqc-0.6.7 + +3) Run make: + +:: + + make + +From Setup.py +~~~~~~~~~~~~~ + +:: + + python2.7 setup.py install + +From Pypi +~~~~~~~~~ + +:: + + pip2.7 install BAMqc + +Usage +===== + +:: + + ezBAMQC [-h] -i alignment_files [alignment_files ...] 
-r [refgene] + [-f [attrID]] [--rRNA [rRNA]] -o [dir] [--stranded [stranded]] + [-q [mapq]] [-l labels [labels ...]] [-t NUMTHREADS] + +optional arguments: + +:: + + -h, --help show this help message and exit. + -i, --inputFile alignment files. Could be multiple SAM/BAM files separated by space. Required. + -r, --refgene gene annotation file in GTF format. Required + -f the read summation at which feature level in the GTF file. DEFAULT: gene_id. + --rRNA rRNA coordinates in BED format. + -o, --outputDir output directory. Required. + --stranded strandness of the library? + yes : sense stranded + reverse : reverse stranded + no : not stranded + DEFAULT: yes. + -q, --mapq Minimum mapping quality (phred scaled) for an alignment to be called uniquely mapped. DEFAULT:30 + -l, --label Labels of input files. DEFAULT:smp1 smp2 ... + -t, --threads Number of threads to use. DEFAULT:1 + +Example: + +:: + + ezBAMQC -i test-data/exp_data/treat1.bam test-data/exp_data/treat2.bam test-data/exp_data/treat3.bam -r test-data/exp_data/hg9_refGene.gtf -q 30 --rRNA test-data/exp_data/hg19_rRNA.bed -o exp_output2 + + Please find the example output from folder test-data. + +FAQ +=== +Q: Why use ezBAMQC? + +A: ezBAMQC is efficient and easy to use. With one command line, it reports a comprehensive evaluation of the data with a set of plots and tables. The ability to assess multiple samples together with high efficiency makes it especially useful in cases where there are a large number of samples from the same condition, genotype, or treatment. ezBAMQC was written in C++ and supports multithreading. A mouse RNA-seq sample with 120M alignments can be done in 8 minutes with 5 threads. + +Q: Why does the total number of reads reported by ezBAMQC not match samtools flagstat? + +A: The difference is because of non-uniquely mapped reads or multiply aligned reads (multi-reads). 
Samtools flagstat counts each multiple alignment as a different read, but ezBAMQC counts reads according to the read ID, i.e., each individual read will be counted once regardless of whether it is a uniquely mapped read or multi-read. + +Q: What is "Low Quality Reads" ? + +A: Reads marked as qc fail according to SAM format or reads with mapping quality lower than the value set by the option -q will be considered as "Low Quality Reads". + +Q: How does the setting of option -q alter the results? + +A: Reads with low quality, i.e., did not pass -q cutoff, are only counted in Total Reads, Mapped Reads, and Mappability by mapping quality plot. The rest of the report does not include low quality reads. + +Q: Are multi-reads (non-uniquely mapped reads) considered in Read distribution and gene quantification? + +A: No. Only uniquely mapped reads were counted. + + +Acknowledgements +================ + +#) Samtools contributors +#) Users' valuable feedback + +Copying & Distribution +====================== + +ezBAMQC is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but *WITHOUT ANY WARRANTY*; without even the implied warranty of +*MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE*. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with ezBAMQC. If not, see `this website <http://www.gnu.org/licenses/>`_
--- a/ezBAMQC/ezBAMQC Wed Mar 30 12:03:10 2016 -0400 +++ b/ezBAMQC/ezBAMQC Wed Mar 30 12:11:46 2016 -0400 @@ -12,7 +12,7 @@ @status: -@version: 0.6.6 +@version: 0.6.7 ''' @@ -872,7 +872,7 @@ f.write("nz_gene_mm[i] = length(which(M1[,i]>0))/nz_genes * 100 } \n") - f.write("bplt <- barplot(nz_gene_mm,beside=T,border='NA',space=1.5,ylim=c(0,100),ylab='Genes reproducibly detected (%)',col='blue',names.arg=colnames(MM))\n") + f.write("bplt <- barplot(nz_gene_mm,beside=T,border='NA',space=1.5,ylim=c(0,100),ylab='Genes reproducibly detected (%)',col='blue',names.arg=colnames(MM),las=2)\n") f.write("text(y= nz_gene_mm+2, x= bplt, labels=paste(as.character(round(nz_gene_mm,digits=1)),'%',sep=''), xpd=TRUE)\n") @@ -956,7 +956,7 @@ - f.write('barplot(Fn_mm,main="Gene abundance (RPM)",xlab="Sample",ylab="Frequency",col=c("green","blue","red","yellow"),legend=xname)\n') + f.write('barplot(Fn_mm,main="Gene abundance (RPM)",xlab="Sample",ylab="Frequency",col=c("green","blue","red","yellow"),legend=xname,las=2)\n') f.write("dev.state = dev.off()\n") @@ -1022,7 +1022,7 @@ - f.write('barplot(Fn_mm,xlab="Sample",main="Mapping Quality",ylim=c(0,1),ylab="Frequency",col=c("blue","green","yellow","orange","red"),legend=xname)\n') + f.write('barplot(Fn_mm,xlab="Sample",main="Mapping Quality",ylim=c(0,1),ylab="Frequency",col=c("blue","green","yellow","orange","red"),legend=xname,las=2)\n') f.write("dev.state = dev.off()\n")
--- a/ezBAMQC/setup.py Wed Mar 30 12:03:10 2016 -0400 +++ b/ezBAMQC/setup.py Wed Mar 30 12:11:46 2016 -0400 @@ -89,28 +89,28 @@ sys.exit() BAMQC_HEADER = [ - 'src/bamqc/Constants.h', - 'src/bamqc/Coverage_prof.h', - 'src/bamqc/GeneFeatures.h', - 'src/bamqc/InnerDist_prof.h', - 'src/bamqc/IntervalTree.h', - 'src/bamqc/Mappability.h', - 'src/bamqc/parseBAM.h', - 'src/bamqc/ReadDup_prof.h', - 'src/bamqc/Results.h', - 'src/bamqc/rRNA.h' + 'src/ezBAMQC/Constants.h', + 'src/ezBAMQC/Coverage_prof.h', + 'src/ezBAMQC/GeneFeatures.h', + 'src/ezBAMQC/InnerDist_prof.h', + 'src/ezBAMQC/IntervalTree.h', + 'src/ezBAMQC/Mappability.h', + 'src/ezBAMQC/parseBAM.h', + 'src/ezBAMQC/ReadDup_prof.h', + 'src/ezBAMQC/Results.h', + 'src/ezBAMQC/rRNA.h' ] BAMQC_SOURCE = [ - 'src/bamqc/Coverage_prof.cpp', - 'src/bamqc/GeneFeatures.cpp', - 'src/bamqc/InnerDist_prof.cpp', - 'src/bamqc/IntervalTree.cpp', - 'src/bamqc/Mappability.cpp', - 'src/bamqc/parseBAM.cpp', - 'src/bamqc/ReadDup_prof.cpp', - 'src/bamqc/Results.cpp', - 'src/bamqc/rRNA.cpp' + 'src/ezBAMQC/Coverage_prof.cpp', + 'src/ezBAMQC/GeneFeatures.cpp', + 'src/ezBAMQC/InnerDist_prof.cpp', + 'src/ezBAMQC/IntervalTree.cpp', + 'src/ezBAMQC/Mappability.cpp', + 'src/ezBAMQC/parseBAM.cpp', + 'src/ezBAMQC/ReadDup_prof.cpp', + 'src/ezBAMQC/Results.cpp', + 'src/ezBAMQC/rRNA.cpp' ] ###TODO HAVE TO SPLIT INTO TWO AND MAKE THE A FILE @@ -174,7 +174,7 @@ BAMqc_CFLAGS = ['-fpermissive','-O3','-std=c++11','-Wno-error=declaration-after-statement'] BAMqc_DFLAGS = [('_FILE_OFFSET_BITS','64'),('_LARGEFILE64_SOURCE',''),('_CURSES_LIB','1')] BAMqc_INCLUDES = ['./src/htslib'] -BAMqc_HEADERS = ['./src/bamqc'] +BAMqc_HEADERS = ['./src/ezBAMQC'] BAMqc_EXTRA = ['build/lib.linux-x86_64-2.7/htslib.so'] htslib_CFLAGS = ['-Wno-error=declaration-after-statement'] @@ -182,7 +182,7 @@ htslib_DFLAGS = [('_FILE_OFFSET_BITS','64'),('_USE_KNETFILE','')] setup(name = "ezBAMQC", - version = "0.6.5", + version = "0.6.7", description = 'Quality control tools for NGS alignment 
file', keywords = 'Quality control BAM file', # make sure to add all the necessary requires
--- a/tool_data_table_conf.xml.sample Wed Mar 30 12:03:10 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -<tables>cat - <!-- Locations of gene model GTFs --> - <table name="gene_GTF_database" comment_char="#"> - <columns>value, name, dbkey, id</columns> - <file path="tool-data/cshl_geneGTF.loc" /> - </table> - <!-- Locations of rRNA BED files for BAMqc --> - <table name="rRNA_BED_database" comment_char="#"> - <columns>value, name, dbkey, id</columns> - <file path="tool-data/cshl_rRNA_BED.loc" /> - </table> -</tables> \ No newline at end of file