view bamparse.py @ 2:8ea06787c08a draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 968c9ab925ed768027ff8012d0ff6410fc24f079
author artbio
date Tue, 09 Oct 2018 17:14:57 -0400
parents ae9ea0488850
children
line wrap: on
line source

#!/usr/bin/env python
import argparse
from collections import defaultdict

import pysam


def Parser():
    """
    Declare the command-line options of the script and parse them.

    Options:
        --output      one or more values ('Count tables')
        --alignments  one or more bam alignment files
        --labels      one or more labels for the alignments
        --number      either "unique" or "multiple"

    Returns the argparse namespace produced by parse_args(); options that
    were not supplied on the command line are None.
    """
    # The three list-valued options only differ by flag and help text.
    multi_valued_options = (
        ('--output', 'Count tables'),
        ('--alignments', "bam alignments files"),
        ('--labels', "Alignments labels"),
    )
    cli = argparse.ArgumentParser()
    for flag, description in multi_valued_options:
        cli.add_argument(flag, nargs='+', help=description)
    cli.add_argument(
        '--number',
        choices=["unique", "multiple"],
        help="output is a single table or multiple tables",
    )
    return cli.parse_args()


def get_counts(bamfile):
    """
    Tally the alignments of every reference listed by an AlignmentFile.

    bamfile must provide a ``references`` sequence and a
    ``count(reference=name)`` method. Each reference name is mapped to the
    value returned by that method, so whether the figures are sense,
    antisense or both depends on how the bam file was filtered beforehand
    (by sambamba in the xml wrapper).

    Returns a defaultdict(int) keyed by reference name, in the order the
    references are listed; names that were never stored read as 0.
    """
    per_reference = defaultdict(int)
    # One pass is enough: every listed reference receives its own count.
    per_reference.update(
        (name, bamfile.count(reference=name)) for name in bamfile.references
    )
    return per_reference


def writetable(diclist, labels, output, number):
    """
    Write count dictionaries as tab-separated tables under ``outputdir/``.

    Arguments:
        diclist: list of count dictionaries, each mapping a reference
            ("gene") name to its count; one dictionary per alignment file.
        labels: column labels, given in the same order as diclist.
        output: accepted for interface compatibility but not used here; the
            destination paths are fixed (see ``number``).
        number: "unique" writes a single merged table to
            ``outputdir/table.tabular`` with one count column per label;
            any other value writes one two-column table per dictionary to
            ``outputdir/table<i>.tabular`` (i is the position in diclist).

    The ``outputdir`` directory must already exist; it is not created here.

    In the merged table, rows cover the sorted union of the gene names of
    all dictionaries and each count is looked up by gene name, with 0 for a
    gene that a dictionary does not contain. This keeps columns aligned even
    when the dictionaries do not share the same keys; when they do, the
    output is the same as a positional merge. An empty diclist yields a
    table holding only the header line.
    """
    if number == "unique":
        # set().union() with no argument is an empty set, so an empty
        # diclist is handled without indexing into it.
        genes = sorted(set().union(*diclist))
        with open("outputdir/table.tabular", "w") as out:
            out.write("gene\t%s\n" % "\t".join(labels))
            for gene in genes:
                # dict.get() does not insert missing keys, so defaultdict
                # inputs are left untouched.
                row = [str(gene)]
                row.extend(str(dic.get(gene, 0)) for dic in diclist)
                out.write("%s\n" % "\t".join(row))
    else:
        for i, (dic, label) in enumerate(zip(diclist, labels)):
            with open("outputdir/table" + str(i) + ".tabular", "w") as out:
                out.write("gene\t%s\n" % label)
                for gene in sorted(dic):
                    out.write("%s\t%s\n" % (gene, dic[gene]))


def main(alignments, labels, output, number):
    """
    Count alignments per reference in each bam file and write the tables.

    Arguments:
        alignments: paths of the bam files to open (binary read mode).
        labels: labels for the alignments, in the same order.
        output: forwarded unchanged to writetable().
        number: "unique" or "multiple", forwarded to writetable().

    Each bam file is closed as soon as its counts have been collected, even
    if counting fails, so file handles are not held for the whole run.
    """
    diclist = []
    for path in alignments:
        bam_object = pysam.AlignmentFile(path, 'rb')
        try:
            diclist.append(get_counts(bam_object))
        finally:
            bam_object.close()
    writetable(diclist, labels, output, number)


if __name__ == "__main__":
    # Script entry point: parse the command line, then forward each option
    # to main() by name.
    args = Parser()
    main(
        alignments=args.alignments,
        labels=args.labels,
        output=args.output,
        number=args.number,
    )