Mercurial > repos > artbio > bamparse
diff bamparse.py @ 0:2a1a2bc6ae8b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
author | artbio |
---|---|
date | Fri, 13 Oct 2017 02:59:36 -0400 |
parents | |
children | ae9ea0488850 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamparse.py Fri Oct 13 02:59:36 2017 -0400 @@ -0,0 +1,94 @@ +#!/usr/bin/env python +import argparse +from collections import defaultdict + +import pysam + + +def Parser(): + the_parser = argparse.ArgumentParser() + the_parser.add_argument('--output', nargs='+', action='store', type=str, + help='Count tables') + the_parser.add_argument('--polarity', + choices=["sense", "antisense", "both"], + help="forward, reverse or both forward an\ + reverse reads are counted") + the_parser.add_argument('--alignments', nargs='+', + help="bam alignments files") + the_parser.add_argument('--labels', nargs='+', help="Alignments labels") + the_parser.add_argument('--number', + choices=["unique", "multiple"], + help="output is a single table or multiple tables") + args = the_parser.parse_args() + return args + + +def get_counts(bamfile, polarity="both"): + """ + Takes an AlignmentFile object and returns a dictionary of counts for sense, + antisense, or both sense and antisense reads aligning to the bam references + """ + def filter_sense_read(read): + if read.is_reverse: + return 0 + else: + return 1 + + def filter_antisense_read(read): + if read.is_reverse: + return 1 + else: + return 0 + + counts = defaultdict(int) + for ref_name in bamfile.references: + counts[ref_name] = 0 + if polarity == "both": + for ref_name in bamfile.references: + counts[ref_name] = bamfile.count(reference=ref_name) + if polarity == "sense": + for ref_name in bamfile.references: + for read in bamfile.fetch(ref_name): + counts[ref_name] += filter_sense_read(read) + if polarity == "antisense": + for ref_name in bamfile.references: + for read in bamfile.fetch(ref_name): + counts[ref_name] += filter_antisense_read(read) + return counts + + +def writetable(diclist, labels, output, number): + ''' diclist is a list of count dictionnaries ''' + countlists = [] + for dic in diclist: + counts = sorted(dic.items()) + counts = [j for (i,j) in counts] + countlists.append(counts) + if number == "unique": + out = open("outputdir/table.tabular", "w") + out.write("gene\t%s\n" % "\t".join(labels)) + for countline in zip(sorted(diclist[0]), *countlists): + line = [ str(i) for i in countline] + out.write("%s\n" % "\t".join(line)) + out.close() + else: + for i, (dic, label) in enumerate(zip(diclist, labels)): + out = open("outputdir/table" + str(i) + ".tabular", "w") + out.write("gene\t%s\n" % label) + for gene in sorted(dic): + out.write("%s\t%s\n" % (gene, dic[gene])) + out.close() + + + +def main(alignments, labels, polarity, output, number): + diclist = [] + for file in alignments: + bam_object = pysam.AlignmentFile(file, 'rb') + diclist.append(get_counts(bam_object, polarity=polarity)) + writetable(diclist, labels, output, number) + + +if __name__ == "__main__": + args = Parser() + main(args.alignments, args.labels, args.polarity, args.output, args.number)