diff bamparse.py @ 0:2a1a2bc6ae8b draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
author artbio
date Fri, 13 Oct 2017 02:59:36 -0400
parents
children ae9ea0488850
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamparse.py	Fri Oct 13 02:59:36 2017 -0400
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+import argparse
+from collections import defaultdict
+
+import pysam
+
+
+def Parser():
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument('--output', nargs='+', action='store', type=str,
+                            help='Count tables')
+    the_parser.add_argument('--polarity',
+                            choices=["sense", "antisense", "both"],
+                            help="forward, reverse or both forward an\
+                                reverse reads are counted")
+    the_parser.add_argument('--alignments', nargs='+',
+                            help="bam alignments files")
+    the_parser.add_argument('--labels', nargs='+', help="Alignments labels")
+    the_parser.add_argument('--number',
+                            choices=["unique", "multiple"],
+                            help="output is a single table or multiple tables")
+    args = the_parser.parse_args()
+    return args
+
+
+def get_counts(bamfile, polarity="both"):
+    """
+    Takes an AlignmentFile object and returns a dictionary of counts for sense,
+    antisense, or both sense and antisense reads aligning to the bam references
+    """
+    def filter_sense_read(read):
+        if read.is_reverse:
+            return 0
+        else:
+            return 1
+
+    def filter_antisense_read(read):
+        if read.is_reverse:
+            return 1
+        else:
+            return 0
+
+    counts = defaultdict(int)
+    for ref_name in bamfile.references:
+        counts[ref_name] = 0
+    if polarity == "both":
+        for ref_name in bamfile.references:
+            counts[ref_name] = bamfile.count(reference=ref_name)
+    if polarity == "sense":
+        for ref_name in bamfile.references:
+            for read in bamfile.fetch(ref_name):
+                counts[ref_name] += filter_sense_read(read)
+    if polarity == "antisense":
+        for ref_name in bamfile.references:
+            for read in bamfile.fetch(ref_name):
+                counts[ref_name] += filter_antisense_read(read)
+    return counts
+
+
+def writetable(diclist, labels, output, number):
+    ''' diclist is a list of count dictionnaries '''
+    countlists = []
+    for dic in diclist:
+        counts = sorted(dic.items())
+        counts = [j for (i,j) in counts]
+        countlists.append(counts)
+    if number == "unique":
+        out = open("outputdir/table.tabular", "w")
+        out.write("gene\t%s\n" % "\t".join(labels))
+        for countline in zip(sorted(diclist[0]), *countlists):
+            line = [ str(i) for i in countline]
+            out.write("%s\n" % "\t".join(line))
+        out.close()
+    else:
+        for i, (dic, label) in enumerate(zip(diclist, labels)):
+            out = open("outputdir/table" + str(i) + ".tabular", "w")
+            out.write("gene\t%s\n" % label)
+            for gene in sorted(dic):
+                out.write("%s\t%s\n" % (gene, dic[gene]))
+            out.close()
+
+
+
+def main(alignments, labels, polarity, output, number):
+    diclist = []
+    for file in alignments:
+        bam_object = pysam.AlignmentFile(file, 'rb')
+        diclist.append(get_counts(bam_object, polarity=polarity))
+    writetable(diclist, labels, output, number)
+
+
+if __name__ == "__main__":
+    args = Parser()
+    main(args.alignments, args.labels, args.polarity, args.output, args.number)