annotate bamparse.py @ 2:8ea06787c08a draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 968c9ab925ed768027ff8012d0ff6410fc24f079
author artbio
date Tue, 09 Oct 2018 17:14:57 -0400
parents ae9ea0488850
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
1 #!/usr/bin/env python
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
2 import argparse
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
3 from collections import defaultdict
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
4
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
5 import pysam
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
6
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
7
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
def Parser():
    """Parse the command line and return the resulting namespace.

    Recognised options:
      --output      one or more count table paths (strings)
      --alignments  one or more BAM alignment files
      --labels      one or more labels for the alignments
      --number      "unique" or "multiple": whether the output is a single
                    table or multiple tables (None when omitted)

    Returns the ``argparse.Namespace`` produced from ``sys.argv``.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument('--output', nargs='+', action='store', type=str,
                            help='Count tables')
    the_parser.add_argument('--alignments', nargs='+',
                            help="bam alignments files")
    the_parser.add_argument('--labels', nargs='+', help="Alignments labels")
    the_parser.add_argument('--number',
                            choices=["unique", "multiple"],
                            help="output is a single table or multiple tables")
    args = the_parser.parse_args()
    return args
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
20
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
21
2
8ea06787c08a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 968c9ab925ed768027ff8012d0ff6410fc24f079
artbio
parents: 1
diff changeset
def get_counts(bamfile):
    """
    Takes an AlignmentFile object and returns a dictionary of counts for sense,
    antisense, or both sense and antisense bam alignments to the references,
    depending on the pre-treatment performed by sambamba in the xml wrapper

    The result is a defaultdict(int) holding one entry per name in
    ``bamfile.references``, including references with a count of 0.
    """
    counts = defaultdict(int)
    # One count() call per reference; every reference gets an explicit entry,
    # so no separate zero-initialisation pass is needed.
    # NOTE(review): per-reference counting presumably needs an indexed BAM -
    # confirm against the wrapper that prepares the input.
    for ref_name in bamfile.references:
        counts[ref_name] = bamfile.count(reference=ref_name)
    return counts
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
34
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
35
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
def writetable(diclist, labels, output, number):
    ''' diclist is a list of count dictionaries

    Writes tab-separated count tables into the existing "outputdir"
    directory, relative to the current working directory.

    diclist: list of {gene: count} dictionaries, one per alignment file
    labels: column labels, in the same order as diclist
    output: accepted for interface compatibility; not used here
    number: "unique" writes a single outputdir/table.tabular with one count
            column per label; any other value writes one
            outputdir/table<i>.tabular per (dictionary, label) pair
    '''
    if number == "unique":
        # Look counts up by gene name rather than by position, so rows stay
        # correct even when the dictionaries do not share the same key set;
        # a gene missing from one dictionary is reported as 0.
        genes = sorted(set().union(*diclist))
        with open("outputdir/table.tabular", "w") as out:
            out.write("gene\t%s\n" % "\t".join(labels))
            for gene in genes:
                line = [str(gene)]
                line.extend(str(dic.get(gene, 0)) for dic in diclist)
                out.write("%s\n" % "\t".join(line))
    else:
        for i, (dic, label) in enumerate(zip(diclist, labels)):
            # "with" closes each table even if a write fails part-way.
            with open("outputdir/table" + str(i) + ".tabular", "w") as out:
                out.write("gene\t%s\n" % label)
                for gene in sorted(dic):
                    out.write("%s\t%s\n" % (gene, dic[gene]))
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
57
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
58
2
8ea06787c08a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 968c9ab925ed768027ff8012d0ff6410fc24f079
artbio
parents: 1
diff changeset
def main(alignments, labels, output, number):
    """Count alignments per reference in each BAM file and write the tables.

    alignments: paths of the BAM files to read
    labels: labels for the alignments, passed on to writetable
    output: passed on to writetable unchanged
    number: "unique" or "multiple", passed on to writetable
    """
    diclist = []
    for bam_path in alignments:
        # Close each BAM handle as soon as its counts have been collected.
        with pysam.AlignmentFile(bam_path, 'rb') as bam_object:
            diclist.append(get_counts(bam_object))
    writetable(diclist, labels, output, number)
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
65
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
66
2a1a2bc6ae8b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: parse the command line, then build the tables.
    args = Parser()
    main(args.alignments, args.labels, args.output, args.number)