17
|
1 #!/usr/bin/env python
|
|
2 import sys
|
|
3 import re
|
|
4 from collections import defaultdict
|
|
5 import argparse
|
19
|
6 import csv
|
17
|
7
|
|
def _build_parser():
    """Create the command-line parser for this script.

    Options: -i/--input (table to read), -o/--output (file to write) and
    -m/--use_manual (use the "Final_annotation" column instead of
    "Automatic_annotation").
    """
    parser = argparse.ArgumentParser()
    # Both files are mandatory: without them the script has nothing to do,
    # so let argparse report the problem instead of failing later on None.
    parser.add_argument("-i", "--input", type=argparse.FileType('r'),
                        required=True,
                        help="path to file CLUSTER_table.csv")
    parser.add_argument("-o", "--output", type=argparse.FileType('w'),
                        required=True, help="output file name")
    parser.add_argument("-m", "--use_manual", action='store_true',
                        default=False)
    return parser


def _summarize_cluster_table(handle, annotation):
    """Sum read counts per annotation class from a tab-separated table.

    The input is read in two phases:

    * Before the header row (first two fields "Cluster", "Supercluster"),
      rows with at least two fields are treated as ``name<TAB>value``
      summary entries; entries whose value is not an integer are ignored.
    * After the header row, every row is a cluster record. Its annotation
      is taken from the column named *annotation*, and its read count from
      the fourth column (index 3).

    The annotation string is split on "/", the first component is dropped,
    and the rest is joined with tabs behind a "Top_clusters" prefix to form
    the result key.

    Args:
        handle: open text file (or any iterable of lines) with the table.
        annotation: name of the header column holding the annotation.

    Returns:
        Mapping of class label -> number of reads, in order of first
        appearance, followed by the "Singlets" and "Small_cluster" entries.

    Raises:
        ValueError: if the header lacks the *annotation* column, a read
            count is not an integer, or the summary entries
            "Number_of_singlets" / "Number_of_reads_in_clusters" are absent.
    """
    header_seen = False
    column = None
    clust_info = {}
    counts = defaultdict(int)
    top_clusters = 0

    for parts in csv.reader(handle, delimiter="\t"):
        if not parts:
            continue
        # Slice comparison is safe for one-field lines, where parts[1]
        # would raise IndexError.
        if parts[:2] == ["Cluster", "Supercluster"]:
            header_seen = True
            column = parts.index(annotation)
            continue
        if header_seen:
            lineage = "\t".join(parts[column].split("/")[1:]).replace('"', '')
            nreads = int(parts[3])
            counts["Top_clusters\t" + lineage] += nreads
            top_clusters += nreads
        elif len(parts) >= 2:
            try:
                clust_info[parts[0].replace('"', '')] = int(parts[1])
            except ValueError:
                # Non-numeric summary values are deliberately skipped; only
                # the integer entries are used below.
                continue

    required = ("Number_of_singlets", "Number_of_reads_in_clusters")
    missing = [key for key in required if key not in clust_info]
    if missing:
        raise ValueError(
            "summary field(s) missing from input: " + ", ".join(missing))

    counts['Singlets'] = clust_info['Number_of_singlets']
    # Reads that are in clusters but not in any cluster listed in the table.
    counts['Small_cluster'] = (
        clust_info['Number_of_reads_in_clusters'] - top_clusters)
    return counts


def _write_counts(counts, handle):
    """Write one ``<reads><TAB><class label>`` line per entry of *counts*."""
    handle.writelines(
        str(nreads) + "\t" + cls_line + "\n"
        for cls_line, nreads in counts.items()
    )


def main(argv=None):
    """Run the script: parse arguments, summarise the table, write the result.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = _build_parser().parse_args(argv)
    annotation = "Final_annotation" if args.use_manual else "Automatic_annotation"

    # argparse.FileType has already opened both files; use those handles
    # directly (and close them) rather than reopening the paths by name.
    with args.input as fin:
        counts = _summarize_cluster_table(fin, annotation)
    with args.output as fout:
        _write_counts(counts, fout)


if __name__ == "__main__":
    main()
|
|
52
|
|
53
|
|
54
|