#!/usr/bin/env python
"""Summarize read counts per annotation class from a CLUSTER_table.csv file.

The input is a tab-separated file with two sections:

* rows before the header row of the form ``name<TAB>integer`` (rows whose
  second field is not an integer are ignored);
* a header row whose first two fields are ``Cluster`` and ``Supercluster``,
  followed by one row per cluster.

The output has one ``count<TAB>label`` line per annotation class, followed
by a ``Singlets`` line and a ``Small_cluster`` line.
"""
import sys
import re
from collections import defaultdict
import argparse
import csv

# Index of the per-cluster column whose integer value is summed.
# NOTE(review): presumably the adjusted cluster size -- confirm against the
# CLUSTER_table.csv format description.
SIZE_COLUMN = 3


def count_reads_by_annotation(lines, annotation):
    """Return read counts keyed by annotation class.

    Args:
        lines: iterable of text lines (for example an open file) holding the
            tab-separated cluster table.
        annotation: name of the header column to take the classification
            from, e.g. ``"Automatic_annotation"`` or ``"Final_annotation"``.

    Returns:
        A dict mapping ``"Top_clusters\\t<class path>"`` labels to summed
        counts, in order of first appearance, followed by the keys
        ``"Singlets"`` and ``"Small_cluster"``.

    Raises:
        ValueError: if the header row has no column named *annotation*, or a
            cluster row has a non-integer value in the size column.
        KeyError: if ``Number_of_singlets`` or ``Number_of_reads_in_clusters``
            is missing from the section above the header.
        IndexError: if a cluster row is shorter than the columns it must
            provide.
    """
    summary_values = {}
    counts = defaultdict(int)
    top_clusters = 0
    column = None  # stays None until the header row has been seen

    for parts in csv.reader(lines, delimiter="\t"):
        if not parts:
            continue
        # Slice comparison so a one-field row cannot raise IndexError.
        if parts[:2] == ["Cluster", "Supercluster"]:
            try:
                column = parts.index(annotation)
            except ValueError:
                raise ValueError(
                    "column %r not found in table header" % annotation
                ) from None
            continue
        if column is not None:
            # Drop the first path element of the annotation (the root of the
            # classification path) and keep the rest as tab-separated levels.
            levels = parts[column].split("/")[1:]
            label = "Top_clusters\t" + "\t".join(levels).replace('"', '')
            size = int(parts[SIZE_COLUMN])
            counts[label] += size
            top_clusters += size
        elif len(parts) >= 2:
            # Summary section: keep only name/integer pairs; other rows in
            # this section are deliberately ignored.
            try:
                summary_values[parts[0].replace('"', '')] = int(parts[1])
            except ValueError:
                pass

    result = dict(counts)
    result['Singlets'] = summary_values['Number_of_singlets']
    # Reads that are in clusters but not in any of the listed clusters.
    result['Small_cluster'] = (
        summary_values['Number_of_reads_in_clusters'] - top_clusters
    )
    return result


def main(argv=None):
    """Parse command-line options and write the summary table.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", type=argparse.FileType('r'),
                        required=True,
                        help="path to file CLUSTER_table.csv")
    parser.add_argument("-o", "--output", type=argparse.FileType('w'),
                        required=True,
                        help="output file name")
    parser.add_argument("-m", "--use_manual", action='store_true',
                        default=False,
                        help="use the Final_annotation column instead of "
                             "Automatic_annotation")
    args = parser.parse_args(argv)

    if args.use_manual:
        annotation = "Final_annotation"
    else:
        annotation = "Automatic_annotation"

    # argparse.FileType has already opened both files; use those handles
    # directly instead of reopening by name (which leaks the handles and
    # does not work for "-", i.e. stdin/stdout).
    with args.input as fin:
        counts = count_reads_by_annotation(fin, annotation)

    with args.output as fout:
        for cls_line, nreads in counts.items():
            fout.write(str(nreads) + "\t" + cls_line + "\n")


if __name__ == "__main__":
    main()