| 
17
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 import sys
 | 
| 
 | 
     3 import re
 | 
| 
 | 
     4 from collections import defaultdict
 | 
| 
 | 
     5 import argparse
 | 
| 
19
 | 
     6 import csv
 | 
| 
17
 | 
     7 
 | 
| 
 | 
     8 parser = argparse.ArgumentParser()
 | 
| 
 | 
     9 parser.add_argument("-i" ,"--input", type=argparse.FileType('r'), help="path to file CLUSTER_table.csv")
 | 
| 
 | 
    10 parser.add_argument("-o" ,"--output", type=argparse.FileType('w'), help="output file name")
 | 
| 
 | 
    11 parser.add_argument("-m", "--use_manual", action='store_true', default=False)
 | 
| 
 | 
    12 
 | 
| 
 | 
    13 args = parser.parse_args()
 | 
| 
 | 
    14 
 | 
| 
 | 
    15 column = 6 if args.use_manual else 4
 | 
| 
19
 | 
    16 if args.use_manual:
 | 
| 
 | 
    17     annotation="Final_annotation"
 | 
| 
 | 
    18 else:
 | 
| 
 | 
    19     annotation="Automatic_annotation"
 | 
| 
17
 | 
    20 
 | 
| 
 | 
    21 header = False
 | 
| 
 | 
    22 clust_info = {}
 | 
| 
 | 
    23 counts = defaultdict(lambda: 0)
 | 
| 
 | 
    24 top_clusters = 0
 | 
| 
 | 
    25 with open(args.input.name, 'r') as f:
 | 
| 
19
 | 
    26     csv_reader = csv.reader(f, delimiter = "\t")
 | 
| 
 | 
    27     for parts in csv_reader:
 | 
| 
 | 
    28         if len(parts) == 0:
 | 
| 
 | 
    29             continue
 | 
| 
 | 
    30         if parts[0] == "Cluster" and parts[1]== "Supercluster":
 | 
| 
17
 | 
    31             header = True
 | 
| 
19
 | 
    32             header_columns = parts
 | 
| 
 | 
    33             column = header_columns.index(annotation)
 | 
| 
17
 | 
    34             continue
 | 
| 
 | 
    35         if header:
 | 
| 
 | 
    36             classification = "Top_clusters\t" + "\t".join(parts[column].split("/")[1:]).replace('"','')
 | 
| 
 | 
    37             counts[classification] += int(parts[3])
 | 
| 
 | 
    38             top_clusters += int(parts[3])
 | 
| 
19
 | 
    39         elif len(parts) >= 2:
 | 
| 
 | 
    40             try:
 | 
| 
 | 
    41                 clust_info[parts[0].replace('"', '')] = int(parts[1])
 | 
| 
 | 
    42             except ValueError:
 | 
| 
 | 
    43                 pass
 | 
| 
17
 | 
    44 
 | 
| 
 | 
    45 
 | 
| 
 | 
    46 counts['Singlets'] = clust_info['Number_of_singlets']
 | 
| 
 | 
    47 counts['Small_cluster'] = int(clust_info['Number_of_reads_in_clusters']) - top_clusters
 | 
| 
 | 
    48 
 | 
| 
 | 
    49 with open(args.output.name, 'w') as fout:
 | 
| 
 | 
    50     for cls_line, nreads in counts.items():
 | 
| 
 | 
    51         fout.write(str(nreads) +"\t" + cls_line + "\n")
 | 
| 
 | 
    52 
 | 
| 
 | 
    53 
 | 
| 
 | 
    54 
 |