annotate cfsan_snp_pipeline_quast_select.py @ 0:fedc60909fbe draft default tip

Uploaded
author greg
date Tue, 17 Oct 2023 14:13:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fedc60909fbe Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env python
fedc60909fbe Uploaded
greg
parents:
diff changeset
2
fedc60909fbe Uploaded
greg
parents:
diff changeset
3 import csv
fedc60909fbe Uploaded
greg
parents:
diff changeset
4 import sys
fedc60909fbe Uploaded
greg
parents:
diff changeset
5
fedc60909fbe Uploaded
greg
parents:
diff changeset
6
fedc60909fbe Uploaded
greg
parents:
diff changeset
def pick(rows, key, reverse=False):
    """Return the 'Assembly' entry of the row that ranks best for ``key``.

    Args:
        rows: Iterable of dicts, each holding an 'Assembly' entry and a
            ``key`` entry.
        key: Name of the entry the rows are ranked by.
        reverse: When false (the default) the row with the smallest value
            wins; when true the row with the largest value wins.

    Returns:
        The 'Assembly' entry of the winning row. On ties the earliest row
        wins, for either direction.

    Raises:
        IndexError: If ``rows`` is empty.
        KeyError: If any row lacks ``key``, or the winner lacks 'Assembly'.
    """
    rows = list(rows)
    if not rows:
        raise IndexError("no rows to pick from")
    # Numbers and strings cannot be ordered against each other, so a single
    # non-numeric cell would abort the comparison with a TypeError. Rank only
    # the numeric rows when there are any; otherwise compare the raw values.
    numeric = [r for r in rows if isinstance(r[key], (int, float))]
    candidates = numeric or rows
    # min()/max() return the first extreme element, which is the same row a
    # stable sort (ascending or reverse=True) would place at index 0.
    best = max if reverse else min
    return best(candidates, key=lambda r: r[key])['Assembly']
fedc60909fbe Uploaded
greg
parents:
diff changeset
10
fedc60909fbe Uploaded
greg
parents:
diff changeset
11
fedc60909fbe Uploaded
greg
parents:
diff changeset
def int_or_str(token):
    """Convert ``token`` to an int when possible, else return it as a string.

    Args:
        token: The value to convert, e.g. one cell of a parsed table.

    Returns:
        ``int(token)`` when that succeeds, otherwise ``str(token)``.
    """
    try:
        return int(token)
    except ValueError:
        # Anything int() rejects as a literal (text, the empty string,
        # decimals such as "50.12") is passed back as a string.
        return str(token)
fedc60909fbe Uploaded
greg
parents:
diff changeset
17
fedc60909fbe Uploaded
greg
parents:
diff changeset
18
fedc60909fbe Uploaded
greg
parents:
diff changeset
if __name__ == '__main__':
    # Usage: <script> <report.tsv> <criterion>
    # Prints the 'Assembly' entry of the sample that ranks best for the
    # column named by <criterion>.
    if len(sys.argv) != 3:
        sys.exit("usage: %s <quast_report.tsv> <criterion>" % sys.argv[0])
    path, criterion = sys.argv[1:]
    # QUAST tables have sample info as columns, so we need to transpose the table.
    # The with-block closes the file as soon as the table has been read.
    with open(path, "r") as handle:
        rows = list(zip(*csv.reader(handle, delimiter='\t', dialect='excel')))
    if not rows:
        sys.exit("%s: no table data found" % path)
    # After transposing, the first tuple holds the column names and every
    # following tuple is one sample.
    hed, rows = rows[0], rows[1:]
    if not rows:
        sys.exit("%s: table contains no samples" % path)
    if criterion not in hed:
        sys.exit("%s: no column named %r" % (path, criterion))
    # Keep the assembly name exactly as written in the table (an all-digit
    # name such as "007" must not be normalised to 7); every other cell is
    # converted so that integer metrics compare numerically.
    dict_rows = [
        {h: (cell if h == 'Assembly' else int_or_str(cell))
         for h, cell in zip(hed, r)}
        for r in rows
    ]
    # If it's a count, we want the fewest; otherwise it's a length and we
    # want the longest.
    # NOTE(review): the criterion doubles as the column name, so "fewest"
    # only matches when the column name itself contains it - confirm with
    # the caller that this is intended.
    reverse = "fewest" not in criterion
    print(pick(dict_rows, criterion, reverse))