Mercurial > repos > bebatut > format_cd_hit_output
annotate format_cd_hit_output.py @ 1:64da677bcee2 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
author | bgruening |
---|---|
date | Wed, 19 Oct 2022 14:42:33 +0000 |
parents | 4015e9d6d277 |
children |
rev | line source |
---|---|
0
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
1 #!/usr/bin/env python |
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
3 |
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
4 import argparse |
1
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
5 |
0
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
6 |
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
def extract_mapping_info(input_mapping_filepath):
    """Read a two-column, tab-separated mapping file.

    Every non-empty line is split on tabs; the first field is used as the
    key and the second field as its category.

    Args:
        input_mapping_filepath: path of the mapping file to read.

    Returns:
        A ``(mapping_info, categories)`` tuple. ``mapping_info`` is a dict
        from the first column to the second column (when a key appears
        several times, its first occurrence wins). ``categories`` is the set
        of every value seen in the second column.

    Raises:
        IndexError: if a non-empty line has fewer than two tab-separated
            fields.
    """
    mapping_info = {}
    categories = set()

    with open(input_mapping_filepath, 'r') as mapping_file:
        for line in mapping_file:
            # Strip only the line terminator: unconditionally slicing off the
            # last character would truncate the category of a final line
            # that is not newline-terminated.
            line = line.rstrip('\n')
            if not line:
                # Empty lines carry no mapping; skip them instead of failing.
                continue
            split_line = line.split('\t')
            mapping_info.setdefault(split_line[0], split_line[1])
            categories.add(split_line[1])

    return mapping_info, categories
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
18 |
1
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
19 |
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
def init_category_distribution(categories=None):
    """Return a new counter dict with one zeroed entry per category.

    Args:
        categories: iterable of category names, or None.

    Returns:
        A fresh dict mapping each category to 0; an empty dict when
        ``categories`` is None.
    """
    if categories is None:
        return {}
    return dict.fromkeys(categories, 0)
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
26 |
0
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
27 |
1
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
def flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster,
                       cluster_categ_distri, categories,
                       output_category_distribution_file, cluster_seq_number):
    """Finalize one cluster: emit its distribution row and record its reference.

    Nothing happens when ``cluster_name`` is the empty string (no cluster has
    been started yet).

    Args:
        cluster_name: name of the cluster being closed, or '' for none.
        cluster_ref_seq: name of the cluster's reference sequence, or '' when
            none was found.
        ref_seq_cluster: dict updated in place, mapping reference sequence
            name to cluster name (an existing entry is not overwritten).
        cluster_categ_distri: dict mapping each category to its count for
            this cluster.
        categories: iterable of categories fixing the column order, or None.
        output_category_distribution_file: writable text file object, or
            None when no distribution output is wanted.
        cluster_seq_number: number of sequences counted in the cluster.

    Raises:
        ValueError: if the cluster has no reference sequence. The
            distribution row, if any, has already been written by then.
    """
    if cluster_name == '':
        return

    # Categories may be known while no distribution file was requested; in
    # that case there is nowhere to write, so only the reference is recorded.
    if (categories is not None
            and output_category_distribution_file is not None):
        fields = [cluster_name, str(cluster_seq_number)]
        fields.extend(
            str(cluster_categ_distri[category]) for category in categories)
        output_category_distribution_file.write('\t'.join(fields) + '\n')

    if cluster_ref_seq == '':
        raise ValueError("No reference sequence found for " + cluster_name)

    ref_seq_cluster.setdefault(cluster_ref_seq, cluster_name)
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
47 |
1
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
48 |
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
def extract_cluster_info(args, mapping_info=None, categories=None):
    """Parse the cluster description file and map references to clusters.

    Lines of ``args.input_cluster_info`` starting with '>' open a new
    cluster; the rest of the line, with spaces replaced by underscores, is
    the cluster name. Every other non-empty line describes one member: its
    second tab-separated field is split on spaces, the second token (minus
    its first character and its last three characters) is the sequence name,
    and a final token equal to '*' marks the cluster's reference sequence.
    NOTE(review): this looks like the CD-HIT ``.clstr`` layout
    (``>name...``) -- confirm against the tool that produces the input.

    Args:
        args: object exposing ``input_cluster_info`` (input path),
            ``output_category_distribution`` (output path or None) and
            ``number_sum`` (None, or any value to enable size-weighted
            counting).
        mapping_info: dict from sequence name to category, or None.
        categories: iterable of categories, or None.

    Returns:
        A dict mapping each reference sequence name to its cluster name.

    Raises:
        ValueError: if a distribution file is requested without mapping
            information, if a sequence is missing from ``mapping_info``, if
            a cluster has more than one reference sequence, or if a cluster
            has none.
    """
    ref_seq_cluster = {}

    output_cat_distri_file = None
    if args.output_category_distribution is not None:
        if mapping_info is None or categories is None:
            string = "A file with category distribution is expected but "
            string += "no mapping information are available"
            raise ValueError(string)
        output_cat_distri_file = open(args.output_category_distribution, 'w')

    # try/finally guarantees the distribution file is closed even when one of
    # the ValueErrors below aborts the parsing.
    try:
        if output_cat_distri_file is not None:
            header = 'Cluster\tSequence_number'
            for category in categories:
                header += '\t' + category
            output_cat_distri_file.write(header + '\n')

        with open(args.input_cluster_info, 'r') as cluster_info_file:
            cluster_name = ''
            cluster_categ_distri = init_category_distribution(categories)
            cluster_ref_seq = ''
            cluster_seq_number = 0
            for line in cluster_info_file:
                # Strip only the terminator: slicing off the last character
                # would eat the '*' marker of an unterminated final line.
                line = line.rstrip('\n')
                if not line:
                    continue

                if line[0] == '>':
                    # A new header closes the previous cluster (a no-op for
                    # the very first header, whose cluster name is still '').
                    flush_cluster_info(
                        cluster_name,
                        cluster_ref_seq,
                        ref_seq_cluster,
                        cluster_categ_distri,
                        categories,
                        output_cat_distri_file,
                        cluster_seq_number)
                    cluster_name = line[1:].replace(' ', '_')
                    cluster_categ_distri = init_category_distribution(
                        categories)
                    cluster_ref_seq = ''
                    cluster_seq_number = 0
                    continue

                seq_info = line.split('\t')[1].split(' ')
                # Drop the leading character and the three trailing ones
                # around the name (presumably '>' and '...' -- TODO confirm).
                seq_name = seq_info[1][1:-3]
                cluster_seq_number += 1

                if categories is not None:
                    seq_count = 1
                    if args.number_sum is not None:
                        if seq_name.find('size') != -1:
                            # NOTE(review): the slice drops the last
                            # character of the name, so this assumes a
                            # 'size=N' annotation followed by exactly one
                            # trailing character (e.g. ';') -- confirm.
                            substring = seq_name[seq_name.find('size'):-1]
                            seq_count = int(substring.split('=')[1])
                    if seq_name not in mapping_info:
                        string = seq_name + " not found in mapping"
                        raise ValueError(string)
                    category = mapping_info[seq_name]
                    cluster_categ_distri[category] += seq_count

                if seq_info[-1] == '*':
                    if cluster_ref_seq != '':
                        string = "A reference sequence (" + cluster_ref_seq
                        string += ") already found for cluster " + cluster_name
                        string += " (" + seq_name + ")"
                        raise ValueError(string)
                    cluster_ref_seq = seq_name

        # The last cluster has no following header to trigger its flush.
        flush_cluster_info(
            cluster_name,
            cluster_ref_seq,
            ref_seq_cluster,
            cluster_categ_distri,
            categories,
            output_cat_distri_file,
            cluster_seq_number)
    finally:
        if output_cat_distri_file is not None:
            output_cat_distri_file.close()

    return ref_seq_cluster
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
124 |
1
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
125 |
0
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
def rename_representative_sequences(args, ref_seq_cluster):
    """Copy the representative sequences, renaming each header line.

    The file at ``args.input_representative_sequences`` is read line by line
    and written to ``args.output_representative_sequences``. Every line that
    starts with ``>`` is replaced by ``>`` followed by the value stored in
    ``ref_seq_cluster`` for that sequence name; every other line is copied
    unchanged.

    Args:
        args: object exposing the ``input_representative_sequences`` and
            ``output_representative_sequences`` file paths.
        ref_seq_cluster: mapping from the sequence name found in a header
            (text after ``>``, without the line ending) to the replacement
            name written to the output.

    Raises:
        ValueError: if a header's sequence name is not a key of
            ``ref_seq_cluster``.
    """
    with open(args.input_representative_sequences, 'r') as input_sequences, \
            open(args.output_representative_sequences, 'w') as output_seq:
        # Iterate the file object directly so the whole input is never held
        # in memory at once.
        for line in input_sequences:
            if not line.startswith('>'):
                output_seq.write(line)
                continue

            # Strip only the line ending: a header on the last line of a file
            # without a trailing newline must keep its final character.
            seq_name = line[1:].rstrip('\n')
            if seq_name not in ref_seq_cluster:
                string = seq_name + " not found as reference sequence"
                raise ValueError(string)
            output_seq.write('>' + ref_seq_cluster[seq_name] + '\n')
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
139 |
0
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
140 |
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
def format_cd_hit_outputs(args):
    """Run the formatting steps selected by the parsed arguments.

    Mapping information is extracted only when ``args.input_mapping`` is
    given; otherwise ``None`` is passed for both the mapping and the
    categories. The cluster information is then extracted, and the
    representative sequences are renamed only when
    ``args.input_representative_sequences`` is given.
    """
    # Default to "no mapping" and overwrite only when a mapping file is set.
    mapping_info = None
    categories = None
    if args.input_mapping is not None:
        mapping_info, categories = extract_mapping_info(args.input_mapping)

    ref_seq_cluster = extract_cluster_info(args, mapping_info, categories)

    # Renaming is optional: nothing more to do without an input file.
    if args.input_representative_sequences is None:
        return
    rename_representative_sequences(args, ref_seq_cluster)
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
152 |
1
64da677bcee2
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
bgruening
parents:
0
diff
changeset
|
153 |
0
4015e9d6d277
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
bebatut
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: only --input_cluster_info is mandatory, every
    # other option defaults to None when omitted.
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_cluster_info', required=True)
    for optional_flag in (
        '--input_representative_sequences',
        '--output_representative_sequences',
        '--input_mapping',
        '--output_category_distribution',
        '--number_sum',
    ):
        parser.add_argument(optional_flag)
    args = parser.parse_args()

    format_cd_hit_outputs(args)