Mercurial > repos > bgruening > infernal
annotate infernal.py @ 8:c9e29ac5d099 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
author | bgruening |
---|---|
date | Thu, 23 Sep 2021 19:38:58 +0000 |
parents | 2c2c5e5e495b |
children |
rev | line source |
---|---|
0 | 1 # -*- coding: utf-8 -*- |
2 | |
8
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
3 import logging |
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
4 import os |
0 | 5 import subprocess |
6 | |
8
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
7 from galaxy.datatypes.data import get_file_peek, Text |
0 | 8 from galaxy.datatypes.metadata import MetadataElement |
8
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
9 |
0 | 10 |
8
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
def count_special_lines(word, filename, invert=False):
    """
    Count the lines of ``filename`` that match the grep pattern ``word``.

    The external ``grep -c`` tool is used to speed up the searching and
    counting.

    :param word: pattern handed to grep
    :param filename: path of the file to scan
    :param invert: if true, count the lines that do NOT match (``grep -v``)
    :returns: the number of hits; 0 if grep could not be started or its
        output could not be parsed (e.g. the file does not exist)
    """
    cmd = ["grep", "-c"]
    if invert:
        cmd.append("-v")
    # "-e" keeps a pattern that starts with "-" from being parsed as an
    # option; "--" does the same for the file name.
    cmd.extend(["-e", word, "--", filename])
    try:
        out = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        return int(out.communicate()[0].split()[0])
    except Exception as e:
        # Counting is best effort: callers treat 0 as "no hits", so record
        # the failure instead of propagating it.
        logging.debug(
            "Unable to count lines matching %r in %r: %s", word, filename, e
        )
    return 0
27 | |
8
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
28 |
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
def count_lines(filename, non_empty=False):
    """
    Count the number of lines in the ``filename`` file.

    The counting is delegated to external tools: ``wc -l`` for all lines,
    ``grep -c -v`` for the lines that are not whitespace-only.

    :param filename: path of the file to count
    :param non_empty: if true, skip lines that are empty or contain only
        whitespace
    :returns: the line count; 0 if the tool could not be started or its
        output could not be parsed (e.g. the file does not exist)
    """
    if non_empty:
        # Raw string: "\s" must reach grep verbatim and is not a valid
        # Python escape sequence in a regular string literal.
        cmd = ["grep", "-cve", r"^\s*$", "--", filename]
    else:
        cmd = ["wc", "-l", "--", filename]
    try:
        out = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        return int(out.communicate()[0].split()[0])
    except Exception as e:
        # Counting is best effort: record the failure and fall back to 0.
        logging.debug("Unable to count lines in %r: %s", filename, e)
    return 0
44 | |
45 | |
8
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
class Infernal_CM_1_1(Text):
    """
    Text datatype for files holding one or more covariance models.

    Each model record is recognised by a line starting with ``INFERNAL1/a``.
    """

    file_ext = "cm"

    MetadataElement(
        name="number_of_models",
        default=0,
        desc="Number of covariance models",
        readonly=True,
        visible=True,
        optional=True,
        no_value=0,
    )

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set the peek (file preview) and the blurb (model count) of ``dataset``.

        For a purged dataset both fields are set to fixed placeholder texts.
        """
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            if dataset.metadata.number_of_models == 1:
                dataset.blurb = "1 model"
            else:
                dataset.blurb = "%s models" % dataset.metadata.number_of_models
        else:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disc"

    def sniff(self, filename):
        """
        Return True if ``filename`` contains at least one line starting
        with ``INFERNAL1/a``.
        """
        return count_special_lines("^INFERNAL1/a", filename) > 0

    def set_meta(self, dataset, **kwd):
        """
        Set the number of models in dataset.
        """
        dataset.metadata.number_of_models = count_special_lines(
            "^INFERNAL1/a", dataset.file_name
        )

    @classmethod
    def split(cls, input_datasets, subdir_generator_function, split_params):
        """
        Split the input files by model records.

        :param input_datasets: datasets to split; more than one is rejected
        :param subdir_generator_function: callable taking no arguments that
            returns the directory the next part file is written into
        :param split_params: mapping with the key ``split_mode`` (only
            ``"to_size"`` is supported) and ``split_size`` (number of model
            records per part); ``None`` disables splitting
        :returns: None
        :raises Exception: for multiple input files or an unsupported
            split mode; errors raised while splitting are logged and
            re-raised
        """
        if split_params is None:
            return None

        if len(input_datasets) > 1:
            raise Exception("CM-file splitting does not support multiple files")
        input_files = [ds.file_name for ds in input_datasets]

        chunk_size = None
        if split_params["split_mode"] == "number_of_parts":
            raise Exception(
                'Split mode "%s" is currently not implemented for CM-files.'
                % split_params["split_mode"]
            )
        elif split_params["split_mode"] == "to_size":
            chunk_size = int(split_params["split_size"])
        else:
            raise Exception("Unsupported split mode %s" % split_params["split_mode"])

        def _read_cm_records(filename):
            # Yield the lines of one model record at a time; a new record
            # starts at every line beginning with "INFERNAL1/a".
            lines = []
            with open(filename) as handle:
                for line in handle:
                    if line.startswith("INFERNAL1/a") and lines:
                        yield lines
                        lines = [line]
                    else:
                        lines.append(line)
            # NOTE: for an empty input file this still yields one empty record.
            yield lines

        def _write_part_cm_file(accumulated_lines):
            # Write one part into a fresh sub-directory, keeping the base
            # name of the input file.
            part_dir = subdir_generator_function()
            part_path = os.path.join(part_dir, os.path.basename(input_files[0]))
            # The context manager closes the file even if writing fails.
            with open(part_path, "w") as part_file:
                part_file.writelines(accumulated_lines)

        try:
            cm_records = _read_cm_records(input_files[0])
            cm_lines_accumulated = []
            for counter, cm_record in enumerate(cm_records, start=1):
                cm_lines_accumulated.extend(cm_record)
                if counter % chunk_size == 0:
                    _write_part_cm_file(cm_lines_accumulated)
                    cm_lines_accumulated = []
            # Flush the remaining records that did not fill a whole chunk.
            if cm_lines_accumulated:
                _write_part_cm_file(cm_lines_accumulated)
        except Exception as e:
            logging.error("Unable to split files: %s", e)
            raise
140 | |
8
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
141 |
c9e29ac5d099
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
bgruening
parents:
3
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: only instantiates the datatype class; the
    # resulting instance is discarded.
    Infernal_CM_1_1()
    # NOTE(review): Stockholm_1_0 is not defined in the visible part of this
    # file; the call below was left disabled by the original author.
    # Stockholm_1_0()  # ???