Mercurial > repos > bgruening > infernal
comparison infernal.py @ 8:c9e29ac5d099 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
author | bgruening |
---|---|
date | Thu, 23 Sep 2021 19:38:58 +0000 |
parents | 2c2c5e5e495b |
children |
comparison
equal
deleted
inserted
replaced
7:477d829d3250 | 8:c9e29ac5d099 |
---|---|
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 | 2 |
3 from galaxy.datatypes.data import Text | 3 import logging |
4 from galaxy.datatypes.sniff import get_headers, get_test_fname | 4 import os |
5 from galaxy.datatypes.data import get_file_peek | |
6 import subprocess | 5 import subprocess |
7 import os | |
8 | 6 |
7 from galaxy.datatypes.data import get_file_peek, Text | |
9 from galaxy.datatypes.metadata import MetadataElement | 8 from galaxy.datatypes.metadata import MetadataElement |
10 from galaxy.datatypes import metadata | |
11 | 9 |
12 def count_special_lines( word, filename, invert = False ): | 10 |
11 def count_special_lines(word, filename, invert=False): | |
13 """ | 12 """ |
14 searching for special 'words' using the grep tool | 13 searching for special 'words' using the grep tool |
15 grep is used to speed up the searching and counting | 14 grep is used to speed up the searching and counting |
16 The number of hits is returned. | 15 The number of hits is returned. |
17 """ | 16 """ |
18 try: | 17 try: |
19 cmd = ["grep", "-c"] | 18 cmd = ["grep", "-c"] |
20 if invert: | 19 if invert: |
21 cmd.append('-v') | 20 cmd.append("-v") |
22 cmd.extend([word, filename]) | 21 cmd.extend([word, filename]) |
23 out = subprocess.Popen(cmd, stdout=subprocess.PIPE) | 22 out = subprocess.Popen(cmd, stdout=subprocess.PIPE) |
24 return int(out.communicate()[0].split()[0]) | 23 return int(out.communicate()[0].split()[0]) |
25 except: | 24 except Exception: |
26 pass | |
27 return 0 | |
28 | |
29 def count_lines( filename, non_empty = False): | |
30 """ | |
31 counting the number of lines from the 'filename' file | |
32 """ | |
33 try: | |
34 if non_empty: | |
35 out = subprocess.Popen(['grep', '-cve', '^\s*$', filename], stdout=subprocess.PIPE) | |
36 else: | |
37 out = subprocess.Popen(['wc', '-l', filename], stdout=subprocess.PIPE) | |
38 return int(out.communicate()[0].split()[0]) | |
39 except: | |
40 pass | 25 pass |
41 return 0 | 26 return 0 |
42 | 27 |
43 | 28 |
44 class Infernal_CM_1_1( Text ): | 29 def count_lines(filename, non_empty=False): |
30 """ | |
31 counting the number of lines from the 'filename' file | |
32 """ | |
33 try: | |
34 if non_empty: | |
35 out = subprocess.Popen( | |
36 ["grep", "-cve", "^\s*$", filename], stdout=subprocess.PIPE # noqa W605 | |
37 ) | |
38 else: | |
39 out = subprocess.Popen(["wc", "-l", filename], stdout=subprocess.PIPE) | |
40 return int(out.communicate()[0].split()[0]) | |
41 except Exception: | |
42 pass | |
43 return 0 | |
44 | |
45 | |
46 class Infernal_CM_1_1(Text): | |
45 file_ext = "cm" | 47 file_ext = "cm" |
46 | 48 |
47 MetadataElement( name="number_of_models", default=0, desc="Number of covariance models", readonly=True, visible=True, optional=True, no_value=0 ) | 49 MetadataElement( |
50 name="number_of_models", | |
51 default=0, | |
52 desc="Number of covariance models", | |
53 readonly=True, | |
54 visible=True, | |
55 optional=True, | |
56 no_value=0, | |
57 ) | |
48 | 58 |
49 def set_peek( self, dataset, is_multi_byte=False ): | 59 def set_peek(self, dataset, is_multi_byte=False): |
50 if not dataset.dataset.purged: | 60 if not dataset.dataset.purged: |
51 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) | 61 dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte) |
52 if (dataset.metadata.number_of_models == 1): | 62 if dataset.metadata.number_of_models == 1: |
53 dataset.blurb = "1 model" | 63 dataset.blurb = "1 model" |
54 else: | 64 else: |
55 dataset.blurb = "%s models" % dataset.metadata.number_of_models | 65 dataset.blurb = "%s models" % dataset.metadata.number_of_models |
56 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) | 66 dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte) |
57 else: | 67 else: |
58 dataset.peek = 'file does not exist' | 68 dataset.peek = "file does not exist" |
59 dataset.blurb = 'file purged from disc' | 69 dataset.blurb = "file purged from disc" |
60 | 70 |
61 def sniff( self, filename ): | 71 def sniff(self, filename): |
62 if count_special_lines("^INFERNAL1/a", filename) > 0: | 72 if count_special_lines("^INFERNAL1/a", filename) > 0: |
63 return True | 73 return True |
64 else: | 74 else: |
65 return False | 75 return False |
66 | 76 |
67 def set_meta( self, dataset, **kwd ): | 77 def set_meta(self, dataset, **kwd): |
68 """ | 78 """ |
69 Set the number of models in dataset. | 79 Set the number of models in dataset. |
70 """ | 80 """ |
71 dataset.metadata.number_of_models = count_special_lines("^INFERNAL1/a", dataset.file_name) | 81 dataset.metadata.number_of_models = count_special_lines( |
82 "^INFERNAL1/a", dataset.file_name | |
83 ) | |
72 | 84 |
73 def split( cls, input_datasets, subdir_generator_function, split_params): | 85 def split(cls, input_datasets, subdir_generator_function, split_params): |
74 """ | 86 """ |
75 Split the input files by model records. | 87 Split the input files by model records. |
76 """ | 88 """ |
77 if split_params is None: | 89 if split_params is None: |
78 return None | 90 return None |
80 if len(input_datasets) > 1: | 92 if len(input_datasets) > 1: |
81 raise Exception("CM-file splitting does not support multiple files") | 93 raise Exception("CM-file splitting does not support multiple files") |
82 input_files = [ds.file_name for ds in input_datasets] | 94 input_files = [ds.file_name for ds in input_datasets] |
83 | 95 |
84 chunk_size = None | 96 chunk_size = None |
85 if split_params['split_mode'] == 'number_of_parts': | 97 if split_params["split_mode"] == "number_of_parts": |
86 raise Exception('Split mode "%s" is currently not implemented for CM-files.' % split_params['split_mode']) | 98 raise Exception( |
87 elif split_params['split_mode'] == 'to_size': | 99 'Split mode "%s" is currently not implemented for CM-files.' |
88 chunk_size = int(split_params['split_size']) | 100 % split_params["split_mode"] |
101 ) | |
102 elif split_params["split_mode"] == "to_size": | |
103 chunk_size = int(split_params["split_size"]) | |
89 else: | 104 else: |
90 raise Exception('Unsupported split mode %s' % split_params['split_mode']) | 105 raise Exception("Unsupported split mode %s" % split_params["split_mode"]) |
91 | 106 |
92 def _read_cm_records( filename ): | 107 def _read_cm_records(filename): |
93 lines = [] | 108 lines = [] |
94 with open(filename) as handle: | 109 with open(filename) as handle: |
95 for line in handle: | 110 for line in handle: |
96 if line.startswith("INFERNAL1/a") and lines: | 111 if line.startswith("INFERNAL1/a") and lines: |
97 yield lines | 112 yield lines |
98 lines = [line] | 113 lines = [line] |
99 else: | 114 else: |
100 lines.append( line ) | 115 lines.append(line) |
101 yield lines | 116 yield lines |
102 | 117 |
103 def _write_part_cm_file( accumulated_lines ): | 118 def _write_part_cm_file(accumulated_lines): |
104 part_dir = subdir_generator_function() | 119 part_dir = subdir_generator_function() |
105 part_path = os.path.join( part_dir, os.path.basename( input_files[0] ) ) | 120 part_path = os.path.join(part_dir, os.path.basename(input_files[0])) |
106 part_file = open( part_path, 'w' ) | 121 part_file = open(part_path, "w") |
107 part_file.writelines( accumulated_lines ) | 122 part_file.writelines(accumulated_lines) |
108 part_file.close() | 123 part_file.close() |
109 | 124 |
110 try: | 125 try: |
111 cm_records = _read_cm_records( input_files[0] ) | 126 cm_records = _read_cm_records(input_files[0]) |
112 cm_lines_accumulated = [] | 127 cm_lines_accumulated = [] |
113 for counter, cm_record in enumerate( cm_records, start = 1): | 128 for counter, cm_record in enumerate(cm_records, start=1): |
114 cm_lines_accumulated.extend( cm_record ) | 129 cm_lines_accumulated.extend(cm_record) |
115 if counter % chunk_size == 0: | 130 if counter % chunk_size == 0: |
116 _write_part_cm_file( cm_lines_accumulated ) | 131 _write_part_cm_file(cm_lines_accumulated) |
117 cm_lines_accumulated = [] | 132 cm_lines_accumulated = [] |
118 if cm_lines_accumulated: | 133 if cm_lines_accumulated: |
119 _write_part_cm_file( cm_lines_accumulated ) | 134 _write_part_cm_file(cm_lines_accumulated) |
120 except Exception, e: | 135 except Exception as e: |
121 log.error('Unable to split files: %s' % str(e)) | 136 logging.error("Unable to split files: %s" % str(e)) |
122 raise | 137 raise |
138 | |
123 split = classmethod(split) | 139 split = classmethod(split) |
124 | 140 |
125 if __name__ == '__main__': | 141 |
142 if __name__ == "__main__": | |
126 Infernal_CM_1_1() | 143 Infernal_CM_1_1() |
127 Stockholm_1_0() | 144 # Stockholm_1_0() # ??? |
128 |