Mercurial > repos > bgruening > infernal
annotate infernal.py @ 5:6e18e0b098cd draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
author | bgruening |
---|---|
date | Sat, 21 Jan 2017 17:36:57 -0500 |
parents | 2c2c5e5e495b |
children | c9e29ac5d099 |
rev | line source |
---|---|
0 | 1 # -*- coding: utf-8 -*- |
2 | |
3 from galaxy.datatypes.data import Text | |
4 from galaxy.datatypes.sniff import get_headers, get_test_fname | |
5 from galaxy.datatypes.data import get_file_peek | |
6 import subprocess | |
7 import os | |
8 | |
9 from galaxy.datatypes.metadata import MetadataElement | |
10 from galaxy.datatypes import metadata | |
11 | |
def count_special_lines(word, filename, invert=False):
    """
    Count the lines of 'filename' that match the pattern 'word'.

    The counting is delegated to the external ``grep -c`` tool to speed up
    searching in large files.

    :param word: pattern handed to grep (basic regular expression syntax)
    :param filename: path of the file to search
    :param invert: if True, count the lines that do NOT match (``grep -v``)
    :returns: the number of counted lines; 0 if grep could not be run or
              did not report a count (e.g. the file does not exist)
    """
    cmd = ["grep", "-c"]
    if invert:
        cmd.append("-v")
    # '-e' keeps a pattern starting with '-' from being parsed as an option,
    # '--' does the same for the filename.
    cmd.extend(["-e", word, "--", filename])
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        return int(proc.communicate()[0].split()[0])
    except (OSError, ValueError, IndexError):
        # Best effort: grep is missing (OSError), printed nothing
        # (IndexError) or printed something that is not a number (ValueError).
        return 0
28 | |
def count_lines(filename, non_empty=False):
    """
    Count the number of lines in the 'filename' file.

    The counting is delegated to external tools: ``wc -l`` for all lines,
    ``grep -c -v`` for the lines that are not blank.

    :param filename: path of the file to inspect
    :param non_empty: if True, skip lines that are empty or contain only
                      whitespace
    :returns: the number of counted lines; 0 if the tool could not be run
              or did not report a count (e.g. the file does not exist)
    """
    if non_empty:
        # POSIX character class instead of the GNU-only '\s' shorthand;
        # both match whitespace-only lines.
        cmd = ["grep", "-c", "-v", "-e", "^[[:space:]]*$", "--", filename]
    else:
        cmd = ["wc", "-l", "--", filename]
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # 'wc -l' prints "<count> <filename>", grep prints "<count>";
        # the first token is the count in both cases.
        return int(proc.communicate()[0].split()[0])
    except (OSError, ValueError, IndexError):
        # Best effort: tool missing (OSError), no output (IndexError) or
        # non-numeric output (ValueError).
        return 0
42 | |
43 | |
class Infernal_CM_1_1(Text):
    """
    Datatype for Infernal covariance model (CM) files.

    A model record is recognised by a line starting with ``INFERNAL1/a``;
    the number of such lines is stored in the ``number_of_models`` metadata.
    """
    file_ext = "cm"

    MetadataElement(name="number_of_models", default=0, desc="Number of covariance models",
                    readonly=True, visible=True, optional=True, no_value=0)

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set the peek and the blurb (number of models) of the dataset.
        """
        if not dataset.dataset.purged:
            # The peek is computed once; the original code called
            # get_file_peek() twice with identical arguments.
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            if dataset.metadata.number_of_models == 1:
                dataset.blurb = "1 model"
            else:
                dataset.blurb = "%s models" % dataset.metadata.number_of_models
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disc'

    def sniff(self, filename):
        """
        Return True if at least one line of the file starts with 'INFERNAL1/a'.
        """
        return count_special_lines("^INFERNAL1/a", filename) > 0

    def set_meta(self, dataset, **kwd):
        """
        Set the number of models in dataset.
        """
        dataset.metadata.number_of_models = count_special_lines("^INFERNAL1/a", dataset.file_name)

    @classmethod
    def split(cls, input_datasets, subdir_generator_function, split_params):
        """
        Split the input file by model records.

        :param input_datasets: datasets to split; at most one is supported
        :param subdir_generator_function: callable returning the directory
            in which the next part file is written
        :param split_params: dict with 'split_mode' (only 'to_size' is
            implemented) and 'split_size' (number of model records per
            part); None disables splitting
        :returns: None
        :raises Exception: for multiple input files, an unsupported or
            unimplemented split mode, or a split size smaller than 1
        """
        if split_params is None:
            return None

        if len(input_datasets) > 1:
            raise Exception("CM-file splitting does not support multiple files")
        input_files = [ds.file_name for ds in input_datasets]

        split_mode = split_params['split_mode']
        if split_mode == 'number_of_parts':
            raise Exception('Split mode "%s" is currently not implemented for CM-files.' % split_mode)
        elif split_mode == 'to_size':
            chunk_size = int(split_params['split_size'])
        else:
            raise Exception('Unsupported split mode %s' % split_mode)
        # A size of 0 would otherwise surface as a ZeroDivisionError in the
        # modulo below.
        if chunk_size < 1:
            raise Exception('Split size must be a positive integer, got %s' % chunk_size)

        def _read_cm_records(filename):
            """Yield the lines of each model record as a list."""
            lines = []
            with open(filename) as handle:
                for line in handle:
                    # A new header line closes the record collected so far.
                    if line.startswith("INFERNAL1/a") and lines:
                        yield lines
                        lines = [line]
                    else:
                        lines.append(line)
            # Do not emit an empty record for an empty input file.
            if lines:
                yield lines

        def _write_part_cm_file(accumulated_lines):
            """Write the given lines to a part file in a fresh part directory."""
            part_dir = subdir_generator_function()
            part_path = os.path.join(part_dir, os.path.basename(input_files[0]))
            with open(part_path, 'w') as part_file:
                part_file.writelines(accumulated_lines)

        try:
            cm_lines_accumulated = []
            for counter, cm_record in enumerate(_read_cm_records(input_files[0]), start=1):
                cm_lines_accumulated.extend(cm_record)
                if counter % chunk_size == 0:
                    _write_part_cm_file(cm_lines_accumulated)
                    cm_lines_accumulated = []
            # Remaining records that did not fill a complete chunk.
            if cm_lines_accumulated:
                _write_part_cm_file(cm_lines_accumulated)
        except Exception as e:
            # No module-level logger is defined in this file, so obtain one
            # here; otherwise the handler itself would fail with a NameError.
            import logging
            logging.getLogger(__name__).error('Unable to split files: %s', e)
            raise
124 | |
if __name__ == '__main__':
    # Instantiate the datatype defined in this module. The former call to
    # Stockholm_1_0() was dropped: that name is neither defined nor imported
    # in this file, so it raised a NameError.
    Infernal_CM_1_1()
128 |