Mercurial > repos > bgruening > infernal
annotate infernal.py @ 4:c47a7c52ac4f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit 236abfe2af8034017994daea08079887e64b68c8
| author | bgruening |
|---|---|
| date | Mon, 19 Dec 2016 15:27:06 -0500 |
| parents | 2c2c5e5e495b |
| children | c9e29ac5d099 |
| rev | line source |
|---|---|
| 0 | 1 # -*- coding: utf-8 -*- |
| 2 | |
| 3 from galaxy.datatypes.data import Text | |
| 4 from galaxy.datatypes.sniff import get_headers, get_test_fname | |
| 5 from galaxy.datatypes.data import get_file_peek | |
| 6 import subprocess | |
| 7 import os | |
| 8 | |
| 9 from galaxy.datatypes.metadata import MetadataElement | |
| 10 from galaxy.datatypes import metadata | |
| 11 | |
def count_special_lines(word, filename, invert=False):
    """
    Count the lines of 'filename' that match the grep pattern 'word'.

    grep is used to speed up the searching and counting.

    :param word: pattern handed to grep
    :param filename: path of the file to search
    :param invert: when True, count the lines that do NOT match (grep -v)
    :returns: the number of hits, or 0 when grep cannot be run or its
              output cannot be parsed (e.g. the file does not exist)
    """
    cmd = ["grep", "-c"]
    if invert:
        cmd.append("-v")
    # '-e' marks the next argument as the pattern and '--' ends option
    # parsing, so a pattern or a filename starting with '-' is never
    # mistaken for a grep option.
    cmd.extend(["-e", word, "--", filename])
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        return int(proc.communicate()[0].split()[0])
    except (OSError, TypeError, ValueError, IndexError):
        # Best effort: a missing grep binary, unusable arguments or empty /
        # unparsable output all count as "no hits". Interrupts such as
        # KeyboardInterrupt are deliberately not swallowed.
        return 0
| 28 | |
def count_lines(filename, non_empty=False):
    """
    Count the number of lines in the 'filename' file.

    :param filename: path of the file to inspect
    :param non_empty: when True, lines that are empty or contain only
                      whitespace are not counted (done with grep);
                      otherwise every line is counted (done with wc -l)
    :returns: the line count, or 0 when the external tool cannot be run or
              its output cannot be parsed (e.g. the file does not exist)
    """
    if non_empty:
        # Raw string: the backslash must reach grep untouched.
        cmd = ["grep", "-c", "-v", "-e", r"^\s*$", "--", filename]
    else:
        cmd = ["wc", "-l", "--", filename]
    # '--' ends option parsing so a filename starting with '-' is treated
    # as a path rather than as an option.
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        return int(proc.communicate()[0].split()[0])
    except (OSError, TypeError, ValueError, IndexError):
        # Best effort: any failure to obtain a number is reported as 0.
        # Interrupts such as KeyboardInterrupt are deliberately not swallowed.
        return 0
| 42 | |
| 43 | |
class Infernal_CM_1_1(Text):
    """
    Text datatype for Infernal covariance model files (extension "cm").

    A model record is recognised by a line starting with "INFERNAL1/a";
    the number of such lines is stored as the 'number_of_models' metadata.
    """
    file_ext = "cm"

    MetadataElement(name="number_of_models", default=0, desc="Number of covariance models", readonly=True, visible=True, optional=True, no_value=0)

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set the peek and blurb of 'dataset'.

        For a purged dataset fixed placeholder texts are used; otherwise the
        peek is taken from the file and the blurb reports the model count.
        """
        if not dataset.dataset.purged:
            # The peek is computed once; the original repeated this identical
            # call a second time after setting the blurb.
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            if dataset.metadata.number_of_models == 1:
                dataset.blurb = "1 model"
            else:
                dataset.blurb = "%s models" % dataset.metadata.number_of_models
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disc'

    def sniff(self, filename):
        """
        Return True when 'filename' contains at least one line starting
        with "INFERNAL1/a", False otherwise.
        """
        return count_special_lines("^INFERNAL1/a", filename) > 0

    def set_meta(self, dataset, **kwd):
        """
        Set the number of models in dataset.
        """
        dataset.metadata.number_of_models = count_special_lines("^INFERNAL1/a", dataset.file_name)

    @classmethod
    def split(cls, input_datasets, subdir_generator_function, split_params):
        """
        Split the input files by model records.

        :param input_datasets: datasets to split; only a single dataset is
                               supported
        :param subdir_generator_function: callable returning the directory
                               in which the next part file is written
        :param split_params: mapping with 'split_mode' and, for the
                               'to_size' mode, 'split_size' (number of model
                               records per part); None disables splitting
        :returns: None
        :raises Exception: for multiple input files, an unsupported or
                           unimplemented split mode, or a split size that is
                           not positive; errors raised while reading or
                           writing are logged and re-raised
        """
        # Imported locally because this module defines no logger of its own.
        import logging

        if split_params is None:
            return None

        if len(input_datasets) > 1:
            raise Exception("CM-file splitting does not support multiple files")
        input_files = [ds.file_name for ds in input_datasets]

        chunk_size = None
        if split_params['split_mode'] == 'number_of_parts':
            raise Exception('Split mode "%s" is currently not implemented for CM-files.' % split_params['split_mode'])
        elif split_params['split_mode'] == 'to_size':
            chunk_size = int(split_params['split_size'])
        else:
            raise Exception('Unsupported split mode %s' % split_params['split_mode'])

        # A size of 0 would otherwise surface as a ZeroDivisionError in the
        # modulo below.
        if chunk_size < 1:
            raise Exception('Split size must be a positive integer, got %s' % chunk_size)

        def _read_cm_records(filename):
            # Yield one list of lines per model record; a new record starts
            # at every line beginning with "INFERNAL1/a".
            lines = []
            with open(filename) as handle:
                for line in handle:
                    if line.startswith("INFERNAL1/a") and lines:
                        yield lines
                        lines = [line]
                    else:
                        lines.append(line)
            # Do not emit an empty record for an empty input file.
            if lines:
                yield lines

        def _write_part_cm_file(accumulated_lines):
            # Write one part into a fresh sub-directory, keeping the base
            # name of the input file.
            part_dir = subdir_generator_function()
            part_path = os.path.join(part_dir, os.path.basename(input_files[0]))
            with open(part_path, 'w') as part_file:
                part_file.writelines(accumulated_lines)

        try:
            cm_records = _read_cm_records(input_files[0])
            cm_lines_accumulated = []
            for counter, cm_record in enumerate(cm_records, start=1):
                cm_lines_accumulated.extend(cm_record)
                if counter % chunk_size == 0:
                    _write_part_cm_file(cm_lines_accumulated)
                    cm_lines_accumulated = []
            # Flush the records left over after the last full chunk.
            if cm_lines_accumulated:
                _write_part_cm_file(cm_lines_accumulated)
        except Exception as e:
            logging.getLogger(__name__).error('Unable to split files: %s' % str(e))
            raise
| 124 | |
if __name__ == '__main__':
    # When run as a script, just instantiate the datatype defined in this
    # module. The former Stockholm_1_0() call was dropped: that name is
    # neither defined nor imported here and raised a NameError.
    Infernal_CM_1_1()
| 128 |
