Mercurial > repos > iuc > msa_datatypes
comparison msa.py @ 0:70227007b991 draft default tip
Imported from capsule None
| author | iuc |
|---|---|
| date | Tue, 22 Apr 2014 13:55:42 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:70227007b991 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 | |
| 3 from galaxy.datatypes.data import Text | |
| 4 from galaxy.datatypes.sniff import get_headers, get_test_fname | |
| 5 from galaxy.datatypes.data import get_file_peek | |
| 6 import subprocess | |
| 7 import os | |
| 8 | |
| 9 from galaxy.datatypes.metadata import MetadataElement | |
| 10 from galaxy.datatypes import metadata | |
| 11 | |
def count_special_lines(word, filename, invert=False):
    """
    Count the lines of a file that match a pattern, using the grep tool.

    grep is used to speed up the searching and counting.

    :param word: pattern handed to grep as its search expression
    :param filename: path of the file to search
    :param invert: if True, ``-v`` is passed to grep so that the lines
        which do NOT match are counted instead
    :returns: the number printed by ``grep -c``, or 0 when grep could not
        be started or its output could not be read as a number
    """
    cmd = ["grep", "-c"]
    if invert:
        cmd.append('-v')
    cmd.extend([word, filename])
    try:
        out = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # `grep -c` prints the count as the first token on stdout.
        return int(out.communicate()[0].split()[0])
    except (OSError, IndexError, ValueError):
        # Deliberately best effort, but no longer a bare ``except`` that
        # would also swallow KeyboardInterrupt / SystemExit:
        #   OSError    - the grep executable could not be launched
        #   IndexError - grep wrote nothing to stdout
        #   ValueError - the first token of the output is not a number
        return 0
| 28 | |
class Stockholm_1_0(Text):
    """
    Datatype for multiple sequence alignments in Stockholm 1.0 format.

    A file may hold several alignments; each one is introduced by a
    ``# STOCKHOLM 1.0`` header line and closed by a line containing ``//``.
    """
    file_ext = "stockholm"

    # grep basic-regular-expression matching a Stockholm header line:
    # '#', one or more whitespace characters, 'STOCKHOLM', one or more
    # whitespace characters, then a literal '1.0'.
    # ('[[:space:]+]' would be a single character out of whitespace or '+',
    # and an unescaped '.' would match any character.)
    _HEADER_PATTERN = r'^#[[:space:]][[:space:]]*STOCKHOLM[[:space:]][[:space:]]*1\.0'

    MetadataElement( name="number_of_alignments", default=0, desc="Number of multiple alignments", readonly=True, visible=True, optional=True, no_value=0 )

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set the peek (file preview) and the blurb (alignment count) of the dataset.

        Purged datasets get fixed placeholder texts instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            # Read the metadata element under the name it is declared with.
            if dataset.metadata.number_of_alignments == 1:
                dataset.blurb = "1 alignment"
            else:
                dataset.blurb = "%s alignments" % dataset.metadata.number_of_alignments
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disc'

    def sniff(self, filename):
        """
        Return True if the file contains at least one Stockholm 1.0 header line.
        """
        return count_special_lines(self._HEADER_PATTERN, filename) > 0

    def set_meta(self, dataset, **kwd):
        """
        Set the number of alignments in dataset.

        The number is the count of Stockholm header lines in the file.
        """
        dataset.metadata.number_of_alignments = count_special_lines(self._HEADER_PATTERN, dataset.file_name)

    @classmethod
    def split(cls, input_datasets, subdir_generator_function, split_params):
        """
        Split the input file by alignment records.

        :param input_datasets: datasets to split; only a single dataset is supported
        :param subdir_generator_function: callable returning the directory
            into which the next part file is written
        :param split_params: mapping with the keys 'split_mode' and
            'split_size'; if None, nothing is done and None is returned
        :raises Exception: for more than one input dataset, for the
            unimplemented mode 'number_of_parts' and for unknown modes;
            errors raised while splitting are logged and re-raised
        """
        # Imported here because the module defines no logger of its own.
        import logging

        if split_params is None:
            return None

        if len(input_datasets) > 1:
            raise Exception("STOCKHOLM-file splitting does not support multiple files")
        input_files = [ds.file_name for ds in input_datasets]

        chunk_size = None
        if split_params['split_mode'] == 'number_of_parts':
            raise Exception('Split mode "%s" is currently not implemented for STOCKHOLM-files.' % split_params['split_mode'])
        elif split_params['split_mode'] == 'to_size':
            # Number of alignment records written into each part file.
            chunk_size = int(split_params['split_size'])
        else:
            raise Exception('Unsupported split mode %s' % split_params['split_mode'])

        def _read_stockholm_records(filename):
            """Yield the lines of one record at a time; a record ends with a '//' line."""
            lines = []
            with open(filename) as handle:
                for line in handle:
                    lines.append(line)
                    if line.strip() == '//':
                        yield lines
                        lines = []
            # Do not silently lose a final record that lacks its '//'
            # terminator; trailing blank lines alone are not a record.
            if any(line.strip() for line in lines):
                yield lines

        def _write_part_stockholm_file(accumulated_lines):
            """Write the lines into a new part directory, keeping the input's file name."""
            part_dir = subdir_generator_function()
            part_path = os.path.join(part_dir, os.path.basename(input_files[0]))
            # The context manager closes the file even if writing fails.
            with open(part_path, 'w') as part_file:
                part_file.writelines(accumulated_lines)

        try:
            stockholm_records = _read_stockholm_records(input_files[0])
            stockholm_lines_accumulated = []
            for counter, stockholm_record in enumerate(stockholm_records, start=1):
                stockholm_lines_accumulated.extend(stockholm_record)
                # Flush a part each time chunk_size records have been collected.
                if counter % chunk_size == 0:
                    _write_part_stockholm_file(stockholm_lines_accumulated)
                    stockholm_lines_accumulated = []
            # Remaining records that did not fill a whole chunk.
            if stockholm_lines_accumulated:
                _write_part_stockholm_file(stockholm_lines_accumulated)
        except Exception as e:
            logging.getLogger(__name__).error('Unable to split files: %s' % str(e))
            raise
| 110 |
