Mercurial > repos > damion > blast_reporting
comparison reference_bins.py @ 0:7db7ecc78ad6 draft
Uploaded
| author | damion |
|---|---|
| date | Mon, 02 Mar 2015 20:46:00 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7db7ecc78ad6 |
|---|---|
| 1 import os.path | |
| 2 import common | |
| 3 | |
class ReferenceBins:
    """
    Builds and holds a list of ReferenceBin lookup tables and uses them to tag
    search-result records with the bin(s) their accession ids fall into.
    """

    def __init__(self, db_spec_path = None):
        """
        @param db_spec_path string path to fasta databases specification file. This file has format:
            #value id type active name path
            16S_euzby 16S 1 Euzby /usr/local/galaxy/shared/ngs_data/
            ...
        When omitted, 'fasta_reference_dbs.tab' in this module's own folder is used.
        """
        self.reference_bins = []

        if db_spec_path is None:  # Default to the command-line lookup table in code's folder:
            db_spec_path = os.path.join(os.path.dirname(__file__), 'fasta_reference_dbs.tab')
        self.fieldSpec = common.FieldSpec(db_spec_path)

    def __main__(self): pass

    # Could double check to see if it exists?
    def build_bins(self, bins, columns):
        """
        Parse a bin specification string, load each requested bin and append the
        matching column definition(s) to `columns`.

        @param bins string of ';' separated entries, each of the form
            name[:layout[:filter[:description]]] where
            layout is 'table', 'column' (default) or 'hidden',
            filter is '' (default), 'include' or 'exclude',
            description is 'true' to also request a '<name>_desc' text column.
            None clears any previously loaded bins.
        @param columns list that receives one dict per new column (modified in place).
        @return False when bins is None; otherwise None.

        Invalid names, layouts or filters are reported through common.stop_err().
        """
        if bins is None:
            self.reference_bins = []
            return False

        # Defaults for the optional 2nd..4th parts: grouping, filtering, description.
        defaults = ('column', '', '')

        for myfield in bins.strip().strip(';').split(';'):
            field_spec = [part.strip() for part in myfield.strip().split(':')]
            field_name = field_spec[0]

            if not field_name:
                continue  # empty entry, e.g. from an empty spec or a doubled ';'

            if not field_name.replace('_', '').isalnum():
                common.stop_err("Invalid bin name: " + field_name + ':' + myfield)

            # Pad missing trailing parts with their defaults.
            field_spec.extend(defaults[len(field_spec) - 1:])

            grouping = field_spec[1]
            if grouping not in ['table', 'column', 'hidden']:
                common.stop_err("Invalid bin layout: " + grouping)

            bin_filter = field_spec[2]
            if bin_filter not in ['', 'include', 'exclude']:
                common.stop_err("Invalid bin sort: " + bin_filter)

            newbin = self.buildBin(field_name, bin_filter)
            self.reference_bins.append(newbin)

            columns.append({  # any time we have a bin we want sort descending
                'field': field_name,
                'group': grouping,
                'sort': 'desc',
                'label': newbin.name,
                'type': 'bin'
            })

            if field_spec[3] == 'true':  # description field requested
                columns.append({
                    'field': field_name + '_desc',
                    'group': 'column',
                    'sort': '',  # Allow other sorts????
                    'label': newbin.name + ' Description',
                    'type': 'text'
                })

    def buildBin(self, bin_folder_name, bin_filter):
        """
        Create a lookup table consisting of a dictionary entry for each accession id held in the bin's file.
        Each line of the file is "accession[.version][<tab>description]"; the key stored
        is the accession without its ".version" suffix, the value is the description ('' if absent).

        @param bin_folder_name string name of requested db, e.g 16S_ncbi
        @param bin_filter string '' or 'include' or 'exclude'
        @return ReferenceBin with its lookup dictionary populated.

        If the file cannot be opened or read, the problem is reported through common.stop_err().
        """
        ref_bin = ReferenceBin(self.fieldSpec, bin_folder_name, bin_filter)

        try:
            with open(ref_bin.file_path) as file_in:
                for line in file_in:
                    #FUTURE: Preprocess so accession ID ready to go.
                    key_value = line.rstrip().split("\t", 1)
                    # keep only first term minus integer (version) portion of id
                    acc_general_id = key_value[0].split('.')[0]
                    if not acc_general_id:
                        # Blank line or missing id: an empty key could match the empty
                        # fragments produced when splitting sallseqid, so skip it.
                        continue
                    ref_bin.lookup[acc_general_id] = key_value[1] if len(key_value) > 1 else ''

        except IOError:
            common.stop_err("Reference bin could not be found or opened: " + ref_bin.file_path)

        return ref_bin

    def setStatus(self, record):
        """
        Tag `record` with the bins its subject accession ids belong to.

        For every loaded bin, attributes named <bin.field> and <bin.field>_desc are set on
        the record: '' when no accession matches, otherwise the 1-based position of the bin
        (as a string) and the description stored for the first matching accession.

        @param record object providing 'sallseqid' and 'sseqid' string attributes.
        @return None normally (including when no bins are loaded); False as soon as an
            accession matches a bin whose filter is non-empty. In that case attributes
            for later bins have not been set on the record.
        """
        if len(self.reference_bins) == 0: return  # no bins

        # Use of "extended slices" http://docs.python.org/2.3/whatsnew/section-slices.html
        # Example sallseqid is 'gi|194753780|ref|XR_046072.1|;gi|195119578|ref|XR_047594.1|;gi|195154052|ref|XR_047967.1|'
        # Example accs is ['XR_046072.1', 'XR_047594.1', 'XR_047967.1']
        # Original code was "[1::2][1::2]" (select every 2nd item, then every 2nd item of that)
        accs = record.sallseqid.split('|')

        if common.re_default_ncbi_id.match(record.sseqid):
            accs = accs[3::4]  # every 4th item, starting at index 3

        elif common.re_default_ref_id.match(record.sseqid):
            accs = accs[1::2]  # every 2nd item, starting at index 1

        # Check each accession # against each bin.
        for ptr, ref_bin in enumerate(self.reference_bins):
            setattr(record, ref_bin.field, '')  # Using '','1' not FALSE/TRUE because of tab delim output
            setattr(record, ref_bin.field + '_desc', '')
            for acc in accs:
                acc_general_id = acc.split('.')[0]
                if acc_general_id in ref_bin.lookup:
                    # NOTE(review): 'exclude' holds the raw filter string, so this is true
                    # for both 'include' and 'exclude' -- confirm that is intended.
                    if ref_bin.exclude: return False
                    setattr(record, ref_bin.field, str(ptr + 1))
                    # Include any bin notes for this item
                    setattr(record, ref_bin.field + '_desc', ref_bin.lookup[acc_general_id])
                    break  # This result has been binned to this bin so break.

    def __str__(self):
        """Return one "name: ... dict: ..." line per loaded bin ('' when there are none)."""
        return "\n".join(
            "name: %s dict: %s" % (ref_bin.name, str(ref_bin.lookup))
            for ref_bin in self.reference_bins
        )
| 132 | |
class ReferenceBin:
    """
    One reference bin: the settings for a named reference database plus the
    (initially empty) accession-id lookup dictionary for it.
    """

    def __init__(self, fieldSpec, bin_folder_name, bin_filter):
        """
        @param fieldSpec object whose getAttribute(id, attribute) returns the 'name' and 'path' of a database
        @param bin_folder_name string id of the database; also the bin's folder and record field name
        @param bin_filter string filter setting, stored unchanged in self.exclude
        """
        self.lookup = {}  # accession id -> description
        self.folder = bin_folder_name
        self.name = fieldSpec.getAttribute(bin_folder_name, 'name')
        self.field = bin_folder_name
        self.path = fieldSpec.getAttribute(bin_folder_name, 'path')
        self.exclude = bin_filter
        # absolute path to reference bins folder: /usr/local/galaxy/shared/ngs_data/
        # Joined component-wise so the result is correct whether or not 'path'
        # ends with a path separator.
        self.file_path = os.path.join(self.path, self.folder, 'accession_ids.tab')

    def __str__(self):
        """Return the bin's display name and lookup dictionary as a string."""
        return "name: %s dict: %s" % (self.name, str(self.lookup))
| 143 | |
if __name__ == "__main__":
    # Script entry point: build a bin manager from the default database
    # specification file, then invoke its __main__ hook (currently a no-op).
    binManager = ReferenceBins()
    binManager.__main__()
| 148 |
