annotate common.py @ 0:7db7ecc78ad6 draft

Uploaded
author damion
date Mon, 02 Mar 2015 20:46:00 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
1 import os.path
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
2 import sys
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
3 import re
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
4 import optparse
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
5 import subprocess
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
6 from shutil import move
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
7 import csv
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
8
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
# Patterns are raw strings so that "\d", "\|" and "\." reach the regex engine
# untouched instead of being treated as (invalid) string escape sequences.

# Default-style query ids: "Query_" followed only by digits.
#   matches:   "Query_101"
#   rejected:  "Query_101a", "MyQuery_101"
re_default_query_id = re.compile(r"^Query_\d+$")

# Default-style subject ids; requires some kind of numeric id.
#   matches:   "gnl|BL_ORD_ID|221", "Subject_1"
#   rejected:  "Subject_", "Subject_12a", "TheSubject_1"
re_default_subject_id = re.compile(r"^(Subject_|gnl\|BL_ORD_ID\|)\d+$")

# Spot sequence ids that have accession ids in them,
# e.g. "gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]"
re_default_ncbi_id = re.compile(r"^gi\|\d+\|[a-z]+\|[a-zA-Z0-9_]+(\.\d+)?\|")
re_default_ref_id = re.compile(r"^ref\|[a-zA-Z0-9_]+\|[a-zA-Z0-9_]+(\.\d+)?\|")
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
22
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
23
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
def stop_err( msg ):
    """Write msg plus a newline to stderr and terminate with exit status 1.

    @param msg object message to report; formatted with "%s"
    @raise SystemExit always (via sys.exit(1))
    """
    # Wrap msg in a 1-tuple: a bare "%s" % msg would try to unpack a tuple
    # argument and raise TypeError instead of reporting the error.
    sys.stderr.write("%s\n" % (msg,))
    sys.exit(1)
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
27
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
class MyParser(optparse.OptionParser):
    """
    From http://stackoverflow.com/questions/1857346/python-optparse-how-to-include-additional-info-in-usage-output
    Provides a better class for displaying formatted help info in epilog() portion of optParse; allows for carriage returns.
    """
    def format_epilog(self, formatter):
        """Return the epilog verbatim (no re-wrapping by the formatter).

        @param formatter object help formatter supplied by optparse; unused
        @return string the epilog text, or "" when no epilog was configured
        """
        # OptionParser.format_help() concatenates its parts with "".join(),
        # so a missing epilog must be returned as "" rather than None.
        return self.epilog or ""
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
35
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
36
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
37
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
38 ## *********************************** FieldFilter ****************************
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
class FieldFilter(object):
    """Holds per-field filter criteria parsed from options.filters and applies them to records."""

    def __init__(self, tagGroup, options):
        """ Creates dictionary of fields that are to be filtered, and array of comparators and their values.
        Numeric filters have a single numeric value
        Each text filter is a string of phrases separated by "|"

        Fields are separated by ";", the field name is separated from its criteria by ":",
        and multiple criteria for one field are separated by ",".

        e.g. filters = "pident: gt 97; score: gt 37, lt 100; sallseqdescr: includes what | have|you"

        Comparators: ==, !=, gt, gte, lt, lte (numeric), includes, excludes (text).

        Side effects on options: options.filters is rewritten for display (gt -> >, etc.)
        and options.filters_HTML receives the HTML-escaped equivalent. When options.filters
        is None, options.filters_HTML is set to ''.

        @param tagGroup object must provide .columns_in (list of known field names); only read when filters are given
        @param options object must provide .filters (string or None) and .drop_redundant_hits
        @result .dict dictionary contains field name keys and arrays of [comparator, filterValue]

        """
        self.dict = {}
        self.comparators = {
            '==': lambda x,y: float(x) == float(y),
            '!=': lambda x,y: float(x) != float(y),
            'gt': lambda x,y: float(x) > float(y),
            'gte': lambda x,y: float(x) >= float(y),
            'lt': lambda x,y: float(x) < float(y),
            'lte': lambda x,y: float(x) <= float(y),
            'includes': self.includesPhrase,
            'excludes': self.excludesPhrase
        }
        self.matches = {}
        self.drop_redundant_hits = options.drop_redundant_hits

        if options.filters is None:
            options.filters_HTML = ''
            return

        cleaned_filters = []
        for filterParam in options.filters.strip().strip(';').split(';'):
            filterSpec = filterParam.strip().split(":")
            filterField = filterSpec[0].strip()
            if not filterField:
                continue

            # Halts (via stop_err) if the field is unknown.
            field_name = cleanField(tagGroup.columns_in, filterField, 'Invalid field for filtering eh')
            # self.dict is keyed by the cleaned name (which may have gained a
            # leading underscore), so the duplicate test must use it too.
            if field_name in self.dict:
                stop_err("Filter field listed twice: \"" + filterField + "\". Please move constraints up to first use of field!")

            if len(filterSpec) < 2:
                continue  # field named without any criteria

            # we have start of filter field defn. "[field]:[crit]+,"
            self.dict[field_name] = []  # new entry for filter field

            for criterion in filterSpec[1].strip().strip(',').split(','):
                # Split on any run of whitespace so repeated spaces between
                # comparator and value do not yield an empty value.
                parts = criterion.strip().split(None, 1)
                comparator = parts[0] if parts else ''
                if comparator not in self.comparators:
                    stop_err("Invalid comparator for field filter: \"" + comparator + "\"")
                if len(parts) < 2:
                    stop_err("Missing value for field comparator: \"" + comparator + "\"")

                if comparator in ('includes', 'excludes'):
                    # For text search, values are trimmed array of phrases.
                    # Must be a real list: it is re-scanned for every record.
                    phrases = [phrase.strip() for phrase in parts[1].split('|')]
                    filterValue = [phrase for phrase in phrases if phrase]
                else:
                    # Numeric comparators use the first token only.
                    filterValue = parts[1].split()[0]

                self.dict[field_name].append([comparator, filterValue])

            cleaned_filters.append(field_name + ':' + filterSpec[1])

        options.filters = ';'.join(cleaned_filters)
        # Adjust filter expression fieldnames.
        words = {'gt':'&gt;', 'gte':'&gt;=', 'lt':'&lt;', 'lte':'&lt;=',',':'',':':' '}
        options.filters_HTML = word_replace_all(options.filters, words)
        words = {'gt':'>', 'gte':'>=', 'lt':'<', 'lte':'<=',',':'',':':' '}
        options.filters = word_replace_all(options.filters, words)

    def __str__(self):
        # __init__ never sets a `label` attribute; fall back to '' so that
        # str() cannot raise AttributeError.
        return "label: %s dict: %s" % (getattr(self, 'label', ''), str(self.dict))

    def includesPhrase(self, source, filter_phrases):
        """ Search for any of the given words/phrases in source string
        @param source string text to search (substring match)
        @param filter_phrases array of phrases
        @return boolean True if at least one phrase occurs in source

        """
        return any(x in source for x in filter_phrases)

    def excludesPhrase(self, source, commastring):
        """ Inverse of includesPhrase(): True only if none of the phrases occur in source.
        @param source string text to search
        @param commastring array of phrases (passed straight to includesPhrase)

        """
        return not self.includesPhrase(source, commastring)

    def process(self, record):
        """ For given record (an object) cycle through filters to see if any of record's attributes fail filter conditions.

        FUTURE: MAKE GENERIC SO PASSED record field function for unique test.???

        @uses self.dict
        @uses self.drop_redundant_hits
        @uses self.matches
        @param record object An object containing field & values read from a <hit> line.
        @return boolean True if all filter criteria succeed, false otherwise
        @raise KeyError if a filtered field is not an attribute of record

        """

        # Block out repeated hits
        # THIS ASSUMES BLASTn XML file is listing BEST HIT FIRST. Only appropriate for searching for single hits within a reference sequence.
        # Kept as an equality test so truthy non-boolean option values
        # (e.g. strings) do not switch this on.
        if self.drop_redundant_hits == True:
            # accession id is e.g. NP_001009242.1 from gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]
            key = record.qseqid + '-' + record.accessionid
            if key in self.matches:
                return False
            self.matches[key] = True

        for key, constraints in self.dict.items():
            # Only the attribute lookup is guarded, so an error raised while
            # comparing values is not misreported as a missing field.
            try:
                value = getattr(record, key)
            except AttributeError:
                sys.stdout.write('A filter on field [' + key + '] was requested, but this field does not exist.\n')
                raise KeyError(key)

            for comparator, userValue in constraints:
                if not self.comparators[comparator](value, userValue):
                    return False  # failed a constraint

        return True
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
166
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
167
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
168
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
class FieldSpec(object):
    """Field specifications loaded from a tab-delimited .loc file, plus helpers to build column specs."""

    def __init__(self, file_path, columns_in = None):
        """ READ FIELD SPECIFICATIONS of a particular galaxy tool form/process from a .loc 'tabular data' file

        Example blast_reporting_fields.tab file

        #value type subtype sort filter default min max choose name
        # Remember to edit tool_data_table_conf.xml for column spec!
        qseqid numeric int 1 1 1 Query Seq-id
        sseqid numeric int 1 1 1 Subject Seq-id
        pident numeric float 1 1 97 90 100 1 Percentage of identical matches

        - value is name of field: alphanumeric strings only.
        - type is 'text' or 'numeric' or 'bin'
        - subtype where applicable, indicates further validation function
        - sort indicates if field should be provided in sort menu
        - filter indicates if field should be in menu of fields that can be filtered
        - default is default value field should have if drawn on form
        - min is minimum range of field
        - max is maximum range of field
        - choose indicates if field can be chosen for an output column (some are mandatory / some are to be avoided?)
        - name is textual name of field as it should appear on pulldown menus

        The first header column must be named "#value". initColumns() looks up the
        'short_name' and 'type' columns, so the file must provide both.

        @param file_path string full name and path of .loc file containing pertinent field names and their specifications.
        @param columns_in array of field names available in the input; defaults to a new empty list
        @result .dict dictionary field name -> dictionary of the remaining columns of that row

        """
        self.dict = {}
        # A fresh list per instance; a [] default would be shared by all instances.
        self.columns_in = [] if columns_in is None else columns_in

        # The csv module wants a binary file on Python 2 but a text file
        # opened with newline='' on Python 3.
        if sys.version_info[0] >= 3:
            spec_file = open(file_path, 'r', newline='')
        else:
            spec_file = open(file_path, 'rb')

        with spec_file as f:
            reader = csv.DictReader(f, delimiter='\t') #1st row read as field name header by default
            try:
                for row in reader:
                    myKey = row['#value']
                    # Some lines begin with '#' for value. Ignore them, along
                    # with rows whose first column is empty.
                    if myKey and not myKey.startswith('#'):
                        row.pop("#value", None)
                        self.dict[myKey] = row

            except csv.Error as e:
                stop_err('file %s, line %d: %s' % (file_path, reader.line_num, e))


    def initColumns(self, columns_out, custom_columns):
        """ Build the list of column specifications for a report.

        Augment columns with fieldSpec label and some sorting defaults.
        Default sorts: qseqid is marked as sorted asc; score as sorted desc.
        No need to move sorted fields around.
        This basically creates spec to generate tab-delimited file.
        The only other calculation done for that is the row_limit cut, if any.

        @param columns_out array of field names (a leading "_" is ignored when looking up the field spec)
        @param custom_columns string or None ";"-separated entries of the form
               "field[:group[:sort[:label]]]" where group is one of column, hidden, table, section
               and sort is asc or desc
        @return array of dictionaries with keys field, group, sort, label, type

        """
        column_spec = []
        for spec in columns_out:
            spec_field = spec.lstrip("_")

            if spec_field == 'qseqid':
                sort, group = 'asc', 'section'
            elif spec_field == 'score':
                sort, group = 'desc', 'column'
            else:
                sort, group = '', 'column'

            column_spec.append({
                'field': spec,
                'group': group,
                'sort': sort,
                'label': self.getAttribute(spec_field, 'short_name'),
                'type': self.getAttribute(spec_field, 'type')
            })

        if custom_columns is None:
            return column_spec

        # For the HTML (OR XML) report we allow users to specify columns of data to represent sections of the report or table sections.
        # Selected columns either enhance an existing column's info, or add a new column.
        # If a selected column is sorted, it is inserted/moved to after last SORTED column in data.
        # In other words, primary/secondary etc sorting is preserved.
        for spec in custom_columns.split(';'):
            params = [i.strip() for i in spec.strip().rstrip(":").split(":")]
            parlen = len(params)
            if params[0] == '':
                continue

            field_name = cleanField(self.columns_in, params[0]) # Halts if it finds a field mismatch

            group = 'column'
            sort = ''
            if parlen > 1 and params[1] in ['column','hidden','table','section']: group = params[1]
            if parlen > 2 and params[2] in ['asc','desc']: sort = params[2]

            # All self.column_spec have a fieldspec entry. Get default label from there.
            # HOW TO HANDLE CALCULATED FIELD LABELS? ENSURE THEY HAVE ENTRIES?
            spec_field = field_name.lstrip("_")
            label = self.getAttribute(spec_field, 'short_name')
            if parlen > 3: label = params[3]

            field = {
                'field': field_name,
                'group': group,
                'sort': sort,
                'label': label,
                'type': self.getAttribute(spec_field, 'type')
            }

            # If field is a 'section' move it right after last existing 'section' (if not matched)
            # if its a 'table' move it after last existing 'table' (if not matched)
            # otherwise append to column list.(if not matched)
            #
            # NOTE: column_spec is modified while being enumerated. After an
            # insert at ptr the next iteration revisits the entry that was
            # shifted to ptr + 1; that entry is already known not to match by
            # name, so the scan simply carries on looking for a later duplicate.
            found = False # once True, rest of loop looks for existing mention of field, and removes it.
            for (ptr, target) in enumerate(column_spec):

                found_name = spec_field == target['field'].lstrip("_")
                if found:
                    if found_name: # Found duplicate name
                        del column_spec[ptr]
                        break
                elif found_name:
                    found = True
                    column_spec[ptr] = field # Overwrite spec.
                    break

                elif field['group'] == 'section':
                    if target['group'] != 'section': # time to insert section
                        found = True
                        column_spec.insert(ptr, field)

                elif field['group'] == 'table':
                    if target['group'] == 'column' or target['group'] == 'hidden':
                        found = True
                        column_spec.insert(ptr, field)

            if not found: # didn't find place for field above.
                column_spec.append(field)

        return column_spec


    def getAttribute(self, fieldName, attribute):
        """ Retrieve attribute of a given field

        @param fieldName string
        @param attribute string
        @return string value of attribute
        @raise KeyError if the field or the attribute is not present in self.dict

        """
        return self.dict[fieldName][attribute]
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
329
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
330
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
def word_replace_all(text, dictionary):
    """ Replace whole tokens of text that appear as keys in dictionary.

    text is split into alternating runs of word characters and non-word
    characters; each run that exactly equals a key is swapped for its value.
    A separator is therefore only replaced when the entire run matches
    (e.g. "," matches the key ",", but ", " does not).

    @param text string
    @param dictionary dictionary token -> replacement string
    @return string text with matching tokens replaced

    """
    # Workaround: split() function is not allowing words next to punctuation.
    # Raw string so "\W" is passed to the regex engine as written.
    tokens = re.split(r'(\W+)', text)
    return ''.join(dictionary.get(token, token) for token in tokens)
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
336
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
337
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
def cleanField(columns_in, field_name, msg = 'Not a valid field name'):
    """ Resolve a user-supplied field name to the name used in columns_in.

    The name may contain only alphanumeric characters and underscores. It is
    accepted as-is when listed in columns_in, or with a leading underscore
    added when only that variant is listed. Any other case is reported
    through stop_err().

    @param columns_in array of known field names
    @param field_name string name to validate
    @param msg string prefix for the error message
    @return string matching entry of columns_in

    """
    without_underscores = field_name.replace('_','')
    if not without_underscores.isalnum():
        stop_err(msg + ': [' + field_name+']')

    if field_name in columns_in:
        return field_name

    underscored = '_' + field_name #passed from source file
    if underscored in columns_in:
        return underscored

    #column not found here
    stop_err(msg + ':'+ field_name + '- no such field')
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
349
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
350
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
def fileSort (out_file, fields):
    """
    fileSort() re-sorts a tab-delimited file in place using linux "sort", to
    handle the possibility of giant file sizes.

    Every entry of fields whose 'sort' value is truthy yields one
    -k[col]V[r],[col] parameter that starts and ends sorting on that column,
    where col is the entry's 1-based position in fields.
    Note that sort takes in columns with primary listed first, then secondary etc.,
    so the order of the sorted entries in fields sets the key priority.
    Note that file to be sorted can't have 1st line column headers.

    sort attributes:

    -f ignore case;
    -r reverse (i.e. descending)
    -n numeric
    -k[start col],[end col] range of text that sort will be performed on
    -s stabilize sort : "If checked, this will stabilize sort by disabling its last-resort comparison so that lines in which all fields compare equal are left in their original relative order." Note, this might not be available on all linux flavours?
    -V sorts numbers within text - if number is leading then field essentially treated as numeric. This means we don't have to specify -n for numeric fields in particular
    -t field separator (a tab here)

    Note: some attention may need to be given to locale settings for command line sort
    May need to set export LC_ALL=C or export LANG=C to ensure same results on all systems

    @param out_file string File path of file to resort (overwritten via sort's -o option)
    @param fields list Dictionaries, one per file column in column order, each with a 'field' name (validated as an alphanumeric word + underscores) and a 'sort' value: falsy = not a sort key, "asc" = ascending, anything else = descending.
    """

    sortparam = []
    for colPtr, field in enumerate(fields):
        if not field['sort']:
            continue

        field_name = field['field']
        if not field_name.replace('_','').isalnum():
            # Report the name itself: concatenating the whole field
            # dictionary to a string raises TypeError instead.
            stop_err("Invalid field to sort on: " + field_name)

        ordering = '' if field['sort'] == "asc" else 'r'
        column = str(colPtr+1)
        # V sorts numbers AND text (check server's version of sort)
        sortparam.append('-k' + column + 'V' + ordering + ',' + column)

    if sortparam:
        args = ['sort','-s','-f','-V','-t\t'] + sortparam + ['-o' + out_file, out_file]
        # NOTE: the exit status of sort is not checked, so a failed sort
        # leaves the file in whatever state sort left it.
        subprocess.call(args)
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
395
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
396
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
397
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
def fileTabular (in_file, tagGroup, options):
    """Produces tabular report format. Takes in tabular data + metainformation about that file, and iterates through rows. Not a query-based approach.
    It keeps only the first len(tagGroup.columns) values of each row, which trims off any trailing columns (per the original note, the sort-only columns),
    It optionally adds column label header. (not done in fileSort() because it gets mixed into sort there.)
    NOTE: RUN THIS AFTER fileHTML() BECAUSE IT MAY TRIM FIELDS THAT HTML REPORT NEEDS

    The result is written to in_file + '.tmp' and then moved over in_file.
    An IOError during processing is printed rather than raised; a row with
    fewer values than there are columns raises IndexError.

    @param in_file string Full file path
    @param tagGroup object Its columns attribute is a list of dictionaries, each with 'field' and 'label' keys
    @param options object Its column_labels attribute selects the header: falsy = no header, 'label' = field labels, anything else = field names without leading underscores

    """
    columns = tagGroup.columns
    column_count = len(columns)
    tmp_file = in_file + '.tmp'

    # The csv module wants binary files on Python 2 but text files opened
    # with newline='' on Python 3.
    py3 = sys.version_info[0] >= 3
    open_kwargs = {'newline': ''} if py3 else {}

    fp_in = open(in_file, 'r' if py3 else 'rb', **open_kwargs)
    try:
        fp_out = open(tmp_file, 'w' if py3 else 'wb', **open_kwargs)
    except IOError:
        fp_in.close()
        raise

    try:
        try:
            reader = csv.reader(fp_in, delimiter="\t")
            writer = csv.writer(fp_out, delimiter="\t")

            # WRITE TABULAR HEADER
            if options.column_labels:
                if options.column_labels == 'label':
                    tabHeader = [field['label'] for field in columns]
                else:
                    # Tabular data header: strip leading underscores off of any labels...
                    tabHeader = [field['field'].lstrip('_') for field in columns]

                writer.writerow(tabHeader)

            for row in reader:
                writer.writerow([row[idx] for idx in range(column_count)])
        finally:
            # Always release both handles, and flush the output before the
            # temporary file replaces the original.
            fp_in.close()
            fp_out.close()

        move(tmp_file, in_file) # Overwrites in_file

    except IOError as e:
        print('Operation failed: %s' % e.strerror)
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
441
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
442
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
443
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
def fileSelections (in_file, selection_file, tagGroup, options):
    """ Produces selection report format.
    For selection file we need: qseqid, _qseq, sseqid, _sseq, and two counters.

    Each output row is: sequence id, sequence, group number, row number.
    Input rows are grouped by consecutive runs of the same qseqid: the first
    row of a run writes a query row (qseqid, _qseq) before its subject row,
    and every input row writes a subject row (sseqid, _sseq). The group number
    starts at 0 and the row number at 1.

    Nothing is read or written when selection_file is the string 'None'.
    An IOError during processing is printed rather than raised. A needed
    field that is missing from tagGroup.columns raises KeyError naming that
    field when the first input row is processed.

    @param in_file string Full file path of tab-delimited input
    @param selection_file string Full file path of report to write, or 'None' to skip
    @param tagGroup object Its columns attribute is a list of dictionaries with a 'field' key, in file column order
    @param options object Not used by this function

    """
    if selection_file == 'None':
        # No selection report requested; there is no output file to write.
        return

    # The csv module wants a binary input file on Python 2 but text files
    # opened with newline='' on Python 3.
    py3 = sys.version_info[0] >= 3
    open_kwargs = {'newline': ''} if py3 else {}

    fp_in = open(in_file, 'r' if py3 else 'rb', **open_kwargs)
    try:
        fp_out = open(selection_file, 'w', **open_kwargs)
    except IOError:
        fp_in.close()
        raise

    try:
        reader = csv.reader(fp_in, delimiter="\t")
        writer = csv.writer(fp_out, delimiter="\t")

        # Map each field name to its column position (last occurrence wins).
        col = {}
        for (idx, field) in enumerate(tagGroup.columns):
            col[field['field']] = idx

        selectrow_count = 0
        grouping = -1
        old_section = ''
        for row in reader:

            selectrow_count +=1
            query_id = row[col['qseqid']]
            if query_id != old_section:
                # New query: emit its own row first and open a new group.
                old_section = query_id
                grouping +=1
                writer.writerow([query_id, row[col['_qseq']], grouping, selectrow_count])
                selectrow_count +=1

            writer.writerow([row[col['sseqid']], row[col['_sseq']], grouping, selectrow_count])

    except IOError as e:
        print('Operation failed: %s' % e.strerror)

    finally:
        fp_in.close()
        fp_out.close()
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
492
7db7ecc78ad6 Uploaded
damion
parents:
diff changeset
def testSuite(test_ids, tests, output_dir):
    """Run the selected regression tests and print a diff of each output file.

    For each test, blast_reporting.py (located next to this file) is run
    through the shell on the test's input from the test-data folder, writing
    its outputs into output_dir. Each output is then compared against the
    file of the same name in test-data, and a context diff is printed.
    Always finishes by calling stop_err().

    @param test_ids string 'all', or a comma-separated list of test ids
    @param tests dict Maps test id to a dictionary with 'input', 'outputs' (space-separated file names) and 'options' strings; 'base_dir' and 'tmp_output' keys are added to it
    @param output_dir string Prefix put directly in front of each output file name, so it must end with a path separator
    """
    import difflib

    if test_ids == 'all':
        test_ids = sorted(tests)
    else:
        test_ids = test_ids.split(',')

    for test_id in test_ids:
        if test_id not in tests:
            stop_err("\nExpecting one or more test ids from " + str(sorted(tests)))
            continue

        test = tests[test_id]
        test['base_dir'] = os.path.dirname(__file__)
        # Each output file has to be prefixed with the output folder
        test['tmp_output'] = (' ' + test['outputs']).replace(' ',' ' + output_dir)
        # Note: output_dir output files don't get cleaned up after each test. Should they?!
        params = '%(base_dir)s/blast_reporting.py %(base_dir)s/test-data/%(input)s%(tmp_output)s %(options)s' % test
        print("Testing " + test_id + ': ' + params)
        # NOTE(review): the command line goes through the shell unescaped;
        # acceptable only while test definitions are trusted.
        os.system(params)

        for out_name in test['outputs'].split(' '):
            # Read both files inside "with" so the handles are released.
            with open(test['base_dir'] + '/test-data/' + out_name) as expected_fp:
                expected_lines = expected_fp.readlines()
            with open(output_dir + out_name) as actual_fp:
                actual_lines = actual_fp.readlines()

            # n = number of context lines
            diff = difflib.context_diff(expected_lines, actual_lines, lineterm='', n=0)
            # One Galaxy issue: it doesn't convert entities when user downloads file. BUT IT DOES when generating directly to command line?
            print('\nCompare ' + out_name)
            print('\n'.join(diff))

    stop_err("\nTest finished.")