annotate ParseDb.py @ 0:183edf446dcf draft default tip

Uploaded
author davidvanzessen
date Mon, 17 Jul 2017 07:44:27 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1 #!/usr/bin/env python3
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
2 """
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
3 Parses tab delimited database files
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
4 """
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
5 # Info
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
6 __author__ = 'Jason Anthony Vander Heiden'
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
7 from changeo import __version__, __date__
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
8
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
9 # Imports
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
10 import csv
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
11 import os
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
12 import re
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
13 from argparse import ArgumentParser
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
14 from collections import OrderedDict
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
15
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
16 from textwrap import dedent
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
17 from time import time
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
18 from Bio import SeqIO
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
19 from Bio.Seq import Seq
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
20 from Bio.SeqRecord import SeqRecord
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
21 from Bio.Alphabet import IUPAC
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
22
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
23 # Presto and changeo imports
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
24 from presto.Defaults import default_delimiter, default_out_args
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
25 from presto.Annotation import flattenAnnotation
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
26 from presto.IO import getOutputHandle, printLog, printProgress, printMessage
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
27 from changeo.Defaults import default_csv_size
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
28 from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
29 from changeo.IO import getDbWriter, readDbFile, countDbFile
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
30
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
31 # System settings
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
32 csv.field_size_limit(default_csv_size)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
33
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
34 # Defaults
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
35 default_id_field = 'SEQUENCE_ID'
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
36 default_seq_field = 'SEQUENCE_IMGT'
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
37 default_germ_field = 'GERMLINE_IMGT_D_MASK'
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
38 default_index_field = 'INDEX'
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
39
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
40 # TODO: convert SQL-ish operations to modify_func() as per ParseHeaders
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
41
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def getDbSeqRecord(db_record, id_field, seq_field, meta_fields=None,
                   delimiter=default_delimiter):
    """
    Converts a single database row into a SeqRecord

    Arguments:
      db_record = a dictionary containing a database record
      id_field = the field containing identifiers
      seq_field = the field containing sequences
      meta_fields = a list of fields to add to sequence annotations
      delimiter = a tuple of delimiters for (fields, values, value lists)

    Returns:
      a SeqRecord, or None if the identifier or sequence value is empty
    """
    # A row is only convertible when both the ID and the sequence are non-empty
    if not (db_record[id_field] and db_record[seq_field]):
        return None

    # Collect the annotations that make up the sequence header; the ID always
    # comes first, followed by any requested fields present in the row
    annotations = OrderedDict()
    annotations['ID'] = db_record[id_field]
    if meta_fields is not None:
        for name in meta_fields:
            if name in db_record:
                annotations[name] = db_record[name]
    header = flattenAnnotation(annotations, delimiter=delimiter)

    # Wrap the sequence and header into a SeqRecord
    sequence = Seq(db_record[seq_field], IUPAC.ambiguous_dna)
    return SeqRecord(sequence, id=header, name=header, description='')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
72
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
73
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def splitDbFile(db_file, field, num_split=None, out_args=default_out_args):
    """
    Divides a tab-delimited database file into segments by description tags

    Arguments:
      db_file = filename of the tab-delimited database file to split
      field = the field name by which to split db_file
      num_split = the numerical threshold by which to group sequences;
                  if None treat field as textual
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      a list of output file names
    """
    def _openPart(label):
        # Opens one output file whose name carries the given label
        return getOutputHandle(db_file, label,
                               out_type=out_args['out_type'],
                               out_name=out_args['out_name'],
                               out_dir=out_args['out_dir'])

    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'split'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['NUM_SPLIT'] = num_split
    printLog(log)

    # Open IgRecord reader iter object
    reader = readDbFile(db_file, ig=False)

    # Determine total numbers of records
    rec_count = countDbFile(db_file)

    start_time = time()
    count = 0
    handles_dict = OrderedDict()
    try:
        if num_split is None:
            # Sort records into files based on textual field.
            # Unique field values are kept in order of first appearance so
            # the order of the output files is the same on every run.
            tag_iter = readDbFile(db_file, ig=False)
            tag_list = OrderedDict.fromkeys(row[field] for row in tag_iter)

            # Forbidden characters in filename and replacements.
            # Keys must be single characters: writing '\/', '\%' or '\|'
            # yields a two-character string that never matches a bare
            # '/', '%' or '|'.
            # NOTE(review): the 'gt'/'lt' labels look swapped for '<'/'>';
            # left unchanged so existing output file names stay the same.
            no_good = str.maketrans({'/': 'f', '\\': 'b', '?': 'q',
                                     '%': 'p', '*': 's', ':': 'c',
                                     '|': 'pi', '"': 'dq', "'": 'sq',
                                     '<': 'gt', '>': 'lt', ' ': '_'})

            # Create one output handle per unique tag
            for tag in tag_list:
                label = tag.translate(no_good)
                handles_dict[tag] = _openPart('%s-%s' % (field, label))

            def getKey(row):
                return row[field]
        else:
            # Sort records into files based on numeric num_split
            num_split = float(num_split)

            # Create output handles for the two sides of the threshold
            for key in ('under', 'atleast'):
                handles_dict[key] = _openPart('%s-%.1f' % (key, num_split))

            def getKey(row):
                return 'under' if float(row[field]) < num_split else 'atleast'

        # Create Db writer instances
        writers_dict = {key: getDbWriter(handle, db_file)
                        for key, handle in handles_dict.items()}

        # Iterate over IgRecords and write each row to the appropriate file
        for row in reader:
            printProgress(count, rec_count, 0.05, start_time)
            count += 1
            writers_dict[getKey(row)].writerow(row)
        printProgress(count, rec_count, 0.05, start_time)
    finally:
        # Close output file handles even if reading or writing failed
        for handle in handles_dict.values():
            handle.close()

    out_files = [handle.name for handle in handles_dict.values()]

    # Write log
    log = OrderedDict()
    for i, name in enumerate(out_files, start=1):
        log['OUTPUT%i' % i] = os.path.basename(name)
    log['RECORDS'] = rec_count
    log['PARTS'] = len(out_files)
    log['END'] = 'ParseDb'
    printLog(log)

    return out_files
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
182
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
183
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
184 # TODO: SHOULD ALLOW FOR UNSORTED CLUSTER COLUMN
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
185 # TODO: SHOULD ALLOW FOR GROUPING FIELDS
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def convertDbBaseline(db_file, id_field=default_id_field, seq_field=default_seq_field,
                      germ_field=default_germ_field, cluster_field=None,
                      meta_fields=None, out_args=default_out_args):
    """
    Builds a clip-type fasta file of germline and sample sequences from database records

    Arguments:
      db_file = the database file name
      id_field = the field containing identifiers
      seq_field = the field containing sample sequences
      germ_field = the field containing germline sequences
      cluster_field = the field containing clonal groupings
                      if None write the germline for each record
      meta_fields = a list of fields to add to sequence annotations
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    # NOTE(review): 'fasta' looks copied from convertDbFasta; left unchanged
    # because the subcommand name is not visible here - confirm before changing
    log['COMMAND'] = 'fasta'
    log['FILE'] = os.path.basename(db_file)
    log['ID_FIELD'] = id_field
    log['SEQ_FIELD'] = seq_field
    log['GERM_FIELD'] = germ_field
    log['CLUSTER_FIELD'] = cluster_field
    if meta_fields is not None:
        log['META_FIELDS'] = ','.join(meta_fields)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='sequences',
                                  out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'],
                                  out_type='clip')
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = germ_count = pass_count = fail_count = 0
    cluster_last = None
    try:
        for rec in db_iter:
            # Print progress for previous iteration
            printProgress(rec_count, result_count, 0.05, start_time)
            rec_count += 1

            # Update cluster ID
            cluster = rec.get(cluster_field, None)

            # Get germline SeqRecord when needed: once per record without a
            # cluster field, otherwise only when the cluster value changes
            if cluster_field is None:
                germ = getDbSeqRecord(rec, id_field, germ_field, meta_fields,
                                      delimiter=out_args['delimiter'])
            elif cluster != cluster_last:
                germ = getDbSeqRecord(rec, cluster_field, germ_field,
                                      delimiter=out_args['delimiter'])
            else:
                germ = None

            # Get read SeqRecord
            seq = getDbSeqRecord(rec, id_field, seq_field, meta_fields,
                                 delimiter=out_args['delimiter'])

            # Write germline. getDbSeqRecord returns None for empty fields,
            # so the ID is only modified once a record is known to exist.
            if germ is not None:
                # Extra '>' makes the fasta writer emit a '>>' header
                germ.id = '>' + germ.id
                germ_count += 1
                SeqIO.write(germ, pass_handle, 'fasta')

            # Write sequences
            if seq is not None:
                pass_count += 1
                SeqIO.write(seq, pass_handle, 'fasta')
            else:
                fail_count += 1

            # Set last cluster ID
            cluster_last = cluster

        printProgress(rec_count, result_count, 0.05, start_time)
    finally:
        # Close file handles even if a record could not be processed
        pass_handle.close()

    # Print counts
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['GERMLINES'] = germ_count
    log['PASS'] = pass_count
    log['FAIL'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    return pass_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
281
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
282
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def convertDbFasta(db_file, id_field=default_id_field, seq_field=default_seq_field,
                   meta_fields=None, out_args=default_out_args):
    """
    Builds fasta files from database records

    Arguments:
      db_file = the database file name
      id_field = the field containing identifiers
      seq_field = the field containing sequences
      meta_fields = a list of fields to add to sequence annotations
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'fasta'
    log['FILE'] = os.path.basename(db_file)
    log['ID_FIELD'] = id_field
    log['SEQ_FIELD'] = seq_field
    if meta_fields is not None:
        log['META_FIELDS'] = ','.join(meta_fields)
    printLog(log)

    # Open file handles
    out_type = 'fasta'
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='sequences',
                                  out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'],
                                  out_type=out_type)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    try:
        for rec in db_iter:
            # Print progress for previous iteration
            printProgress(rec_count, result_count, 0.05, start_time)
            rec_count += 1

            # Get SeqRecord; None means the ID or sequence field was empty
            seq = getDbSeqRecord(rec, id_field, seq_field, meta_fields,
                                 out_args['delimiter'])

            # Write sequences
            if seq is not None:
                pass_count += 1
                SeqIO.write(seq, pass_handle, out_type)
            else:
                fail_count += 1

        printProgress(rec_count, result_count, 0.05, start_time)
    finally:
        # Close file handles even if a record could not be processed
        pass_handle.close()

    # Print counts
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['PASS'] = pass_count
    log['FAIL'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    return pass_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
347
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
348
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def addDbFile(db_file, fields, values, out_args=default_out_args):
    """
    Appends new columns, each filled with a single constant value, to a database file

    Arguments:
      db_file = the database file name
      fields = a list of fields to add
      values = a list of values to assign to all rows of each field
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    # Report the run parameters
    start_log = OrderedDict([('START', 'ParseDb'),
                             ('COMMAND', 'add'),
                             ('FILE', os.path.basename(db_file)),
                             ('FIELDS', ','.join(fields)),
                             ('VALUES', ','.join(values))])
    printLog(start_log)

    # Set up the reader, the output handle and the writer
    reader = readDbFile(db_file, ig=False)
    out_handle = getOutputHandle(db_file,
                                 out_label='parse-add',
                                 out_dir=out_args['out_dir'],
                                 out_name=out_args['out_name'],
                                 out_type='tab')
    writer = getDbWriter(out_handle, db_file, add_fields=fields)

    # Total number of records, used for progress reporting
    total = countDbFile(db_file)

    # Pair each new field with its value; fields already present in the
    # input header are left untouched
    new_columns = {}
    for name, value in zip(fields, values):
        if name not in reader.fieldnames:
            new_columns[name] = value

    # Copy every record to the output with the new columns attached
    start_time = time()
    rec_count = 0
    for row in reader:
        # Progress reflects the records completed before this one
        printProgress(rec_count, total, 0.05, start_time)
        rec_count += 1
        row.update(new_columns)
        writer.writerow(row)

    # Final progress update and summary
    printProgress(rec_count, total, 0.05, start_time)
    end_log = OrderedDict([('OUTPUT', os.path.basename(out_handle.name)),
                           ('RECORDS', rec_count),
                           ('END', 'ParseDb')])
    printLog(end_log)

    # Release the output file
    out_handle.close()

    return out_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
404
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
405
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def indexDbFile(db_file, field=default_index_field, out_args=default_out_args):
    """
    Writes a copy of a database file with an extra column holding a 1-based row number

    Arguments:
      db_file = the database file name
      field = the name of the index field to add
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    # Report the run parameters
    start_log = OrderedDict([('START', 'ParseDb'),
                             ('COMMAND', 'index'),
                             ('FILE', os.path.basename(db_file)),
                             ('FIELD', field)])
    printLog(start_log)

    # Set up the reader, the output handle and the writer
    reader = readDbFile(db_file, ig=False)
    out_handle = getOutputHandle(db_file,
                                 out_label='parse-index',
                                 out_dir=out_args['out_dir'],
                                 out_name=out_args['out_name'],
                                 out_type='tab')
    writer = getDbWriter(out_handle, db_file, add_fields=field)

    # Total number of records, used for progress reporting
    total = countDbFile(db_file)

    # Number each record in input order and write it out
    start_time = time()
    rec_count = 0
    for row in reader:
        # Progress reflects the records completed before this one
        printProgress(rec_count, total, 0.05, start_time)
        rec_count += 1

        # The running count (starting at 1) is the index value
        row[field] = rec_count
        writer.writerow(row)

    # Final progress update and summary
    printProgress(rec_count, total, 0.05, start_time)
    end_log = OrderedDict([('OUTPUT', os.path.basename(out_handle.name)),
                           ('RECORDS', rec_count),
                           ('END', 'ParseDb')])
    printLog(end_log)

    # Release the output file
    out_handle.close()

    return out_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
457
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
458
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def dropDbFile(db_file, fields, out_args=default_out_args):
    """
    Deletes entire fields from a database file

    Arguments:
      db_file = the database file name
      fields = a list of fields to drop
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    # Report the command actually being run (was mislabelled as 'add')
    log['COMMAND'] = 'drop'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-drop', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    # The writer is configured with the fields to exclude; records are
    # passed to it unchanged below
    pass_writer = getDbWriter(pass_handle, db_file, exclude_fields=fields)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # Write row
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
508
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
509
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def deleteDbFile(db_file, fields, values, logic='any', regex=False,
                 out_args=default_out_args):
    """
    Deletes records from a database file

    Arguments:
      db_file = the database file name
      fields = a list of fields to check for deletion criteria
      values = a list of values defining deletion targets
      logic = one of 'any' or 'all' defining whether one or all fields must have a match.
      regex = if False do exact full string matches; if True allow partial regex matches.
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name

    Raises:
      ValueError if logic is not one of 'any' or 'all'
    """
    # Define logic function; reject unsupported values up front instead of
    # failing later on an unbound name
    if logic == 'any':
        _logic_func = any
    elif logic == 'all':
        _logic_func = all
    else:
        raise ValueError("logic must be one of 'any' or 'all', not %r" % (logic,))

    # Define string match function
    if regex:
        # Compile the patterns once rather than for every record and field
        patterns = [re.compile(p) for p in values]

        def _match_func(x):
            # A missing or empty cell (non-string) can never match a pattern
            return isinstance(x, str) and any(p.search(x) for p in patterns)
    else:
        def _match_func(x):
            return x in values

    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'delete'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-delete', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Check for deletion values in all fields; a field absent from the
        # record yields False, which never matches
        delete = _logic_func([_match_func(rec.get(f, False)) for f in fields])

        # Write sequences
        if not delete:
            pass_count += 1
            pass_writer.writerow(rec)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['KEPT'] = pass_count
    log['DELETED'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
586
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
587
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def renameDbFile(db_file, fields, names, out_args=default_out_args):
    """
    Renames fields in a database file

    Arguments:
      db_file = the database file name
      fields = a list of fields to rename
      names = a list of new names for fields, paired by position with fields
      out_args = common output argument dictionary from parseCommonArgs

    Returns:
      the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'rename'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['NAMES'] = ','.join(names)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-rename', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')

    # Get header and rename fields. Work on a copy of the reader's header so
    # the reader keeps yielding records keyed by the original names, and so
    # the input file does not have to be opened a second time.
    header = list(db_iter.fieldnames)
    for f, n in zip(fields, names):
        # list.index raises ValueError if the field is not in the header
        i = header.index(f)
        header[i] = n

    # Open writer and write new header
    # TODO:  should modify getDbWriter to take a list of fields
    pass_writer = csv.DictWriter(pass_handle, fieldnames=header, dialect='excel-tab')
    pass_writer.writeheader()

    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # TODO:  repeating renaming is unnecessary.  should add a non-dict reader/writer to DbCore
        # Rename fields
        for f, n in zip(fields, names):
            rec[n] = rec.pop(f)
        # Write
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
654
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
655
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
656 def selectDbFile(db_file, fields, values, logic='any', regex=False,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
657 out_args=default_out_args):
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
658 """
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
659 Selects records from a database file
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
660
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
661 Arguments:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
662 db_file = the database file name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
663 fields = a list of fields to check for selection criteria
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
664 values = a list of values defining selection targets
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
665 logic = one of 'any' or 'all' defining whether one or all fields must have a match.
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
666 regex = if False do exact full string matches; if True allow partial regex matches.
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
667 out_args = common output argument dictionary from parseCommonArgs
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
668
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
669 Returns:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
670 the output file name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
671 """
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
672 # Define string match function
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
673 if regex:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
674 def _match_func(x, patterns): return any([re.search(p, x) for p in patterns])
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
675 else:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
676 def _match_func(x, patterns): return x in patterns
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
677
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
678 # Define logic function
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
679 if logic == 'any':
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
680 _logic_func = any
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
681 elif logic == 'all':
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
682 _logic_func = all
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
683
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
684 # Print console log
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
685 log = OrderedDict()
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
686 log['START'] = 'ParseDb'
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
687 log['COMMAND'] = 'select'
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
688 log['FILE'] = os.path.basename(db_file)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
689 log['FIELDS'] = ','.join(fields)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
690 log['VALUES'] = ','.join(values)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
691 log['REGEX'] =regex
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
692 printLog(log)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
693
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
694 # Open file handles
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
695 db_iter = readDbFile(db_file, ig=False)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
696 pass_handle = getOutputHandle(db_file, out_label='parse-select', out_dir=out_args['out_dir'],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
697 out_name=out_args['out_name'], out_type='tab')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
698 pass_writer = getDbWriter(pass_handle, db_file)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
699 # Count records
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
700 result_count = countDbFile(db_file)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
701
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
702 # Iterate over records
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
703 start_time = time()
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
704 rec_count = pass_count = fail_count = 0
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
705 for rec in db_iter:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
706 # Print progress for previous iteration
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
707 printProgress(rec_count, result_count, 0.05, start_time)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
708 rec_count += 1
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
709
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
710 # Check for selection values in all fields
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
711 select = _logic_func([_match_func(rec.get(f, False), values) for f in fields])
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
712
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
713 # Write sequences
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
714 if select:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
715 pass_count += 1
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
716 pass_writer.writerow(rec)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
717 else:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
718 fail_count += 1
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
719
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
720 # Print counts
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
721 printProgress(rec_count, result_count, 0.05, start_time)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
722 log = OrderedDict()
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
723 log['OUTPUT'] = os.path.basename(pass_handle.name)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
724 log['RECORDS'] = rec_count
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
725 log['SELECTED'] = pass_count
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
726 log['DISCARDED'] = fail_count
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
727 log['END'] = 'ParseDb'
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
728 printLog(log)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
729
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
730 # Close file handles
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
731 pass_handle.close()
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
732
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
733 return pass_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
734
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
735
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def sortDbFile(db_file, field, numeric=False, descend=False,
               out_args=default_out_args):
    """
    Sorts records by values in an annotation field

    Arguments:
      db_file = the database filename
      field = the field name to sort by
      numeric = if True sort field numerically (empty values sort as 0);
                if False sort field alphabetically
      descend = if True sort in descending order;
                if False sort in ascending order
      out_args = common output argument dictionary from parseCommonArgs;
                 the 'out_dir' and 'out_name' entries are used

    Returns:
      the output file name

    Raises:
      KeyError if a record does not contain field
      ValueError if numeric is True and a non-empty value cannot be
      converted to float
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'sort'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['NUMERIC'] = numeric
    printLog(log)

    # Define the sort key; empty numeric values are treated as zero
    if numeric:
        def _sort_key(rec):
            return float(rec[field] or 0)
    else:
        def _sort_key(rec):
            return rec[field]

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-sort', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    # The finally clause guarantees the output handle is released even when
    # a record lacks the sort field or holds a non-numeric value
    try:
        pass_writer = getDbWriter(pass_handle, db_file)

        # Load all records into memory; the whole file is needed before sorting
        start_time = time()
        printMessage("Indexing: Running", start_time=start_time)
        records = list(db_iter)
        result_count = len(records)

        # Sort records by field values. The sort is stable, so records with
        # equal values keep their input file order (also when reverse=True)
        records.sort(key=_sort_key, reverse=descend)
        printMessage("Indexing: Done", start_time=start_time, end=True)

        # Iterate over records
        start_time = time()
        rec_count = 0
        for rec in records:
            # Print progress for previous iteration
            printProgress(rec_count, result_count, 0.05, start_time)
            rec_count += 1

            # Write records
            pass_writer.writerow(rec)

        # Print counts
        printProgress(rec_count, result_count, 0.05, start_time)
        log = OrderedDict()
        log['OUTPUT'] = os.path.basename(pass_handle.name)
        log['RECORDS'] = rec_count
        log['END'] = 'ParseDb'
        printLog(log)
    finally:
        # Close file handles
        pass_handle.close()

    return pass_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
804
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
805
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
def updateDbFile(db_file, field, values, updates, out_args=default_out_args):
    """
    Updates field and value pairs to a database file

    Arguments:
      db_file = the database file name
      field = the field to update
      values = a list of values specifying which rows to update
      updates = a list of replacement values, paired by position with values
      out_args = common output argument dictionary from parseCommonArgs;
                 the 'out_dir' and 'out_name' entries are used

    Returns:
      the output file name

    Raises:
      KeyError if a record does not contain field
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'update'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['VALUES'] = ','.join(values)
    log['UPDATES'] = ','.join(updates)
    printLog(log)

    # Pair each target value with its replacement once, outside the record
    # loop. zip stops at the shorter list, so unpaired entries are ignored.
    replacements = list(zip(values, updates))

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-update', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    # The finally clause guarantees the output handle is released even when
    # a record lacks the requested field
    try:
        pass_writer = getDbWriter(pass_handle, db_file)
        # Count records
        result_count = countDbFile(db_file)

        # Iterate over records
        start_time = time()
        rec_count = pass_count = 0
        for rec in db_iter:
            # Print progress for previous iteration
            printProgress(rec_count, result_count, 0.05, start_time)
            rec_count += 1

            # Update value if found. Only the first matching pair is applied:
            # continuing the scan would let a later pair rewrite the value
            # just assigned (A->B then B->C turning A into C) and would count
            # the same record more than once.
            for x, y in replacements:
                if rec[field] == x:
                    rec[field] = y
                    pass_count += 1
                    break

            # Write records
            pass_writer.writerow(rec)

        # Print counts
        printProgress(rec_count, result_count, 0.05, start_time)
        log = OrderedDict()
        log['OUTPUT'] = os.path.basename(pass_handle.name)
        log['RECORDS'] = rec_count
        log['UPDATED'] = pass_count
        log['END'] = 'ParseDb'
        printLog(log)
    finally:
        # Close file handles
        pass_handle.close()

    return pass_handle.name
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
867
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
868
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
869 def getArgParser():
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
870 """
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
871 Defines the ArgumentParser
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
872
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
873 Arguments:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
874 None
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
875
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
876 Returns:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
877 an ArgumentParser object
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
878 """
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
879 # Define input and output field help message
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
880 fields = dedent(
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
881 '''
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
882 output files:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
883 sequences
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
884 FASTA formatted sequences output from the subcommands fasta and clip.
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
885 <field>-<value>
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
886 database files partitioned by annotation <field> and <value>.
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
887 parse-<command>
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
888 output of the database modification functions where <command> is one of
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
889 the subcommands add, index, drop, delete, rename, select, sort or update.
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
890
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
891 required fields:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
892 SEQUENCE_ID
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
893
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
894 optional fields:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
895 JUNCTION, SEQUENCE_IMGT, SEQUENCE_VDJ, GERMLINE_IMGT, GERMLINE_VDJ,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
896 GERMLINE_IMGT_D_MASK, GERMLINE_VDJ_D_MASK,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
897 GERMLINE_IMGT_V_REGION, GERMLINE_VDJ_V_REGION
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
898
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
899 output fields:
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
900 None
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
901 ''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
902
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
903 # Define ArgumentParser
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
904 parser = ArgumentParser(description=__doc__, epilog=fields,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
905 formatter_class=CommonHelpFormatter)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
906 parser.add_argument('--version', action='version',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
907 version='%(prog)s:' + ' %s-%s' %(__version__, __date__))
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
908 subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
909 help='Database operation')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
910 # TODO: This is a temporary fix for Python issue 9253
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
911 subparsers.required = True
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
912
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
913 # Define parent parser
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
914 parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
915 failed=False, log=False)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
916
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
917 # Subparser to convert database entries to sequence file
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
918 parser_seq = subparsers.add_parser('fasta', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
919 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
920 help='Creates a fasta file from database records.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
921 description='Creates a fasta file from database records.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
922 parser_seq.add_argument('--if', action='store', dest='id_field',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
923 default=default_id_field,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
924 help='The name of the field containing identifiers')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
925 parser_seq.add_argument('--sf', action='store', dest='seq_field',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
926 default=default_seq_field,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
927 help='The name of the field containing sequences')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
928 parser_seq.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
929 help='List of annotation fields to add to the sequence description')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
930 parser_seq.set_defaults(func=convertDbFasta)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
931
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
932 # Subparser to convert database entries to clip-fasta file
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
933 parser_baseln = subparsers.add_parser('baseline', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
934 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
935 description='Creates a BASELINe fasta file from database records.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
936 help='''Creates a specially formatted fasta file
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
937 from database records for input into the BASELINe
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
938 website. The format groups clonally related sequences
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
939 sequentially, with the germline sequence preceding
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
940 each clone and denoted by headers starting with ">>".''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
941 parser_baseln.add_argument('--if', action='store', dest='id_field',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
942 default=default_id_field,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
943 help='The name of the field containing identifiers')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
944 parser_baseln.add_argument('--sf', action='store', dest='seq_field',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
945 default=default_seq_field,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
946 help='The name of the field containing reads')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
947 parser_baseln.add_argument('--gf', action='store', dest='germ_field',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
948 default=default_germ_field,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
949 help='The name of the field containing germline sequences')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
950 parser_baseln.add_argument('--cf', action='store', dest='cluster_field', default=None,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
951 help='The name of the field containing containing sorted clone IDs')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
952 parser_baseln.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
953 help='List of annotation fields to add to the sequence description')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
954 parser_baseln.set_defaults(func=convertDbBaseline)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
955
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
956 # Subparser to partition files by annotation values
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
957 parser_split = subparsers.add_parser('split', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
958 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
959 help='Splits database files by field values.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
960 description='Splits database files by field values')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
961 parser_split.add_argument('-f', action='store', dest='field', type=str, required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
962 help='Annotation field by which to split database files.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
963 parser_split.add_argument('--num', action='store', dest='num_split', type=float, default=None,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
964 help='''Specify to define the field as numeric and group
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
965 records by whether they are less than or at least
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
966 (greater than or equal to) the specified value.''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
967 parser_split.set_defaults(func=splitDbFile)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
968
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
969 # Subparser to add records
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
970 parser_add = subparsers.add_parser('add', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
971 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
972 help='Adds field and value pairs.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
973 description='Adds field and value pairs.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
974 parser_add.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
975 help='The name of the fields to add.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
976 parser_add.add_argument('-u', nargs='+', action='store', dest='values', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
977 help='The value to assign to all rows for each field.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
978 parser_add.set_defaults(func=addDbFile)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
979
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
980 # Subparser to delete records
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
981 parser_delete = subparsers.add_parser('delete', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
982 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
983 help='Deletes specific records.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
984 description='Deletes specific records.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
985 parser_delete.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
986 help='The name of the fields to check for deletion criteria.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
987 parser_delete.add_argument('-u', nargs='+', action='store', dest='values', default=['', 'NA'],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
988 help='''The values defining which records to delete. A value
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
989 may appear in any of the fields specified with -f.''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
990 parser_delete.add_argument('--logic', action='store', dest='logic',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
991 choices=('any', 'all'), default='any',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
992 help='''Defines whether a value may appear in any field (any)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
993 or whether it must appear in all fields (all).''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
994 parser_delete.add_argument('--regex', action='store_true', dest='regex',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
995 help='''If specified, treat values as regular expressions
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
996 and allow partial string matches.''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
997 parser_delete.set_defaults(func=deleteDbFile)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
998
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
999 # Subparser to drop fields
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1000 parser_drop = subparsers.add_parser('drop', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1001 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1002 help='Deletes entire fields.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1003 description='Deletes specific records.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1004 parser_drop.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1005 help='The name of the fields to delete from the database.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1006 parser_drop.set_defaults(func=dropDbFile)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1007
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1008 # Subparser to index fields
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1009 parser_index = subparsers.add_parser('index', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1010 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1011 help='Adds a numeric index field.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1012 description='Adds a numeric index field.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1013 parser_index.add_argument('-f', action='store', dest='field',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1014 default=default_index_field,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1015 help='The name of the index field to add to the database.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1016 parser_index.set_defaults(func=indexDbFile)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1017
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1018 # Subparser to rename fields
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1019 parser_rename = subparsers.add_parser('rename', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1020 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1021 help='Renames fields.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1022 description='Renames fields.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1023 parser_rename.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1024 help='List of fields to rename.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1025 parser_rename.add_argument('-k', nargs='+', action='store', dest='names', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1026 help='List of new names for each field.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1027 parser_rename.set_defaults(func=renameDbFile)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1028
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1029 # Subparser to select records
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1030 parser_select = subparsers.add_parser('select', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1031 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1032 help='Selects specific records.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1033 description='Selects specific records.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1034 parser_select.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1035 help='The name of the fields to check for selection criteria.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1036 parser_select.add_argument('-u', nargs='+', action='store', dest='values', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1037 help='''The values defining with records to select. A value
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1038 may appear in any of the fields specified with -f.''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1039 parser_select.add_argument('--logic', action='store', dest='logic',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1040 choices=('any', 'all'), default='any',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1041 help='''Defines whether a value may appear in any field (any)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1042 or whether it must appear in all fields (all).''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1043 parser_select.add_argument('--regex', action='store_true', dest='regex',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1044 help='''If specified, treat values as regular expressions
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1045 and allow partial string matches.''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1046 parser_select.set_defaults(func=selectDbFile)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1047
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1048 # Subparser to sort file by records
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1049 parser_sort = subparsers.add_parser('sort', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1050 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1051 help='Sorts records by field values.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1052 description='Sorts records by field values.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1053 parser_sort.add_argument('-f', action='store', dest='field', type=str, required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1054 help='The annotation field by which to sort records.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1055 parser_sort.add_argument('--num', action='store_true', dest='numeric', default=False,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1056 help='''Specify to define the sort column as numeric rather
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1057 than textual.''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1058 parser_sort.add_argument('--descend', action='store_true', dest='descend',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1059 help='''If specified, sort records in descending, rather
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1060 than ascending, order by values in the target field.''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1061 parser_sort.set_defaults(func=sortDbFile)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1062
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1063 # Subparser to update records
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1064 parser_update = subparsers.add_parser('update', parents=[parser_parent],
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1065 formatter_class=CommonHelpFormatter,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1066 help='Updates field and value pairs.',
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1067 description='Updates field and value pairs.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1068 parser_update.add_argument('-f', action='store', dest='field', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1069 help='The name of the field to update.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1070 parser_update.add_argument('-u', nargs='+', action='store', dest='values', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1071 help='The values that will be replaced.')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1072 parser_update.add_argument('-t', nargs='+', action='store', dest='updates', required=True,
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1073 help='''The new value to assign to each selected row.''')
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1074 parser_update.set_defaults(func=updateDbFile)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1075
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1076 return parser
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1077
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1078
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: read the command line, upper-case the field name
    # options, validate the paired list options and then call the selected
    # subcommand function once for every input database file.

    # Build the parser and read the command line
    parser = getArgParser()
    checkArgs(parser)
    args = parser.parse_args()
    args_dict = parseCommonArgs(args)

    # Upper-case single-valued field name options when they are present
    for key in ('id_field', 'seq_field', 'germ_field', 'field'):
        if key in args_dict:
            args_dict[key] = args_dict[key].upper()
    # cluster_field is only converted when it holds a value
    if args_dict.get('cluster_field') is not None:
        args_dict['cluster_field'] = args_dict['cluster_field'].upper()
    # meta_fields is a list and is only converted when it holds a value
    if args_dict.get('meta_fields') is not None:
        args_dict['meta_fields'] = [name.upper() for name in args_dict['meta_fields']]
    if 'fields' in args_dict:
        args_dict['fields'] = [name.upper() for name in args_dict['fields']]

    # Subcommands whose two list options must have matching lengths:
    # command -> (first option, second option, error message)
    paired_options = {
        'add': ('fields', 'values',
                'You must specify exactly one value (-u) per field (-f)'),
        'rename': ('fields', 'names',
                   'You must specify exactly one new name (-k) per field (-f)'),
        'update': ('values', 'updates',
                   'You must specify exactly one value (-u) per replacement (-t)'),
    }
    pairing = paired_options.get(args.command)
    if pairing is not None:
        first_key, second_key, message = pairing
        if len(args_dict[first_key]) != len(args_dict[second_key]):
            parser.error(message)

    # Remove the entries that are not passed on as keyword arguments
    for key in ('command', 'func', 'db_files'):
        del args_dict[key]

    # Run the selected subcommand function on each database file in turn
    for db_path in args.db_files:
        args_dict['db_file'] = db_path
        args.func(**args_dict)
183edf446dcf Uploaded
davidvanzessen
parents:
diff changeset
1119