Mercurial > repos > galaxyp > percolator
comparison metafiles2pin.py @ 0:3a49065a05d6 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
author | galaxyp |
---|---|
date | Wed, 07 Dec 2016 16:43:51 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3a49065a05d6 |
---|---|
1 import argparse | |
2 import os | |
3 import re | |
4 from collections import OrderedDict | |
5 | |
6 | |
def get_filename_index_with_identifier(spectrafiles, pool_id):
    """Return the positions of the spectra files whose name matches a pool.

    ``pool_id`` is applied as a regular expression via ``re.search``, so it
    may match anywhere inside a file name and regex metacharacters in it are
    interpreted, not taken literally.

    Args:
        spectrafiles: iterable of spectra file names (strings).
        pool_id: regular expression identifying the pool.

    Returns:
        List of indices into ``spectrafiles``, in input order, of the names
        matched by ``pool_id``. Empty when nothing matches.
    """
    # The pattern does not change between files, so compile it once.
    pattern = re.compile(pool_id)
    return [index for index, fn in enumerate(spectrafiles)
            if pattern.search(fn) is not None]
13 | |
14 | |
def get_perco_batches_from_spectrafiles(spectrafiles, batchsize, ppool_ids):
    """Yield batches of file indices that can be percolated together.

    Files are first grouped: one group per entry of ``ppool_ids`` (the files
    whose name matches that identifier), or a single group holding every file
    when ``ppool_ids`` is falsy. Each group is then cut into consecutive
    batches of at most ``batchsize`` indices. A batch never spans two groups;
    the last batch of a group may be shorter.

    Args:
        spectrafiles: sequence of spectra file names.
        batchsize: maximum number of files per batch; anything ``int()``
            accepts (e.g. ``3`` or ``'3'``). A value below 1 never equals a
            batch length, so each non-empty group is yielded as one batch.
        ppool_ids: iterable of pool identifiers (regular expressions), or a
            falsy value to disable pooling.

    Yields:
        Lists of indices into ``spectrafiles``.

    Raises:
        TypeError, ValueError: when iteration starts, if ``batchsize`` cannot
            be converted to an integer.
    """
    # Convert once; the size is the same for every file.
    max_batch = int(batchsize)
    if ppool_ids:
        # OrderedDict keeps the groups in the order the pool ids were given.
        filegroups = OrderedDict([(p_id, get_filename_index_with_identifier(
            spectrafiles, p_id)) for p_id in ppool_ids])
    else:
        # No pooling requested: all files form a single group.
        filegroups = {1: range(len(spectrafiles))}
    for grouped_indices in filegroups.values():
        batch = []
        for index in grouped_indices:
            batch.append(index)
            if len(batch) == max_batch:
                yield batch
                batch = []
        # Flush the remainder here so the next group starts a fresh batch.
        if batch:
            yield batch
33 | |
34 | |
def main():
    """Write one ``.meta2pin`` file per percolator batch.

    Parses the command line, splits the spectra files into batches with
    ``get_perco_batches_from_spectrafiles`` and, for every batch, writes the
    search files found at the same positions (one per line, no trailing
    newline) to ``metafiles/percolatorpool<N>.meta2pin`` below the current
    working directory, where ``<N>`` is the zero-based batch number.

    Spectra files and search files are paired by position, so both lists
    must have the same length.
    """
    parser = argparse.ArgumentParser()
    # These three options are all used unconditionally below; requiring them
    # reports a usage error instead of a TypeError on None later on.
    parser.add_argument('--batchsize', dest='batchsize', type=int,
                        required=True)
    parser.add_argument('--spectrafiles', dest='spectrafiles', nargs='+',
                        required=True)
    parser.add_argument('--searchfiles', dest='searchfiles', nargs='+',
                        required=True)
    parser.add_argument('--percolator-pool-ids', dest='percopoolids',
                        nargs='+', default=False)
    args = parser.parse_args()
    # Batches hold indices into spectrafiles that are then used to index
    # searchfiles, so a length mismatch would pair or drop files wrongly.
    if len(args.spectrafiles) != len(args.searchfiles):
        parser.error('--spectrafiles and --searchfiles must list the same '
                     'number of files')
    outpath = os.path.join(os.getcwd(), 'metafiles')
    # Raises if the directory already exists.
    os.makedirs(outpath)
    for count, batch in enumerate(get_perco_batches_from_spectrafiles(
            args.spectrafiles, args.batchsize, args.percopoolids)):
        batchfiles = [args.searchfiles[index] for index in batch]
        out_file = os.path.join(outpath, 'percolatorpool{}.meta2pin'.format(
            count))
        with open(out_file, 'w') as fp:
            fp.write('\n'.join(batchfiles))


if __name__ == '__main__':
    main()