Mercurial > repos > galaxyp > percolator
comparison nested_collection.py @ 1:86770eea5b09 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
author | galaxyp |
---|---|
date | Sat, 04 Mar 2017 20:36:03 -0500 |
parents | |
children | 7a0951d0e13e |
comparison
equal
deleted
inserted
replaced
0:3a49065a05d6 | 1:86770eea5b09 |
---|---|
1 import argparse | |
2 import os | |
3 import re | |
4 from collections import OrderedDict | |
5 | |
6 | |
def get_filename_index_with_identifier(realnames, pool_id):
    """Return the positions of the names in realnames that match pool_id.

    pool_id is handed to re.search as the pattern, so it is interpreted as
    a regular expression and may match anywhere inside a name. The
    positions are returned as a list in ascending order; the list is empty
    when no name matches.
    """
    return [position for position, name in enumerate(realnames)
            if re.search(pool_id, name)]
13 | |
14 | |
def get_batches_of_galaxyfiles(realnames, batchsize, pool_ids):
    """Yield (pool name, batch) pairs that split the input files into batches.

    Each batch is a list of indices into realnames. Batches never span two
    pools: whatever is left over at the end of a pool is yielded as a
    final, shorter batch for that pool.

    realnames -- sequence of file names.
    batchsize -- maximum number of files per batch, as an int or a string
        holding one. A false value (or a number that is not positive)
        means no splitting: every pool is yielded as a single batch.
    pool_ids -- patterns used to group the files, in the order in which
        the pools are to be produced. Each pattern is matched with
        get_filename_index_with_identifier. When false, all files are
        put in one pool named 'pool0'.

    Raises ValueError if batchsize is a non-empty string that does not
    hold an integer.
    """
    # Convert once instead of once per file; 0 stands for "do not split".
    max_per_batch = int(batchsize) if batchsize else 0
    if pool_ids:
        filegroups = OrderedDict(
            (p_id, get_filename_index_with_identifier(realnames, p_id))
            for p_id in pool_ids)
    else:
        filegroups = {'pool0': range(len(realnames))}
    for pool_id, grouped_indices in filegroups.items():
        batch = []
        for index in grouped_indices:
            batch.append(index)
            # A batch holds at least one index here, so a limit of zero
            # or less can never be reached and the pool stays in one piece.
            if len(batch) == max_per_batch:
                yield pool_id, batch
                batch = []
        # Leftover files of this pool form a last, smaller batch.
        if batch:
            yield pool_id, batch
35 | |
36 | |
def main():
    """Symlink the input files under names that encode pool and batch.

    Command-line options:
    --batchsize     maximum number of files per batch (optional).
    --real-names    names of the input files, used for pool matching.
    --galaxy-files  paths of the same files, in the same order.
    --pool-ids      patterns to group the files into pools (optional).

    For every file of every batch a symlink named
    '<pool>___batch<batch number>_inputfn<position in batch>.mzid' is
    created, relative to the current working directory. Batch numbers
    keep counting across pools.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', dest='batchsize', default=False)
    parser.add_argument('--real-names', dest='realnames', nargs='+',
                        required=True)
    parser.add_argument('--galaxy-files', dest='galaxyfiles', nargs='+',
                        required=True)
    parser.add_argument('--pool-ids', dest='poolids', nargs='+',
                        default=False)
    args = parser.parse_args()
    # Batches hold indices into realnames which are used to look up
    # galaxyfiles, so the two lists have to line up one to one.
    if len(args.realnames) != len(args.galaxyfiles):
        parser.error('--real-names and --galaxy-files must list the same '
                     'number of files')
    batches = get_batches_of_galaxyfiles(args.realnames, args.batchsize,
                                         args.poolids)
    for batchcount, (pool_id, batch) in enumerate(batches):
        for fncount, index in enumerate(batch):
            dsetname = '{}___batch{}_inputfn{}.mzid'.format(
                pool_id, batchcount, fncount)
            # One pre-formatted string prints the same on Python 2 and 3.
            print('producing {}'.format(dsetname))
            os.symlink(args.galaxyfiles[index], dsetname)
50 | |
# Only run the command-line entry point when executed as a script.
if __name__ == '__main__':
    main()