Mercurial > repos > galaxyp > percolator
comparison nested_collection.py @ 4:154147805a33 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 9db2c1bb610ff3a6940f0a037c0fccf337692c36
author | galaxyp |
---|---|
date | Fri, 28 Apr 2017 12:25:36 -0400 |
parents | abed51712ed0 |
children | 07107a686ce9 |
comparison
equal
deleted
inserted
replaced
3:abed51712ed0 | 4:154147805a33 |
---|---|
18 if pool_ids: | 18 if pool_ids: |
19 filegroups = OrderedDict([(p_id, get_filename_index_with_identifier( | 19 filegroups = OrderedDict([(p_id, get_filename_index_with_identifier( |
20 realnames, p_id)) for p_id in pool_ids]) | 20 realnames, p_id)) for p_id in pool_ids]) |
21 else: | 21 else: |
22 filegroups = {1: range(len(realnames))} | 22 filegroups = {1: range(len(realnames))} |
23 batch = [] | 23 batch, in_pool_indices = [], [] |
24 for pool_id, grouped_indices in filegroups.items(): | 24 for pool_id, grouped_indices in filegroups.items(): |
25 if pool_id == 1: | 25 if pool_id == 1: |
26 pool_id = 'pool0' | 26 pool_id = 'pool0' |
27 for index in grouped_indices: | 27 for in_pool_index, total_index in enumerate(grouped_indices): |
28 batch.append(index) | 28 batch.append(total_index) |
| | 29 in_pool_indices.append(in_pool_index) |
29 if batchsize and len(batch) == int(batchsize): | 30 if batchsize and len(batch) == int(batchsize): |
30 yield pool_id, batch | 31 yield pool_id, batch, in_pool_indices |
31 batch = [] | 32 batch, in_pool_indices = [], [] |
32 if len(batch) > 0: | 33 if len(batch) > 0: |
33 yield pool_id, batch | 34 yield pool_id, batch, in_pool_indices |
34 batch = [] | 35 batch, in_pool_indices = [], [] |
35 | 36 |
36 | 37 |
37 def main(): | 38 def main(): |
38 parser = argparse.ArgumentParser() | 39 parser = argparse.ArgumentParser() |
39 parser.add_argument('--batchsize', dest='batchsize', default=False) | 40 parser.add_argument('--batchsize', dest='batchsize', default=False) |
40 parser.add_argument('--real-names', dest='realnames', nargs='+') | 41 parser.add_argument('--real-names', dest='realnames', nargs='+') |
41 parser.add_argument('--galaxy-files', dest='galaxyfiles', nargs='+') | 42 parser.add_argument('--galaxy-files', dest='galaxyfiles', nargs='+') |
42 parser.add_argument('--pool-ids', dest='poolids', nargs='+', default=False) | 43 parser.add_argument('--pool-ids', dest='poolids', nargs='+', default=False) |
43 args = parser.parse_args() | 44 args = parser.parse_args() |
44 for batchcount, (pool_id, batch) in enumerate(get_batches_of_galaxyfiles( | 45 batches = [x for x in get_batches_of_galaxyfiles(args.realnames, args.batchsize, args.poolids)] |
45 args.realnames, args.batchsize, args.poolids)): | 46 batchdigits = len(str(len(batches))) |
46 for fncount, batchfile in enumerate([args.galaxyfiles[index] for index in batch]): | 47 if args.poolids: |
47 dsetname = '{}_batch{}___inputfn{}.data'.format(pool_id, batchcount, fncount) | 48 pooldigits = {pid: [] for pid in args.poolids} |
| | 49 for batchdata in batches: |
| | 50 pooldigits[batchdata[0]].append(len(batchdata[1])) |
| | 51 pooldigits = {pid: len(str(sum(batchlengths))) for pid, batchlengths in pooldigits.items()} |
| | 52 else: |
| | 53 pooldigits = {'pool0': len(str(len(args.galaxyfiles)))} |
| | 54 for batchcount, (pool_id, batch, in_pool_indices) in enumerate(batches): |
| | 55 for fnindex, in_pool_index in zip(batch, in_pool_indices): |
| | 56 dsetname = '{pid}_batch{bi:0{bd}d}___inputfn{fi:0{pd}d}_{real}.data'.format(pid=pool_id, bi=batchcount, bd=batchdigits, fi=in_pool_index, pd=pooldigits[pool_id], real=args.realnames[fnindex]) |
48 print('producing', dsetname) | 57 print('producing', dsetname) |
49 os.symlink(batchfile, dsetname) | 58 os.symlink(args.galaxyfiles[fnindex], dsetname) |
50 | 59 |
51 if __name__ == '__main__': | 60 if __name__ == '__main__': |
52 main() | 61 main() |