Mercurial > repos > galaxyp > percolator
annotate nested_collection.py @ 3:abed51712ed0 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b3873302e7bb7917a43b455875208e6e9fcf8f66
author | galaxyp |
---|---|
date | Sat, 08 Apr 2017 08:23:12 -0400 |
parents | 7a0951d0e13e |
children | 154147805a33 |
rev | line source |
---|---|
1
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
1 import argparse |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
2 import os |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
3 import re |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
4 from collections import OrderedDict |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
5 |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
6 |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
def get_filename_index_with_identifier(realnames, pool_id):
    """Return the positions of the file names that match a pool identifier.

    ``pool_id`` is treated as a regular expression and searched for anywhere
    in each name, so it can be a plain substring or a full pattern.

    Args:
        realnames: iterable of file names to scan.
        pool_id: regular expression identifying the pool.

    Returns:
        A list with the zero-based index of every entry of ``realnames``
        in which ``pool_id`` is found, in input order. The list is empty
        when nothing matches.

    Raises:
        re.error: if ``pool_id`` is not a valid regular expression.
    """
    # Compile once up front rather than passing the raw pattern to
    # re.search for every file name.
    pattern = re.compile(pool_id)
    return [index for index, fn in enumerate(realnames)
            if pattern.search(fn) is not None]
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
13 |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
14 |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
def get_batches_of_galaxyfiles(realnames, batchsize, pool_ids):
    """Yield batches of file indices, grouped per pool.

    Files are first grouped into pools and each pool is then cut into
    batches. A batch never spans two pools: whatever is left of a pool
    after the last full batch is emitted as a smaller, final batch.

    Args:
        realnames: sequence of file names; the yielded indices refer to
            positions in this sequence.
        batchsize: maximum number of files per batch, as an int or a
            numeric string. A falsy value (or a value that converts to
            zero or less) disables splitting, so every pool becomes one
            batch.
        pool_ids: iterable of pool identifiers, each matched against the
            file names with ``get_filename_index_with_identifier``. When
            falsy, all files are placed in a single pool named ``'pool0'``.
            Files that match none of the identifiers are not yielded; a
            file that matches several identifiers is yielded once per
            matching pool.

    Yields:
        ``(pool_id, batch)`` tuples, where ``batch`` is a non-empty list of
        indices into ``realnames``.

    Raises:
        ValueError: if ``batchsize`` is truthy but cannot be converted to
            an int.
    """
    # Convert once instead of on every appended file. A limit of 0 can never
    # equal the length of a non-empty batch, so it means "no limit".
    max_batch = int(batchsize) if batchsize else 0

    if pool_ids:
        filegroups = OrderedDict(
            (p_id, get_filename_index_with_identifier(realnames, p_id))
            for p_id in pool_ids)
    else:
        # No pooling requested: a single default pool holding every file.
        filegroups = OrderedDict([('pool0', range(len(realnames)))])

    for pool_id, grouped_indices in filegroups.items():
        batch = []
        for index in grouped_indices:
            batch.append(index)
            if len(batch) == max_batch:
                yield pool_id, batch
                batch = []
        # Flush the remainder so batches never mix files of different pools.
        if batch:
            yield pool_id, batch
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
35 |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
36 |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
def main():
    """Symlink the input files under names that encode pool and batch.

    Command-line arguments:
        --batchsize: optional maximum number of files per batch.
        --real-names: names of the input files, used to match pool ids.
        --galaxy-files: paths of the input files, in the same order as
            ``--real-names``.
        --pool-ids: optional pool identifiers matched against the real
            names.

    For every file of every batch a symlink named
    ``<pool>_batch<batch number>___inputfn<position in batch>.data`` is
    created, relative to the current working directory, pointing at the
    corresponding ``--galaxy-files`` entry. The batch number counts across
    all pools. Each link name is printed before the link is created.

    Raises:
        SystemExit: via argparse, when a required argument is missing or
            the two file lists differ in length.
        OSError: if a symlink cannot be created (for example because the
            name already exists).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', dest='batchsize', default=False)
    parser.add_argument('--real-names', dest='realnames', nargs='+',
                        required=True)
    parser.add_argument('--galaxy-files', dest='galaxyfiles', nargs='+',
                        required=True)
    parser.add_argument('--pool-ids', dest='poolids', nargs='+', default=False)
    args = parser.parse_args()

    # Batches hold indices computed from the real names and are used to look
    # up galaxy files, so both lists have to line up one-to-one.
    if len(args.realnames) != len(args.galaxyfiles):
        parser.error('--real-names and --galaxy-files must have the same '
                     'number of entries')

    batches = get_batches_of_galaxyfiles(
        args.realnames, args.batchsize, args.poolids)
    for batchcount, (pool_id, batch) in enumerate(batches):
        for fncount, index in enumerate(batch):
            dsetname = '{}_batch{}___inputfn{}.data'.format(
                pool_id, batchcount, fncount)
            print('producing', dsetname)
            os.symlink(args.galaxyfiles[index], dsetname)
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
50 |
86770eea5b09
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
galaxyp
parents:
diff
changeset
|
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()