Mercurial > repos > galaxyp > percolator
annotate metafiles2pin.py @ 0:3a49065a05d6 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
author | galaxyp |
---|---|
date | Wed, 07 Dec 2016 16:43:51 -0500 |
parents | |
children |
rev | line source |
---|---|
0
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
1 import argparse |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
2 import os |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
3 import re |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
4 from collections import OrderedDict |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
5 |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
6 |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
def get_filename_index_with_identifier(spectrafiles, pool_id):
    """Return the positions of the spectra files that match a pool identifier.

    ``pool_id`` is handed to ``re.search`` as the pattern, so it is
    interpreted as a regular expression and may match anywhere in the
    file name (it is not anchored and not escaped).

    Args:
        spectrafiles: iterable of file name strings.
        pool_id: regular expression string identifying one pool.

    Returns:
        A list of integer indices into ``spectrafiles``, in input order.
        The list is empty when no file name matches.
    """
    return [index for index, fn in enumerate(spectrafiles)
            if re.search(pool_id, fn) is not None]
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
13 |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
14 |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
def get_perco_batches_from_spectrafiles(spectrafiles, batchsize, ppool_ids):
    """Yield batches of file indices that can be percolated together.

    Files are first grouped: when ``ppool_ids`` is truthy, one group is built
    per pool identifier (in the order given) from the files whose name
    matches it; otherwise all files form a single group. Each group is then
    cut into consecutive batches of at most ``batchsize`` indices. A batch
    never contains indices from two different groups, so the last batch of a
    group may be shorter than ``batchsize``.

    Args:
        spectrafiles: sequence of spectra file names.
        batchsize: maximum number of files per batch; anything accepted by
            ``int()`` (e.g. ``10`` or ``"10"``).
        ppool_ids: iterable of pool identifier patterns, or a falsy value
            to treat all files as one pool.

    Yields:
        Lists of integer indices into ``spectrafiles``.
    """
    # Convert once instead of on every loop iteration.
    batchsize = int(batchsize)
    if ppool_ids:
        filegroups = OrderedDict(
            (p_id, get_filename_index_with_identifier(spectrafiles, p_id))
            for p_id in ppool_ids)
    else:
        filegroups = {1: range(len(spectrafiles))}
    for grouped_indices in filegroups.values():
        # A fresh batch per group keeps pools from being mixed.
        batch = []
        for index in grouped_indices:
            batch.append(index)
            if len(batch) == batchsize:
                yield batch
                batch = []
        # Flush the remainder of this group, if any.
        if batch:
            yield batch
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
33 |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
34 |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
def main():
    """Write one ``.meta2pin`` file per percolator batch.

    Command-line arguments:
        --batchsize: maximum number of files per batch.
        --spectrafiles: spectra file names used for grouping and batching.
        --searchfiles: search result files; they are indexed with the
            positions computed from ``--spectrafiles``, so both lists are
            expected to be given in the same order.
        --percolator-pool-ids: optional pool identifier patterns.

    Each batch is written to ``metafiles/percolatorpool<N>.meta2pin`` below
    the current working directory, one search file path per line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', dest='batchsize', type=int)
    parser.add_argument('--spectrafiles', dest='spectrafiles', nargs='+')
    parser.add_argument('--searchfiles', dest='searchfiles', nargs='+')
    parser.add_argument('--percolator-pool-ids', dest='percopoolids',
                        nargs='+', default=False)
    args = parser.parse_args()
    outpath = os.path.join(os.getcwd(), 'metafiles')
    try:
        os.makedirs(outpath)
    except OSError:
        # An already existing output directory is fine (e.g. on a re-run);
        # any other failure is re-raised.
        if not os.path.isdir(outpath):
            raise
    batches = get_perco_batches_from_spectrafiles(
        args.spectrafiles, args.batchsize, args.percopoolids)
    for count, batch in enumerate(batches):
        batchfiles = [args.searchfiles[index] for index in batch]
        out_file = os.path.join(
            outpath, 'percolatorpool{}.meta2pin'.format(str(count)))
        with open(out_file, 'w') as fp:
            fp.write('\n'.join(batchfiles))
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
51 |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
52 |
3a49065a05d6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()