Mercurial > repos > galaxyp > percolator
comparison metafiles2pin.py @ 0:3a49065a05d6 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
| author | galaxyp |
|---|---|
| date | Wed, 07 Dec 2016 16:43:51 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3a49065a05d6 |
|---|---|
| 1 import argparse | |
| 2 import os | |
| 3 import re | |
| 4 from collections import OrderedDict | |
| 5 | |
| 6 | |
def get_filename_index_with_identifier(spectrafiles, pool_id):
    """Return the positions of the spectra files whose name matches a pool.

    ``pool_id`` is handed to ``re.search`` as the pattern, so it is
    interpreted as a regular expression and may match anywhere in the file
    name.  The returned list holds the indices into ``spectrafiles`` in
    ascending order; it is empty when nothing matches.
    """
    # A match object is always truthy, so the search result can be used
    # directly as the filter condition.
    return [position for position, filename in enumerate(spectrafiles)
            if re.search(pool_id, filename)]
| 13 | |
| 14 | |
def get_perco_batches_from_spectrafiles(spectrafiles, batchsize, ppool_ids):
    """Yield lists of spectra-file indices that can be percolated together.

    When ``ppool_ids`` is truthy, the files are first grouped per pool
    identifier (in the order the identifiers are given) using
    ``get_filename_index_with_identifier``; otherwise all files form a
    single group.  Each group is then cut into batches holding
    ``int(batchsize)`` indices, with any remainder of the group emitted as
    a final, smaller batch.
    """
    if not ppool_ids:
        # No pool identifiers supplied: every file belongs to one group.
        filegroups = {1: range(len(spectrafiles))}
    else:
        filegroups = OrderedDict(
            (pool, get_filename_index_with_identifier(spectrafiles, pool))
            for pool in ppool_ids)
    pending = []
    for members in filegroups.values():
        for file_index in members:
            pending.append(file_index)
            if len(pending) == int(batchsize):
                yield pending
                pending = []
        # Emit what is left of this group before moving to the next one,
        # so a batch never mixes files from two groups.
        if pending:
            yield pending
            pending = []
| 33 | |
| 34 | |
def main():
    """Write one ``percolatorpool<N>.meta2pin`` file per percolator batch.

    Parses the command line, creates a ``metafiles`` directory under the
    current working directory, and for every batch produced by
    ``get_perco_batches_from_spectrafiles`` writes the search files at the
    batch's indices, one per line, to a numbered output file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', dest='batchsize')
    parser.add_argument('--spectrafiles', dest='spectrafiles', nargs='+')
    parser.add_argument('--searchfiles', dest='searchfiles', nargs='+')
    parser.add_argument('--percolator-pool-ids', dest='percopoolids',
                        nargs='+', default=False)
    args = parser.parse_args()

    outpath = os.path.join(os.getcwd(), 'metafiles')
    os.makedirs(outpath)

    batches = get_perco_batches_from_spectrafiles(
        args.spectrafiles, args.batchsize, args.percopoolids)
    for number, indices in enumerate(batches):
        # Batch indices refer to positions in --spectrafiles; the search
        # file at the same position is what gets listed in the output.
        contents = '\n'.join([args.searchfiles[i] for i in indices])
        target = os.path.join(
            outpath, 'percolatorpool{}.meta2pin'.format(number))
        with open(target, 'w') as handle:
            handle.write(contents)
| 51 | |
| 52 | |
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
