Mercurial > repos > galaxyp > percolator
view metafiles2pin.py @ 0:3a49065a05d6 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
author | galaxyp |
---|---|
date | Wed, 07 Dec 2016 16:43:51 -0500 |
parents | |
children |
line wrap: on
line source
import argparse
import os
import re
from collections import OrderedDict


def get_filename_index_with_identifier(spectrafiles, pool_id):
    """Return the positions of the spectra files that belong to a pool.

    ``pool_id`` is passed to ``re.search`` unescaped, so it is interpreted
    as a regular expression and may match anywhere in the file name.

    Args:
        spectrafiles: Sequence of spectra file names.
        pool_id: Pattern identifying the pool.

    Returns:
        List of indices into ``spectrafiles``, in input order, of the file
        names that match ``pool_id``.
    """
    return [index for index, fn in enumerate(spectrafiles)
            if re.search(pool_id, fn) is not None]


def get_perco_batches_from_spectrafiles(spectrafiles, batchsize, ppool_ids):
    """For an amount of input spectra files, pool identifiers and a batch
    size, yield batches of files that can be percolated together.

    Files are first grouped per pool identifier (in the order the
    identifiers are given); without identifiers all files form one group.
    Every group is then cut into batches of at most ``batchsize`` files, so
    a batch never contains files from two different pools.

    Args:
        spectrafiles: Sequence of spectra file names.
        batchsize: Maximum number of files per batch; anything ``int()``
            accepts. A value below 1 never matches a batch length, which
            leaves each group as one single batch.
        ppool_ids: Iterable of pool identifier patterns, or a falsy value
            when the files are not pooled.

    Yields:
        Lists of indices into ``spectrafiles``. Empty batches are never
        yielded.
    """
    # Convert once up front instead of once per file inside the loop.
    max_batch_len = int(batchsize)
    if ppool_ids:
        # OrderedDict keeps the pools in the order they were passed, also
        # on interpreters where plain dicts are unordered.
        filegroups = OrderedDict(
            (p_id, get_filename_index_with_identifier(spectrafiles, p_id))
            for p_id in ppool_ids)
    else:
        filegroups = {1: range(len(spectrafiles))}
    for grouped_indices in filegroups.values():
        batch = []
        for index in grouped_indices:
            batch.append(index)
            if len(batch) == max_batch_len:
                yield batch
                batch = []
        # Flush the remainder of this group so the next pool starts with
        # a fresh batch.
        if batch:
            yield batch


def main():
    """Write one ``.meta2pin`` metafile per percolator batch.

    Parses the command line, creates a ``metafiles`` directory in the
    current working directory and writes one
    ``percolatorpool<N>.meta2pin`` file per batch into it. Each metafile
    lists, one per line, the search files found at the same positions as
    the spectra files of that batch.

    Exits through ``argparse`` with a usage error when a mandatory option
    is missing, ``--batchsize`` is not an integer, or fewer search files
    than spectra files are passed. ``os.makedirs`` raises if the
    ``metafiles`` directory already exists.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', dest='batchsize', type=int,
                        required=True)
    parser.add_argument('--spectrafiles', dest='spectrafiles', nargs='+',
                        required=True)
    parser.add_argument('--searchfiles', dest='searchfiles', nargs='+',
                        required=True)
    parser.add_argument('--percolator-pool-ids', dest='percopoolids',
                        nargs='+', default=False)
    args = parser.parse_args()

    # Search files are looked up by spectra file position, so a shorter
    # list would otherwise fail with an IndexError after some metafiles
    # had already been written.
    if len(args.searchfiles) < len(args.spectrafiles):
        parser.error(
            '--searchfiles needs one file per --spectrafiles entry '
            '(got {} search files for {} spectra files)'.format(
                len(args.searchfiles), len(args.spectrafiles)))

    outpath = os.path.join(os.getcwd(), 'metafiles')
    os.makedirs(outpath)
    batches = get_perco_batches_from_spectrafiles(
        args.spectrafiles, args.batchsize, args.percopoolids)
    for count, batch in enumerate(batches):
        batchfiles = [args.searchfiles[index] for index in batch]
        out_file = os.path.join(
            outpath, 'percolatorpool{}.meta2pin'.format(count))
        with open(out_file, 'w') as fp:
            fp.write('\n'.join(batchfiles))


if __name__ == '__main__':
    main()