Mercurial > repos > galaxyp > percolator
diff metafiles2pin.py @ 0:3a49065a05d6 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
author | galaxyp |
---|---|
date | Wed, 07 Dec 2016 16:43:51 -0500 |
parents | |
children |
line wrap: on
line diff
"""Group search result files into batches and write one list file per batch.

Every batch is written to ``metafiles/percolatorpool<N>.meta2pin`` below the
current working directory and contains the paths of the search files in that
batch, one per line.
"""
import argparse
import os
import re
from collections import OrderedDict


def get_filename_index_with_identifier(spectrafiles, pool_id):
    """Return the positions of the file names that match ``pool_id``.

    ``pool_id`` is interpreted as a regular expression and may match anywhere
    in the file name (``re.search`` semantics).
    """
    # Compile once instead of handing the raw pattern to re.search per file.
    pattern = re.compile(pool_id)
    return [index for index, fn in enumerate(spectrafiles)
            if pattern.search(fn) is not None]


def get_perco_batches_from_spectrafiles(spectrafiles, batchsize, ppool_ids):
    """Yield lists of indices into ``spectrafiles`` that form one batch each.

    spectrafiles -- sequence of spectra file names.
    batchsize    -- maximum number of files per batch (int or numeric string).
    ppool_ids    -- iterable of pool identifiers (regular expressions), or a
                    falsy value to treat all files as one single pool.

    When pool identifiers are given, files are grouped per identifier in the
    order the identifiers were passed and a batch never spans two pools. A
    file matching no identifier is left out; a file matching several
    identifiers shows up once in each of those pools.
    """
    if ppool_ids:
        filegroups = OrderedDict(
            (p_id, get_filename_index_with_identifier(spectrafiles, p_id))
            for p_id in ppool_ids)
    else:
        filegroups = {1: range(len(spectrafiles))}
    # Convert once; the command line may deliver the size as a string.
    batchsize = int(batchsize)
    for grouped_indices in filegroups.values():
        batch = []
        for index in grouped_indices:
            batch.append(index)
            if len(batch) == batchsize:
                yield batch
                batch = []
        # Flush the incomplete remainder of this pool before the next one.
        if batch:
            yield batch


def main():
    """Parse the command line and write one ``.meta2pin`` file per batch."""
    parser = argparse.ArgumentParser()
    # Mandatory arguments are declared as such so that leaving one out gives a
    # usage message rather than a TypeError further down.
    parser.add_argument('--batchsize', dest='batchsize', type=int,
                        required=True)
    parser.add_argument('--spectrafiles', dest='spectrafiles', nargs='+',
                        required=True)
    parser.add_argument('--searchfiles', dest='searchfiles', nargs='+',
                        required=True)
    parser.add_argument('--percolator-pool-ids', dest='percopoolids',
                        nargs='+', default=False)
    args = parser.parse_args()
    # Batches hold positions in spectrafiles that are looked up in
    # searchfiles, so every spectra file needs a search file at its position.
    if len(args.searchfiles) < len(args.spectrafiles):
        parser.error('got {} search files for {} spectra files; every '
                     'spectra file needs a search file at the same '
                     'position'.format(len(args.searchfiles),
                                       len(args.spectrafiles)))
    outpath = os.path.join(os.getcwd(), 'metafiles')
    # Do not fail when the output directory is left over from an earlier run.
    if not os.path.isdir(outpath):
        os.makedirs(outpath)
    batches = get_perco_batches_from_spectrafiles(
        args.spectrafiles, args.batchsize, args.percopoolids)
    for count, batch in enumerate(batches):
        batchfiles = [args.searchfiles[index] for index in batch]
        out_file = os.path.join(
            outpath, 'percolatorpool{}.meta2pin'.format(count))
        with open(out_file, 'w') as fp:
            fp.write('\n'.join(batchfiles))


if __name__ == '__main__':
    main()