#!/usr/bin/env python
# HG changeset patch
# User immport-devteam
# Date 1488217808 18000
# Node ID 21b2dc3934ede19811d8f9dd18e544c9e5cf17a5
# Uploaded
#
# Recovered from the changeset hunk for extract_pop/extractpop.py
# (diff -r 000000000000 -r 21b2dc3934ed, Mon Feb 27 12:50:08 2017 -0500).

######################################################################
# Copyright (c) 2016 Northrop Grumman.
# All rights reserved.
######################################################################
"""Keep or drop the rows of a tab-separated table by population number.

The input table must have a ``Population`` column.  Depending on the
``-m`` method, rows whose population is in the ``-p`` list are either
written to the output file (``selected``) or left out of it (any other
value).

Exit codes:
    0 -- success
    2 -- the population list is one of the placeholder/default values
    3 -- an entry of the population list is not an integer
"""

from __future__ import print_function
import sys
import pandas as pd

from argparse import ArgumentParser


# Values of ``-p`` that mean "the user did not fill in a population list".
_DEFAULT_VALUES = ("i.e.:2,3,11,25", "default", "Default")


def is_int(s):
    """Return True if ``int(s)`` succeeds, False if it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    return True


def _filter_pop(in_file, pop_list, out_file, keep):
    """Write the rows of ``in_file`` selected by ``pop_list`` to ``out_file``.

    Args:
        in_file: Path or file-like object holding the tab-separated input.
        pop_list: Population identifiers, as strings.
        out_file: Path or file-like object receiving the tab-separated output.
        keep: When true, write rows whose population IS in ``pop_list``;
            otherwise write the rows whose population is NOT in it.
    """
    # 'Population' is read as object so its values compare equal to the
    # string identifiers taken from the command line.
    df = pd.read_table(in_file, dtype={'Population': object})
    mask = df['Population'].isin(pop_list)
    if not keep:
        mask = ~mask
    df.loc[mask].to_csv(out_file, sep="\t", index=False)


def extract_pop(in_file, pop_list, out_file):
    """Write only the rows whose 'Population' value is in ``pop_list``."""
    _filter_pop(in_file, pop_list, out_file, keep=True)


def remove_pop(in_file, pop_list, out_file):
    """Write every row except those whose 'Population' is in ``pop_list``."""
    _filter_pop(in_file, pop_list, out_file, keep=False)


def _fail(code, message):
    """Report ``message`` on stderr and terminate with exit status ``code``."""
    print(message, file=sys.stderr)
    sys.exit(code)


def _parse_populations(pops):
    """Turn the raw ``-p`` argument into a list of population strings.

    Entries are split on commas and stripped of surrounding whitespace.
    An empty argument yields an empty list.  Exits with status 2 when
    ``pops`` is one of the placeholder values and with status 3 when an
    entry is not an integer.
    """
    if not pops:
        return []
    if pops in _DEFAULT_VALUES:
        _fail(2, "ERROR: no population list was provided "
                 "(got placeholder value '%s')." % pops)
    populations = [popn.strip() for popn in pops.split(",")]
    for popn in populations:
        if not is_int(popn):
            _fail(3, "ERROR: population '%s' is not an integer." % popn)
    return populations


def _build_parser():
    """Create the command-line parser for the script."""
    parser = ArgumentParser(
        prog="ExtractPop",
        description="Extract events associated to given population numbers.")

    parser.add_argument(
        '-i',
        dest="input_file",
        required=True,
        help="File location for the text file.")

    parser.add_argument(
        '-p',
        dest="pops",
        required=True,
        help="List of populations to extract.")

    parser.add_argument(
        '-o',
        dest="output_file",
        required=True,
        help="Name of the output file.")

    parser.add_argument(
        '-m',
        dest="method",
        required=True,
        help="What to do with the populations.")

    return parser


def main(argv=None):
    """Run the command-line tool and return its exit status.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = _build_parser().parse_args(argv)

    # check populations
    populations = _parse_populations(args.pops)

    # Only the exact method name "selected" keeps the listed populations;
    # any other value removes them.
    if args.method == "selected":
        extract_pop(args.input_file, populations, args.output_file)
    else:
        remove_pop(args.input_file, populations, args.output_file)
    return 0


if __name__ == "__main__":
    sys.exit(main())


# ---------------------------------------------------------------------
# Remaining hunks of the same changeset, kept here for reference.
# Column separators and markup were collapsed/lost in the copy this
# file was recovered from, so the text below is reproduced as found.
#
# extract_pop/extractpop.xml (@@ -0,0 +1,90 @@) -- surviving text only:
#   of interest from FLOCK or Cross Sample output.
#   pandas
#
# extract_pop/test-data/input.txt (@@ -0,0 +1,10 @@):
#   Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4 Population
#   449 157 551 129 169 292 1
#   894 1023 199 277 320 227 4
#   262 73 437 69 0 146 1
#   340 115 509 268 0 74 2
#   316 76 50 0 60 129 5
#   394 144 83 138 335 194 3
#   383 139 499 0 0 224 6
#   800 1023 239 284 288 280 2
#   388 97 534 111 83 177 4
#
# extract_pop/test-data/output.flowtext (@@ -0,0 +1,5 @@):
#   Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4 Population
#   894 1023 199 277 320 227 4
#   340 115 509 268 0 74 2
#   800 1023 239 284 288 280 2
#   388 97 534 111 83 177 4