0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 ######################################################################
|
|
4 # Copyright (c) 2016 Northrop Grumman.
|
|
5 # All rights reserved.
|
|
6 ######################################################################
|
|
7 from __future__ import print_function
|
|
8 import sys
|
|
9 import pandas as pd
|
|
10 from argparse import ArgumentParser
|
|
11
|
|
12
|
|
def auto_collapse(input_file, profile_file, output, report):
    """Merge populations that share an identical profile and renumber them.

    The profile file is read as tab-separated text. Its first line is a
    header; on every following line the first field is a population
    identifier and the fields between the first and the last two form the
    profile. (The last two columns are ignored -- presumably count and
    percentage columns; confirm against the profile format.) Populations
    whose profiles are identical are grouped, and each group receives a
    new 1-based number in order of first appearance in the profile file.

    Args:
        input_file: path of a tab-separated table with a ``Population``
            column; read with ``pandas.read_table``.
        profile_file: path of the tab-separated profile table.
        output: path the relabelled table is written to (tab-separated,
            no index column).
        report: path of the tab-separated report listing, for each new
            population number, the former populations and their profile.

    Populations in ``input_file`` that do not appear in the profile file
    keep their original label.
    """
    pops_by_profile = {}
    # First-seen order is tracked explicitly so the numbering does not rely
    # on dict iteration order (not guaranteed on older interpreters).
    profile_order = []
    with open(profile_file, "r") as pf:
        markers = pf.readline().strip().split("\t")
        for pfline in pf:
            stripped = pfline.strip()
            if not stripped:
                # A blank (e.g. trailing) line would otherwise be recorded
                # as a population '' with an empty profile.
                continue
            fields = stripped.split("\t")
            profil = "\t".join(fields[1:-2])
            if profil not in pops_by_profile:
                pops_by_profile[profil] = []
                profile_order.append(profil)
            pops_by_profile[profil].append(fields[0])

    pop_to_collapse = [pops_by_profile[profil] for profil in profile_order]

    with open(report, "w") as rt:
        rt.write("New_Population\tFormer_Populations\t")
        rt.write("\t".join(markers[1:-2]) + "\n")
        for new_id, profil in enumerate(profile_order, start=1):
            former = ", ".join(pops_by_profile[profil])
            rt.write("\t".join([str(new_id), former, profil]) + "\n")

    # Population is read as object so labels compare as the strings that
    # were read from the profile file.
    df = pd.read_table(input_file, dtype={'Population': object})
    original = df['Population']
    # Relabel in a standalone Series instead of a scratch column, so an
    # input column that happens to be named 'new_population' is preserved.
    collapsed = original.copy()
    for new_id, former_pops in enumerate(pop_to_collapse, start=1):
        # Membership is always tested against the untouched original labels,
        # so a freshly assigned number can never be matched again.
        collapsed.loc[original.isin(former_pops)] = new_id
    df['Population'] = collapsed
    df.to_csv(output, sep="\t", index=False)
|
|
45
|
|
46
|
|
if __name__ == "__main__":
    # Script entry point: read the four required paths from the command
    # line, run the collapse, then exit with status 0.
    parser = ArgumentParser(
        prog="auto_pop_collapse_from_profile",
        description="collapses FLOCK populations based on profile.")

    # (flag, dest, help) for each required option, registered in this order.
    required_options = (
        ('-i', "input_file", "FLOCK output file"),
        ('-o', "output", "Name of the output file."),
        ('-r', "report", "Name of the report file."),
        ('-p', "profile", "File location for the profile.txt from FLOCK."),
    )
    for flag, dest_name, help_text in required_options:
        parser.add_argument(
            flag,
            dest=dest_name,
            required=True,
            help=help_text)

    args = parser.parse_args()

    auto_collapse(args.input_file, args.profile, args.output, args.report)
    sys.exit(0)
|