annotate add_fst_column.py @ 25:cba0d7a63b82

workaround for gd_genotype datatype admix shift int -> float
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 29 May 2013 13:49:19 -0400
parents 248b06e86022
children 8997f2ca8c7a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 # <command interpreter="python">
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4 # add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source.choice" "$data_source.min_value" "$retain" "$discard_fixed" "$biased" "$output"
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
5 # #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
6 # #set $arg = '%s:%s' % ($individual_col, $individual)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
7 # "$arg"
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8 # #end for
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
9 # </command>
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
10
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
11 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
12 import subprocess
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
13 from Population import Population
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
14
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
15 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
16
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def column_specs(columns, input_type, population_number):
    """Build the ``column:population`` arguments for one population.

    Args:
        columns: iterable of column identifiers, as returned by
            ``Population.column_list()``.
        input_type: datatype name of the input table.  When it is
            ``'gd_genotype'`` every column is converted to ``int`` and
            reduced by 2 before being formatted.
        population_number: population tag appended after the colon
            (1 or 2 in this script).

    Returns:
        A list of strings of the form ``"<column>:<population_number>"``.
    """
    specs = []
    for column in columns:
        if input_type == 'gd_genotype':
            # gd_genotype inputs are handed to Fst_column with the column
            # number shifted down by two.
            # NOTE(review): presumably compensates for a different column
            # layout in gd_genotype tables -- confirm against Fst_column.
            column = int(column) - 2
        specs.append('{0}:{1}'.format(column, population_number))
    return specs


def main(argv):
    """Validate the two populations and run ``Fst_column`` on the input.

    Args:
        argv: full argument vector (``sys.argv`` style).  Positions 1-11 are
            input, p1_input, p2_input, input_type, genotypes, min_reads,
            min_qual, retain, discard_fixed, biased, output; every remaining
            item is an individual metadata tag.

    Returns:
        The process exit status: 1 on a usage error or when a population
        contains an individual that is not in the SNP table, otherwise the
        return code of the ``Fst_column`` child process.
    """
    if len(argv) < 12:
        sys.stderr.write("Usage\n")
        return 1

    (input_path, p1_input, p2_input, input_type, genotypes, min_reads,
     min_qual, retain, discard_fixed, biased, output) = argv[1:12]
    individual_metadata = argv[12:]

    p_total = Population()
    p_total.from_tag_list(individual_metadata)

    # Load and validate population 1, then population 2, stopping at the
    # first one that is not covered by the individuals of the input table.
    populations = []
    for number, path in ((1, p1_input), (2, p2_input)):
        population = Population()
        population.from_population_file(path)
        if not p_total.is_superset(population):
            sys.stderr.write(
                'There is an individual in population {0} that is not in '
                'the SNP table\n'.format(number))
            return 1
        populations.append((number, population))

    args = ['Fst_column', input_path, genotypes, min_reads, min_qual,
            retain, discard_fixed, biased]
    for number, population in populations:
        args.extend(column_specs(population.column_list(), input_type, number))

    # The child's stdout is written straight to the output file; ``with``
    # guarantees the handle is closed even if the program cannot be started.
    with open(output, 'w') as fh:
        child = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh,
                                 stderr=sys.stderr)
        rc = child.wait()

    # Propagate the child's status instead of always reporting success.
    return rc


if __name__ == '__main__':
    sys.exit(main(sys.argv))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
73