Mercurial > repos > miller-lab > genome_diversity
annotate specify.xml @ 36:51cd0307fb70
Phylip's extra outputs are now stored in the job working directory
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Wed, 20 Nov 2013 16:32:01 -0500 |
parents | 8997f2ca8c7a |
children |
rev | line source |
---|---|
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
1 <tool id="gd_specify" name="Specify Individuals" version="1.1.0"> |
13 | 2 <description>: Define a collection of individuals from a gd_snp or gd_genotype dataset</description> |
3 | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
4 <command interpreter="python"> |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
5 #import json |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
6 #import base64 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
7 #import zlib |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
8 #set $ind_names = $input.dataset.metadata.individual_names |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
9 #set $ind_colms = $input.dataset.metadata.individual_columns |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
10 #set $ind_dict = dict(zip($ind_names, $ind_colms)) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
11 #set $ind_json = json.dumps($ind_dict, separators=(',',':')) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
12 #set $ind_comp = zlib.compress($ind_json, 9) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
13 #set $ind_arg = base64.b64encode($ind_comp) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
14 #set $cb_string = str($individuals).strip() |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
15 #if $cb_string != 'None' |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
16 #set $cb_dict = dict.fromkeys($cb_string.split('\t')) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
17 #for $cb_name in $cb_dict: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
18 #set $cb_idx = $input.dataset.metadata.individual_names.index($cb_name) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
19 #set $cb_dict[$cb_name] = str($input.dataset.metadata.individual_columns[$cb_idx]) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
20 #end for |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
21 #else |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
22 #set $cb_dict = dict() |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
23 #end if |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
24 #set $cb_json = json.dumps($cb_dict, separators=(',',':')) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
25 #set $cb_comp = zlib.compress($cb_json, 9) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
26 #set $cb_arg = base64.b64encode($cb_comp) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
27 #set $str_string = str($string).strip() |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
28 #set $str_comp = zlib.compress($str_string, 9) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
29 #set $str_arg = base64.b64encode($str_comp) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
30 specify.py '$input' '$output' '$ind_arg' '$cb_arg' '$str_arg' |
13 | 31 </command> |
32 | |
33 <inputs> | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
34 <param name="input" type="data" format="gd_snp,gd_genotype" label="SNP or Genotype dataset"/> |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
35 <param name="individuals" type="select" display="checkboxes" multiple="true" separator="	" label="Individuals to include"> |
13 | 36 <options> |
37 <filter type="data_meta" ref="input" key="individual_names" /> | |
38 </options> | |
39 </param> | |
40 <param name="outname" type="text" size="20" label="Label for this collection"> | |
41 <validator type="empty_field" message="You must enter a label."/> | |
42 #used to be "Individuals from ${input.hid}" | |
43 </param> | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
44 <param name="string" type="text" area="true" size="5x40" label="Individuals to include"> |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
45 <sanitizer> |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
46 <valid initial="string.printable"/> |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
47 </sanitizer> |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
48 </param> |
13 | 49 </inputs> |
50 | |
51 <outputs> | |
52 <data name="output" format="gd_indivs" label="${outname}" /> | |
53 </outputs> | |
54 | |
55 <tests> | |
56 <test> | |
57 <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> | |
58 <param name="individuals" value="PB1,PB2" /> | |
59 <output name="output" file="test_in/a.gd_indivs" /> | |
60 </test> | |
61 </tests> | |
62 | |
63 <help> | |
64 | |
65 **Dataset formats** | |
66 | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
67 The input dataset is in gd_snp_ or gd_genotype_ format; |
13 | 68 the output is in gd_indivs_ format. (`Dataset missing?`_) |
69 | |
70 .. _gd_snp: ./static/formatHelp.html#gd_snp | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
71 .. _gd_genotype: ./static/formatHelp.html#gd_genotype |
13 | 72 .. _gd_indivs: ./static/formatHelp.html#gd_indivs |
73 .. _Dataset missing?: ./static/formatHelp.html | |
74 | |
75 ----- | |
76 | |
77 **What it does** | |
78 | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
79 This tool makes a list of selected entities, i.e., the sets of four |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
80 columns representing individuals or groups from a gd_snp dataset, or |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
81 sets of single columns in a gd_genotype file. It does not copy the |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
82 data; it just records which entities should be considered as belonging |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
83 to some collection or population. The label you specify is used to |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
84 name the output dataset in your history. This list can then be used |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
85 to instruct other tools to work on just part of the original gd_snp or |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
86 gd_genotype dataset. The entities can be specified with the checklist |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
87 and/or by pasting their names (possibly with extraneous characters, as |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
88 in a portion of the Newick-format output of the Phylogenetic Tree tool) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
13
diff
changeset
|
89 into the box provided at the bottom of the page. |
13 | 90 |
91 ----- | |
92 | |
93 **Example** | |
94 | |
95 - input:: | |
96 | |
97 Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 | |
98 Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 | |
99 Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 | |
100 etc. | |
101 | |
102 - input metadata:: | |
103 | |
104 #{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc", | |
105 #"1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q", | |
106 #"pair","dist","prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]], | |
107 #"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"} | |
108 | |
109 - output when individuals PB1, PB2, and PB3 are selected:: | |
110 | |
111 9 PB1 | |
112 13 PB2 | |
113 17 PB3 | |
114 | |
115 </help> | |
116 </tool> |