Mercurial > repos > miller-lab > genome_diversity
annotate make_gd_file.xml @ 28:184d14e4270d
Update to Miller Lab devshed revision 4ede22dd5500
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Wed, 17 Jul 2013 12:46:46 -0400 |
parents | 8997f2ca8c7a |
children |
rev | line source |
---|---|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
1 <tool id="gd_make_gd_file" name="Make File" version="1.0.0"> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
2 <description>: Build a gd_snp or gd_genotype file</description> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
3 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
4 <command interpreter="python"> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
5 #import base64 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
6 #set $preamble_arg = base64.b64encode(str($preamble_names)) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
7 #set $species_arg = base64.b64encode(str($species)) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
8 make_gd_file.py '$input' '$scaffold_col' '$pos_col' '$ref_col' '$rPos_col' '$preamble_arg' '$names' '$species_arg' '$dbkey' '$output_type' '$output' |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
9 </command> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
10 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
11 <inputs> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
12 <param name="input" type="data" format="tabular" label="Input dataset" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
13 <param name="scaffold_col" type="data_column" data_ref="input" label="Column with scaffold/contig" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
14 <param name="pos_col" type="data_column" numerical="true" data_ref="input" label="Column with position" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
15 <param name="ref_col" type="data_column" data_ref="input" label="Column with reference species chromosome" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
16 <param name="rPos_col" type="data_column" numerical="true" data_ref="input" label="Column with reference species position" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
17 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
18 <param name="preamble_names" type="text" area="true" size="5x40" label="Preamble column names"> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
19 <sanitizer> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
20 <valid initial="string.printable"/> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
21 </sanitizer> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
22 </param> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
23 <param name="names" type="data" format="txt" label="Names dataset" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
24 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
25 <param name="species" type="text" label="Focus species"> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
26 <sanitizer> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
27 <valid initial="string.printable"/> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
28 </sanitizer> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
29 </param> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
30 <param name="dbkey" type="genomebuild" label="Reference species" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
31 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
32 <param name="output_type" type="select" label="Output format"> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
33 <option value="gd_snp" selected="true">gd_snp</option> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
34 <option value="gd_genotype">gd_genotype</option> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
35 </param> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
36 </inputs> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
37 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
38 <outputs> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
39 <data name="output" format="gd_snp"> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
40 <change_format> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
41 <when input="output_type" value="gd_genotype" format="gd_genotype" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
42 </change_format> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
43 </data> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
44 </outputs> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
45 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
46 <!-- |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
47 <tests> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
48 <test> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
49 <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
50 <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
51 <param name="lo_coverage" value="0" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
52 <param name="hi_coverage" value="1000" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
53 <param name="low_ind_cov" value="3" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
54 <param name="lo_quality" value="30" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
55 <output name="output" file="test_out/modify_snp_table/modify.gd_snp" /> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
56 </test> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
57 </tests> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
58 --> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
59 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
60 <help> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
61 **Dataset formats** |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
62 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
63 The input datasets are in tabular_ and text_ formats. |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
64 The output dataset is in gd_snp_ or gd_genotype_ format. (`Dataset missing?`_) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
65 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
66 .. _tabular: ./static/formatHelp.html#tab |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
67 .. _text: ./static/formatHelp.html#text |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
68 .. _gd_snp: ./static/formatHelp.html#gd_snp |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
69 .. _gd_genotype: ./static/formatHelp.html#gd_genotype |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
70 .. _Dataset missing?: ./static/formatHelp.html |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
71 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
72 ----- |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
73 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
74 **What it does** |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
75 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
76 This tool simplifies the job of creating a Galaxy file with format gd_snp |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
77 or gd_genotype. Often, the most complex part of preparing one of these |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
78 files is to specify how individuals are related to columns of the table, |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
79 a task facilitated by this command. Each gd_snp or gd_genotype file |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
80 typically has columnns giving: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
81 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
82 1. scaffold/contig name |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
83 2. zero-based position in the scaffold/contig |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
84 3. reference species chromosome |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
85 4. zero-based position in the reference species chromosome |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
86 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
87 The user needs to specify the columns containing these data. Columns are |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
88 numbered starting with 1. The user also specifies brief column names for |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
89 these columns. When the focus species and the reference species are the |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
90 same, the scaffold/contig name and reference species chromosome columns |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
91 will be identical, as will the position in the scaffold/contig and |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
92 position in the reference species chromosome columns. |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
93 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
94 To inform Galaxy about the correpondence between individuals and columns |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
95 of the table, the user directs the tool to a history item that lists |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
96 the individuals in order. Each line starts with unique name for the |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
97 individuals (no embedded space or tab character), followed by an arbitrary |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
98 (possibly empty) set of words that are helpful for specifying groups |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
99 of individuals. |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
100 </help> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
101 </tool> |