annotate make_gd_file.xml @ 28:184d14e4270d

Update to Miller Lab devshed revision 4ede22dd5500
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 17 Jul 2013 12:46:46 -0400
parents 8997f2ca8c7a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
1 <tool id="gd_make_gd_file" name="Make File" version="1.0.0">
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
2 <description>: Build a gd_snp or gd_genotype file</description>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
3
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
4 <command interpreter="python">
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
5 #import base64
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
6 #set $preamble_arg = base64.b64encode(str($preamble_names))
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
7 #set $species_arg = base64.b64encode(str($species))
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
8 make_gd_file.py '$input' '$scaffold_col' '$pos_col' '$ref_col' '$rPos_col' '$preamble_arg' '$names' '$species_arg' '$dbkey' '$output_type' '$output'
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
9 </command>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
10
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
11 <inputs>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
12 <param name="input" type="data" format="tabular" label="Input dataset" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
13 <param name="scaffold_col" type="data_column" data_ref="input" label="Column with scaffold/contig" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
14 <param name="pos_col" type="data_column" numerical="true" data_ref="input" label="Column with position" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
15 <param name="ref_col" type="data_column" data_ref="input" label="Column with reference species chromosome" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
16 <param name="rPos_col" type="data_column" numerical="true" data_ref="input" label="Column with reference species position" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
17
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
18 <param name="preamble_names" type="text" area="true" size="5x40" label="Preamble column names">
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
19 <sanitizer>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
20 <valid initial="string.printable"/>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
21 </sanitizer>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
22 </param>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
23 <param name="names" type="data" format="txt" label="Names dataset" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
24
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
25 <param name="species" type="text" label="Focus species">
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
26 <sanitizer>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
27 <valid initial="string.printable"/>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
28 </sanitizer>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
29 </param>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
30 <param name="dbkey" type="genomebuild" label="Reference species" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
31
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
32 <param name="output_type" type="select" label="Output format">
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
33 <option value="gd_snp" selected="true">gd_snp</option>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
34 <option value="gd_genotype">gd_genotype</option>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
35 </param>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
36 </inputs>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
37
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
38 <outputs>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
39 <data name="output" format="gd_snp">
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
40 <change_format>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
41 <when input="output_type" value="gd_genotype" format="gd_genotype" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
42 </change_format>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
43 </data>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
44 </outputs>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
45
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
46 <!--
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
47 <tests>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
48 <test>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
49 <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
50 <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
51 <param name="lo_coverage" value="0" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
52 <param name="hi_coverage" value="1000" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
53 <param name="low_ind_cov" value="3" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
54 <param name="lo_quality" value="30" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
55 <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
56 </test>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
57 </tests>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
58 -->
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
59
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
60 <help>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
61 **Dataset formats**
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
62
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
63 The input datasets are in tabular_ and text_ formats.
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
64 The output dataset is in gd_snp_ or gd_genotype_ format. (`Dataset missing?`_)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
65
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
66 .. _tabular: ./static/formatHelp.html#tab
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
67 .. _text: ./static/formatHelp.html#text
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
68 .. _gd_snp: ./static/formatHelp.html#gd_snp
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
69 .. _gd_genotype: ./static/formatHelp.html#gd_genotype
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
70 .. _Dataset missing?: ./static/formatHelp.html
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
71
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
72 -----
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
73
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
74 **What it does**
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
75
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
76 This tool simplifies the job of creating a Galaxy file with format gd_snp
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
77 or gd_genotype. Often, the most complex part of preparing one of these
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
78 files is to specify how individuals are related to columns of the table,
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
79 a task facilitated by this command. Each gd_snp or gd_genotype file
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
80 typically has columnns giving:
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
81
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
82 1. scaffold/contig name
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
83 2. zero-based position in the scaffold/contig
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
84 3. reference species chromosome
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
85 4. zero-based position in the reference species chromosome
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
86
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
87 The user needs to specify the columns containing these data. Columns are
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
88 numbered starting with 1. The user also specifies brief column names for
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
89 these columns. When the focus species and the reference species are the
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
90 same, the scaffold/contig name and reference species chromosome columns
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
91 will be identical, as will the position in the scaffold/contig and
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
92 position in the reference species chromosome columns.
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
93
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
94 To inform Galaxy about the correpondence between individuals and columns
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
95 of the table, the user directs the tool to a history item that lists
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
96 the individuals in order. Each line starts with unique name for the
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
97 individuals (no embedded space or tab character), followed by an arbitrary
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
98 (possibly empty) set of words that are helpful for specifying groups
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
99 of individuals.
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
100 </help>
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
diff changeset
101 </tool>