<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper for rgfakePed.py.

  Collects the parameters of a synthetic genotype data set (subject counts, marker count,
  minor allele frequency settings, missingness and Mendel error rates, output flavour) from
  the tool form and hands them to the script on its command line. The result is a single
  "lped" dataset labelled with the job title.
-->
<tool id="rgfakePed1" name="Null genotypes" version="0.02">
  <description>for testing</description>

  <!--
    Each form parameter is substituted into the command as a single-quoted option value.
    Option letter to parameter mapping, as written below:
      -o output dataset path        -p output dataset files_path directory
      -c ncases                     -n ntotal
      -s nsnp                       -w lowmaf
      -v missingValue               -l outFormat
      -d mafdist                    -m missingRate
      -M mendelRate
    The job title is passed with the long title option.
  -->
  <command interpreter="python">rgfakePed.py --title '$title'
    -o '$out_file1' -p '$out_file1.files_path' -c '$ncases' -n '$ntotal'
    -s '$nsnp' -w '$lowmaf' -v '$missingValue' -l '$outFormat'
    -d '$mafdist' -m '$missingRate' -M '$mendelRate' </command>

  <inputs>
    <!-- Job title; also used to build the output dataset label. -->
    <param name="title"
           type="text"
           value="Fake_test_geno_data"
           label="Descriptive short name"
           help="Name for outputs from this job"/>

    <!-- Sample size controls. Per the help text, ncases=0 switches to family (trio) data. -->
    <param name="ntotal"
           type="integer"
           value="200"
           label="Create this total N subjects"
           help="N total: total number of subjects"/>
    <param name="ncases"
           type="integer"
           value="100"
           label="Total N Cases (0=generate family data - trios)"
           help="N cases: Independent subjects with status set to 2. Set 0 for family data (NSubj/3 trios)"/>

    <!-- Marker count and minor allele frequency (MAF) settings. -->
    <param name="nsnp"
           type="integer"
           value="1000"
           label="Total N SNP"
           help="nsnp: total number of markers"/>
    <param name="lowmaf"
           type="float"
           value="0.01"
           label="Lower MAF limit (default=1%)"
           help="Lower limit for MAF distribution"/>
    <param name="mafdist"
           type="select"
           label="SNP Minor Allele Frequency distribution"
           help="Choose a MAF distribution">
      <option value="U" selected="true">Uniform</option>
      <option value="T">Triangular (more low frequency SNPs)</option>
    </param>

    <!--
      Output flavour handed to the script.
      NOTE(review): the output dataset below is always declared as format "lped", even when
      the fbat option is chosen here. Confirm that this is intended.
    -->
    <param name="outFormat"
           type="select"
           label="Output format file type - linkage ped or fbat ped"
           help="Choose an output format">
      <option value="L" selected="true">Linkage format - separate .map file</option>
      <option value="F">fbat style - marker names in a header row</option>
    </param>

    <!-- Error injection rates, expressed as fractions. -->
    <param name="missingRate"
           type="float"
           value="0.05"
           label="Missing genotype call fraction"
           help="Fraction of genotypes to be randomly set missing"/>
    <param name="mendelRate"
           type="float"
           value="0.05"
           label="Mendel error transmission rate"
           help="(family data) Fraction of apparently non-Mendelian transmission patterns"/>

    <!-- Allele code written for a missing call in the output ped file. -->
    <param name="missingValue"
           type="text"
           size="1"
           value="0"
           label="Missing value for an allele for the output ped file"
           help="Missing allele value"/>
  </inputs>

  <outputs>
    <data format="lped" name="out_file1" label="${title}.lped"/>
  </outputs>

  <tests>
    <!--
      Small case/control run: 40 subjects, 20 cases, 10 markers, triangular MAF distribution,
      linkage output, no missingness and no Mendel errors.
      The primary output and the ped file are compared with a lines_diff allowance, while the
      map file is compared without one. Presumably the allowance absorbs run-to-run variation
      in the generated data (TODO confirm).
    -->
    <test>
      <param name="title" value="rgfakePedtest1"/>
      <param name="ntotal" value="40"/>
      <param name="ncases" value="20"/>
      <param name="nsnp" value="10"/>
      <param name="lowmaf" value="0"/>
      <param name="mafdist" value="T"/>
      <param name="outFormat" value="L"/>
      <param name="missingRate" value="0"/>
      <param name="mendelRate" value="0"/>
      <param name="missingValue" value="0"/>
      <output name="out_file1" file="rgtestouts/rgfakePed/rgfakePedtest1.lped" ftype="lped" compare="diff" lines_diff="5">
        <extra_files type="file" name="RgeneticsData.ped" value="rgtestouts/rgfakePed/rgfakePedtest1.ped" compare="diff" lines_diff="80"/>
        <extra_files type="file" name="RgeneticsData.map" value="rgtestouts/rgfakePed/rgfakePedtest1.map" compare="diff"/>
      </output>
    </test>
  </tests>

  <!-- The help body is reStructuredText and is kept at column 0 because its indentation is significant. -->
  <help>
.. class:: infomark

This tool allows you to generate an arbitrary (sort of)
synthetic genotype file (no attempt at LD - the markers are independent)
with optional missingness, Mendel errors, minor allele frequency settings, family structure.
These might be used for testing under
the null hypothesis of no association and are certainly useful for
scale testing.

Note that although it runs reasonably fast given it's a script, generating a large data set takes
a while. An hour or so should get you a reasonable (3GB) sized simulated null data set.

A better simulator can easily be swapped in with this tool interface.

-----

.. class:: warningmark

This tool is very experimental

.. class:: infomark

**Attribution and Licensing**

Designed and written for the Rgenetics Galaxy tools
copyright Ross Lazarus 2007 (ross.lazarus@gmail.com)
Licensed under the terms of the LGPL_

.. _LGPL: http://www.gnu.org/copyleft/lesser.html

  </help>
</tool>