Mercurial > repos > iuc > structure
comparison structure.xml @ 0:a1574aada200 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/structure commit b4d0a8f3dfee920840c77befdf626c52a5d617cb
author | iuc |
---|---|
date | Wed, 15 Nov 2017 16:31:24 -0500 |
parents | |
children | 64e681a1cad5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a1574aada200 |
---|---|
1 <tool id="structure" name="Structure" version="2.3.4"> | |
2 <description>using multi-locus genotype data to investigate population structure</description> | |
3 <requirements> | |
4 <requirement type="package" version="2.3.4">structure</requirement> | |
5 </requirements> | |
6 <version_command><![CDATA[ | |
7 structure | grep -E -o 'Version.+' | |
8 ]]></version_command> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 mv '$mainparams' '$out_mainparams' && | |
11 mv '$extraparams' '$out_extraparams' && | |
12 | |
13 mkdir out log | |
14 | |
15 #for $run in range(1, int($nb_run) + 1): | |
16 && structure -i '$infile' -o outfile -m '$out_mainparams' -e '$out_extraparams' > 'log/run${run}_K_${main.MAXPOPS}.log' | |
17 && mv 'outfile_f' 'out/run${run}_K_${main.MAXPOPS}.out' | |
18 #end for | |
19 | |
20 ]]></command> | |
21 <configfiles> | |
22 <configfile name="mainparams"><![CDATA[ | |
23 KEY PARAMETERS FOR THE PROGRAM structure. YOU WILL NEED TO SET THESE | |
24 IN ORDER TO RUN THE PROGRAM. VARIOUS OPTIONS CAN BE ADJUSTED IN THE | |
25 FILE extraparams. | |
26 | |
27 | |
28 "(int)" means that this takes an integer value. | |
29 "(B)" means that this variable is Boolean | |
30 (ie insert 1 for True, and 0 for False) | |
31 "(str)" means that this is a string (but not enclosed in quotes!) | |
32 | |
33 | |
34 Basic Program Parameters | |
35 | |
36 #define MAXPOPS $main.MAXPOPS // default:2 // (int) number of populations assumed | |
37 #define BURNIN $main.BURNIN // default:10000 // (int) length of burnin period | |
38 #define NUMREPS $main.NUMREPS // default:20000 // (int) number of MCMC reps after burnin | |
39 | |
40 Input/Output files | |
41 | |
42 #define INFILE $infile // (str) name of input data file | |
43 #define OUTFILE outfile //(str) name of output data file | |
44 | |
45 Data file format | |
46 | |
47 #define NUMINDS $main.NUMINDS // default:100 // (int) number of diploid individuals in data file | |
48 #define NUMLOCI $main.NUMLOCI // default:100 // (int) number of loci in data file | |
49 #define PLOIDY $main.PLOIDY // default:2 // (int) ploidy of data | |
50 #define MISSING $main.MISSING // default:-9 // (int) value given to missing genotype data | |
51 #define ONEROWPERIND $main.ONEROWPERIND // default:0 // (B) store data for individuals in a single line | |
52 | |
53 | |
54 #define LABEL $main.LABEL // default:1 // (B) Input file contains individual labels | |
55 #define POPDATA $main.POPDATA // default:1 // (B) Input file contains a population identifier | |
56 #define POPFLAG ${extra.usepopinfo_cond.POPFLAG} // default:0 // (B) Input file contains a flag which says | |
57 whether to use popinfo when USEPOPINFO==1 | |
58 #define LOCDATA $main.LOCDATA // default:0 // (B) Input file contains a location identifier | |
59 | |
60 #define PHENOTYPE $main.PHENOTYPE // default:0 // (B) Input file contains phenotype information | |
61 #define EXTRACOLS $main.EXTRACOLS // default:0 // (int) Number of additional columns of data | |
62 before the genotype data start. | |
63 | |
64 #define MARKERNAMES $main.MARKERNAMES // default:1 // (B) data file contains row of marker names | |
65 #define RECESSIVEALLELES $main.recessivealleles_cond.RECESSIVEALLELES // default:0 // (B) data file contains dominant markers (eg AFLPs) | |
66 // and a row to indicate which alleles are recessive | |
67 #define MAPDISTANCES $main.MAPDISTANCES // default:0 // (B) data file contains row of map distances | |
68 // between loci | |
69 | |
70 | |
71 Advanced data file options | |
72 | |
73 #define PHASED $main.PHASED // default:0 // (B) Data are in correct phase (relevant for linkage model only) | |
74 #define PHASEINFO $main.PHASEINFO // default:0 // (B) the data for each individual contains a line | |
75 indicating phase (linkage model) | |
76 #define MARKOVPHASE $main.MARKOVPHASE // default:0 // (B) the phase info follows a Markov model. | |
77 #define NOTAMBIGUOUS $main.recessivealleles_cond.NOTAMBIGUOUS // default:-999 // (int) for use in some analyses of polyploid data | |
78 | |
79 | |
80 | |
81 Command line options: | |
82 | |
83 -m mainparams | |
84 -e extraparams | |
85 -s stratparams | |
86 -K MAXPOPS | |
87 -L NUMLOCI | |
88 -N NUMINDS | |
89 -i input file | |
90 -o output file | |
91 -D SEED | |
92 | |
93 ]]></configfile> | |
94 <configfile name="extraparams"><![CDATA[ | |
95 EXTRA PARAMS FOR THE PROGRAM structure. THESE PARAMETERS CONTROL HOW THE | |
96 PROGRAM RUNS. ATTRIBUTES OF THE DATAFILE AS WELL AS K AND RUNLENGTH ARE | |
97 SPECIFIED IN mainparams. | |
98 | |
99 "(int)" means that this takes an integer value. | |
100 "(d)" means that this is a double (ie, a Real number such as 3.14). | |
101 "(B)" means that this variable is Boolean | |
102 (ie insert 1 for True, and 0 for False). | |
103 | |
104 PROGRAM OPTIONS | |
105 | |
106 #define NOADMIX $extra.NOADMIX // default:0 // (B) Use no admixture model (0=admixture model, 1=no-admix) | |
107 #define LINKAGE $extra.LINKAGE // default:0 // (B) Use the linkage model model | |
108 #define USEPOPINFO $extra.usepopinfo_cond.USEPOPINFO // default:0 // (B) Use prior population information to pre-assign individuals | |
109 to clusters | |
110 #define LOCPRIOR $extra.LOCPRIOR // default:0 //(B) Use location information to improve weak data | |
111 | |
112 #define FREQSCORR $extra.FREQSCORR // default:1 // (B) allele frequencies are correlated among pops | |
113 #define ONEFST $extra.ONEFST // default:0 // (B) assume same value of Fst for all subpopulations. | |
114 | |
115 #define INFERALPHA $extra.inferalpha_cond.INFERALPHA // default:1 // (B) Infer ALPHA (the admixture parameter) | |
116 #define POPALPHAS $extra.POPALPHAS // default:0 // (B) Individual alpha for each population | |
117 #define ALPHA $extra.inferalpha_cond.ALPHA // default:1.0 // (d) Dirichlet parameter for degree of admixture | |
118 (this is the initial value if INFERALPHA==1). | |
119 | |
120 #define INFERLAMBDA $extra.inferlambda_cond.INFERLAMBDA // default:0 // (B) Infer LAMBDA (the allele frequencies parameter) | |
121 #define POPSPECIFICLAMBDA $extra.inferlambda_cond.POPSPECIFICLAMBDA // default:0 //(B) infer a separate lambda for each pop | |
122 (only if INFERLAMBDA=1). | |
123 #define LAMBDA $extra.LAMBDA // default:1.0 // (d) Dirichlet parameter for allele frequencies | |
124 | |
125 | |
126 | |
127 | |
128 PRIORS | |
129 | |
130 #define FPRIORMEAN $extra.FPRIORMEAN // default:0.01 // (d) Prior mean and SD of Fst for pops. | |
131 #define FPRIORSD $extra.FPRIORSD // default:0.05 // (d) The prior is a Gamma distribution with these parameters | |
132 | |
133 #define UNIFPRIORALPHA $extra.unifprioralpha_cond.UNIFPRIORALPHA // default:1 // (B) use a uniform prior for alpha; | |
134 otherwise gamma prior | |
135 #define ALPHAMAX $extra.ALPHAMAX // default:10.0 // (d) max value of alpha if uniform prior | |
136 #define ALPHAPRIORA $extra.unifprioralpha_cond.ALPHAPRIORA // default:1.0 // (only if UNIFPRIORALPHA==0): alpha has a gamma | |
137 prior with mean A*B, and | |
138 #define ALPHAPRIORB $extra.unifprioralpha_cond.ALPHAPRIORB // default:2.0 // variance A*B^2. | |
139 | |
140 | |
141 #define LOG10RMIN $extra.LOG10RMIN // default:-4.0 //(d) Log10 of minimum allowed value of r under linkage model | |
142 #define LOG10RMAX $extra.LOG10RMAX // default:1.0 //(d) Log10 of maximum allowed value of r | |
143 #define LOG10RPROPSD $extra.LOG10RPROPSD // default:0.1 //(d) standard deviation of log r in update | |
144 #define LOG10RSTART $extra.LOG10RSTART // default:-2.0 //(d) initial value of log10 r | |
145 | |
146 | |
147 USING PRIOR POPULATION INFO (USEPOPINFO) | |
148 | |
149 #define GENSBACK $extra.GENSBACK // default:2 //(int) For use when inferring whether an indiv- | |
150 idual is an immigrant, or has an immigrant an- | |
151 cestor in the past GENSBACK generations. eg, if | |
152 GENSBACK==2, it tests for immigrant ancestry | |
153 back to grandparents. | |
154 #define MIGRPRIOR $extra.usepopinfo_cond.MIGRPRIOR // default:0.01 //(d) prior prob that an individual is a migrant | |
155 (used only when USEPOPINFO==1). This should | |
156 be small, eg 0.01 or 0.1. | |
157 #define PFROMPOPFLAGONLY $extra.PFROMPOPFLAGONLY // default:0 // (B) only use individuals with POPFLAG=1 to update P. | |
158 This is to enable use of a reference set of | |
159 individuals for clustering additional "test" | |
160 individuals. | |
161 | |
162 LOCPRIOR MODEL FOR USING LOCATION INFORMATION | |
163 | |
164 #define LOCISPOP $extra.LOCISPOP // default:1 //(B) use POPDATA for location information | |
165 #define LOCPRIORINIT $extra.LOCPRIORINIT // default:1.0 //(d) initial value for r, the location prior | |
166 #define MAXLOCPRIOR $extra.MAXLOCPRIOR // default:20.0 //(d) max allowed value for r | |
167 | |
168 | |
169 | |
170 | |
171 OUTPUT OPTIONS | |
172 | |
173 #define PRINTNET $extra.PRINTNET // default:1 // (B) Print the "net nucleotide distance" to screen during the run | |
174 #define PRINTLAMBDA $extra.PRINTLAMBDA // default:1 // (B) Print current value(s) of lambda to screen | |
175 #define PRINTQSUM $extra.PRINTQSUM // default:1 // (B) Print summary of current population membership to screen | |
176 | |
177 #define SITEBYSITE $extra.SITEBYSITE // default:0 // (B) whether or not to print site by site results. | |
178 (Linkage model only) This is a large file! | |
179 #define PRINTQHAT $extra.PRINTQHAT // default:0 // (B) Q-hat printed to a separate file. Turn this | |
180 on before using STRAT. | |
181 #define UPDATEFREQ $extra.UPDATEFREQ // default:100 // (int) frequency of printing update on the screen. | |
182 Set automatically if this is 0. | |
183 #define PRINTLIKES $extra.PRINTLIKES // default:0 // (B) print current likelihood to screen every rep | |
184 #define INTERMEDSAVE $extra.INTERMEDSAVE // default:0 // (int) number of saves to file during run | |
185 | |
186 #define ECHODATA $extra.ECHODATA // default:1 // (B) Print some of data file to screen to check | |
187 that the data entry is correct. | |
188 (NEXT 3 ARE FOR COLLECTING DISTRIBUTION OF Q:) | |
189 #define ANCESTDIST $extra.ANCESTDIST // default:0 // (B) collect data about the distribution of an- | |
190 cestry coefficients (Q) for each individual | |
191 #define NUMBOXES $extra.NUMBOXES // default:1000 // (int) the distribution of Q values is stored as | |
192 a histogram with this number of boxes. | |
193 #define ANCESTPINT $extra.ANCESTPINT // default:0.90 // (d) the size of the displayed probability | |
194 interval on Q (values between 0.0--1.0) | |
195 | |
196 | |
197 | |
198 MISCELLANEOUS | |
199 | |
200 #define COMPUTEPROB $extra.COMPUTEPROB // default:1 // (B) Estimate the probability of the Data under | |
201 the model. This is used when choosing the | |
202 best number of subpopulations. | |
203 #define ADMBURNIN $extra.ADMBURNIN // default:500 // (int) [only relevant for linkage model]: | |
204 Initial period of burnin with admixture model (see Readme) | |
205 #define ALPHAPROPSD $extra.ALPHAPROPSD // default:0.025 // (d) SD of proposal for updating alpha | |
206 #define STARTATPOPINFO $extra.STARTATPOPINFO // default:0 // Use given populations as the initial condition | |
207 for population origins. (Need POPDATA==1). It | |
208 is assumed that the PopData in the input file | |
209 are between 1 and k where k<=MAXPOPS. | |
210 #define RANDOMIZE $extra.randomize_cond.RANDOMIZE // default:1 // (B) use new random seed for each run | |
211 #define SEED $extra.randomize_cond.SEED // default:2245 // (int) seed value for random number generator | |
212 (must set RANDOMIZE=0) | |
213 #define METROFREQ $extra.METROFREQ // default:10 // (int) Frequency of using Metropolis step to update | |
214 Q under admixture model (ie use the metr. move every | |
215 i steps). If this is set to 0, it is never used. | |
216 (Proposal for each q^(i) sampled from prior. The | |
217 goal is to improve mixing for small alpha.) | |
218 #define REPORTHITRATE $extra.REPORTHITRATE // default:0 // (B) report hit rate if using METROFREQ | |
219 | |
220 ]]></configfile> | |
221 </configfiles> | |
222 <inputs> | |
223 <param name="infile" type="data" label="Genotype data" format="tabular" /> | |
224 <param name="nb_run" value="1" type="integer" label="Number of runs" min="1" max="10" help="Note that the runs are sequential. Please launch separate runs if it's too long" /> | |
225 <section name="main" title="mainparams" expanded="True"> | |
226 <!--Basic Program Parameters--> | |
227 <param argument="MAXPOPS" value="" type="integer" label="Number of populations assumed" help="or [K]"/> | |
228 <param argument="BURNIN" value="10000" type="integer" label="Length of burnin period" /> | |
229 <param argument="NUMREPS" value="20000" type="integer" label="Number of MCMC reps after burnin" /> | |
230 | |
231 <!--Data file format--> | |
232 <param argument="NUMINDS" value="" type="integer" label="Number of diploid individuals in data file" help="or [N]"/> | |
233 <param argument="NUMLOCI" value="" type="integer" label="Number of loci in data file" help="or [L]"/> | |
234 <param argument="PLOIDY" value="2" type="integer" label="Ploidy of data" /> | |
235 <param argument="MISSING" value="-9" type="integer" label="Value given to missing genotype data" /> | |
236 <param argument="ONEROWPERIND" checked="False" type="boolean" label="Store data for individuals in a single line" truevalue="1" falsevalue="0" help=" E.g., for diploid data, this would mean that the two alleles for each locus are in consecutive order in the same row, rather than being arranged in the same column, in two consecutive rows "/> | |
237 | |
238 | |
239 <param argument="LABEL" checked="true" type="boolean" label="Input file contains individual labels" truevalue="1" falsevalue="0" /> | |
240 <param argument="POPDATA" checked="true" type="boolean" label="Input file contains a user-defined population-of-origin for each individual" truevalue="1" falsevalue="0" /> | |
241 <param argument="LOCDATA" checked="false" type="boolean" label="Input file contains a location identifier" truevalue="1" falsevalue="0" /> | |
242 | |
243 <param argument="PHENOTYPE" checked="false" type="boolean" label="Input file contains phenotype information" truevalue="1" falsevalue="0" /> | |
244 <param argument="EXTRACOLS" value="0" type="integer" label="Number of additional columns of data before the genotype data start." /> | |
245 | |
246 <param argument="MARKERNAMES" checked="true" type="boolean" label="Data file contains row of marker names" truevalue="1" falsevalue="0" /> | |
247 <conditional name="recessivealleles_cond"> | |
248 <param argument="RECESSIVEALLELES" type="select" label="Data file contains dominant markers (eg AFLPs) and a row to indicate which alleles are recessive" > | |
249 <option value="0" selected="True">No</option> | |
250 <option value="1">Yes</option> | |
251 </param> | |
252 <when value="0"> | |
253 <param argument="NOTAMBIGUOUS" value="-999" type="hidden" label="Defines the code indicating that genotype data at a marker are unambiguous." help="For use with polyploids when RECESSIVEALLELES=1/True. Must not match MISSING or any allele value in the data." /> | |
254 </when> | |
255 <when value="1"> | |
256 <param argument="NOTAMBIGUOUS" value="-999" type="integer" label="Defines the code indicating that genotype data at a marker are unambiguous." help="For use with polyploids when RECESSIVEALLELES=1/True. Must not match MISSING or any allele value in the data." /> | |
257 </when> | |
258 </conditional> | |
259 <param argument="MAPDISTANCES" checked="false" type="boolean" label="Data file contains row of map distances between loci" truevalue="1" falsevalue="0" /> | |
260 | |
261 | |
262 <!--Advanced data file options--> | |
263 | |
264 <param argument="PHASED" checked="false" type="boolean" label="Data are in correct phase (relevant for linkage model only)" truevalue="1" falsevalue="0" /> | |
265 <param argument="PHASEINFO" checked="false" type="boolean" label="The data for each individual contains a line indicating phase (linkage model)" truevalue="1" falsevalue="0" /> | |
266 <param argument="MARKOVPHASE" checked="false" type="boolean" label="The phase info follows a Markov model." truevalue="1" falsevalue="0" /> | |
267 </section> | |
268 <section name="extra" title="extraparams" expanded="False"> | |
269 | |
270 <param argument="NOADMIX" checked="false" type="boolean" label="Use no admixture model" help="(0/False=admixture model, 1/True=no-admix)" truevalue="1" falsevalue="0" /> | |
271 <param argument="LINKAGE" checked="false" type="boolean" label="Use the linkage model" truevalue="1" falsevalue="0" /> <!-- rendered into the extraparams configfile as 1 (checked) or 0 (unchecked) --> | |
272 <conditional name="usepopinfo_cond"> | |
273 <param argument="USEPOPINFO" type="select" label="Use prior population information to pre-assign individuals to clusters"> | |
274 <option value="0" selected="True">No</option> | |
275 <option value="1">Yes</option> | |
276 </param> | |
277 <when value="0"> | |
278 <param argument="POPFLAG" value="0" type="hidden" label="Input file contains a flag which says whether to use popinfo" help="[mainparams] when USEPOPINFO is 1/True" /> | |
279 <param argument="MIGRPRIOR" value="0.01" type="hidden" label="Prior prob that an individual is a migrant" help="(used only when USEPOPINFO==1/True). This should be small, eg 0.01 or 0.1." /> | |
280 </when> | |
281 <when value="1"> | |
282 <param argument="POPFLAG" checked="false" type="boolean" label="Input file contains a flag which says whether to use popinfo" help="[mainparams] when USEPOPINFO is 1/True" truevalue="1" falsevalue="0" /> | |
283 <param argument="MIGRPRIOR" value="0.01" type="float" label="Prior prob that an individual is a migrant" help="(used only when USEPOPINFO==1/True). This should be small, eg 0.01 or 0.1." /> | |
284 </when> | |
285 </conditional> | |
286 <param argument="LOCPRIOR" checked="false" type="boolean" label="Use location information to improve weak data" truevalue="1" falsevalue="0" /> | |
287 | |
288 <param argument="FREQSCORR" checked="true" type="boolean" label="Allele frequencies are correlated among pops" truevalue="1" falsevalue="0" /> | |
289 <param argument="ONEFST" checked="false" type="boolean" label="Assume same value of Fst for all subpopulations" truevalue="1" falsevalue="0" /> | |
290 | |
291 <conditional name="inferalpha_cond"> | |
292 <param argument="INFERALPHA" type="select" label="Infer ALPHA (the admixture parameter)"> | |
293 <option value="1" selected="True">Yes</option> | |
294 <option value="0">No</option> | |
295 </param> | |
296 <when value="1"> | |
297 <param argument="ALPHA" value="1.0" type="float" label="Dirichlet parameter for degree of admixture" help="this is the initial value if INFERALPHA is 1/True." /> | |
298 </when> | |
299 <when value="0"> | |
300 <param argument="ALPHA" value="1.0" type="hidden" label="Dirichlet parameter for degree of admixture" help="this is the initial value if INFERALPHA is 1/True." /> | |
301 </when> | |
302 </conditional> | |
303 <param argument="POPALPHAS" checked="false" type="boolean" label="Individual alpha for each population" truevalue="1" falsevalue="0" /> | |
304 | |
305 <conditional name="inferlambda_cond"> | |
306 <param argument="INFERLAMBDA" type="select" label="Infer LAMBDA (the allele frequencies parameter)"> | |
307 <option value="0" selected="True">No</option> | |
308 <option value="1">Yes</option> | |
309 </param> | |
310 <when value="0"> | |
311 <param argument="POPSPECIFICLAMBDA" value="0" type="hidden" label="Infer a separate lambda for each pop" help="(only if INFERLAMBDA=1/True)." /> | |
312 </when> | |
313 <when value="1"> | |
314 <param argument="POPSPECIFICLAMBDA" checked="false" type="boolean" label="Infer a separate lambda for each pop" help="(only if INFERLAMBDA=1/True)." truevalue="1" falsevalue="0" /> | |
315 </when> | |
316 </conditional> | |
317 <param argument="LAMBDA" value="1.0" type="float" label="Dirichlet parameter for allele frequencies" /> | |
318 | |
319 | |
320 <!-- PRIORS --> | |
321 | |
322 <param argument="FPRIORMEAN" value="0.01" type="float" label="The Prior (Gamma distribution) mean of Fst for pops." /> | |
323 <param argument="FPRIORSD" value="0.05" type="float" label="The Prior (Gamma distribution) Standard Deviation of Fst for pops." /> | |
324 | |
325 <conditional name="unifprioralpha_cond"> | |
326 <param argument="UNIFPRIORALPHA" type="select" label="Use a uniform prior for alpha; otherwise gamma prior"> | |
327 <option value="1" selected="True">Yes</option> | |
328 <option value="0">No</option> | |
329 </param> | |
330 <when value="1"> | |
331 <param argument="ALPHAPRIORA" value="1.0" type="hidden" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)" /> | |
332 <param argument="ALPHAPRIORB" value="2.0" type="hidden" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)" /> | |
333 </when> | |
334 <when value="0"> | |
335 <param argument="ALPHAPRIORA" value="1.0" type="float" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)"/> | |
336 <param argument="ALPHAPRIORB" value="2.0" type="float" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)"/> | |
337 </when> | |
338 </conditional> | |
339 <param argument="ALPHAMAX" value="10.0" type="float" label="Max value of alpha if uniform prior" /> | |
340 | |
341 | |
342 <param argument="LOG10RMIN" value="-4.0" type="float" label="Log10 of minimum allowed value of r under linkage model" /> | |
343 <param argument="LOG10RMAX" value="1.0" type="float" label="Log10 of maximum allowed value of r" /> | |
344 <param argument="LOG10RPROPSD" value="0.1" type="float" label="Standard deviation of log r in update" /> | |
345 <param argument="LOG10RSTART" value="-2.0" type="float" label="Initial value of log10 r" /> | |
346 | |
347 | |
348 <!-- USING PRIOR POPULATION INFO (USEPOPINFO): GENSBACK and PFROMPOPFLAGONLY are substituted into the extraparams configfile --> | |
349 | |
350 <param argument="GENSBACK" value="2" type="integer" label="For use when inferring whether an individual is an immigrant, or has an immigrant ancestor in the past GENSBACK generations." help="eg, if GENSBACK==2, it tests for immigrant ancestry back to grandparents." /> | |
351 <param argument="PFROMPOPFLAGONLY" checked="false" type="boolean" label="Only use individuals with POPFLAG=1 to update P." help="This is to enable use of a reference set of individuals for clustering additional 'test' individuals." truevalue="1" falsevalue="0" /> | |
352 | |
353 <!-- LOCPRIOR MODEL FOR USING LOCATION INFORMATION --> | |
354 | |
355 <param argument="LOCISPOP" checked="true" type="boolean" label="Use POPDATA for location information" truevalue="1" falsevalue="0" /> | |
356 <param argument="LOCPRIORINIT" value="1.0" type="float" label="Initial value for r, the location prior" /> | |
357 <param argument="MAXLOCPRIOR" value="20.0" type="float" label="Max allowed value for r" /> | |
358 | |
359 <!-- OUTPUT OPTIONS --> | |
360 | |
361 <param argument="PRINTNET" checked="true" type="boolean" label="Print the 'net nucleotide distance' to screen during the run" truevalue="1" falsevalue="0" /> | |
362 <param argument="PRINTLAMBDA" checked="true" type="boolean" label="Print current value(s) of lambda to screen" truevalue="1" falsevalue="0" /> | |
363 <param argument="PRINTQSUM" checked="true" type="boolean" label="Print summary of current population membership to screen" truevalue="1" falsevalue="0" /> | |
364 | |
365 <param argument="SITEBYSITE" checked="false" type="boolean" label="whether or not to print site by site results." help="(Linkage model only) This is a large file!" truevalue="1" falsevalue="0" /> | |
366 <param argument="PRINTQHAT" checked="false" type="boolean" label="Q-hat printed to a separate file." help="Turn this on before using STRAT." truevalue="1" falsevalue="0" /> | |
367 <param argument="UPDATEFREQ" value="100" type="integer" label="Frequency of printing update on the screen." help="Set automatically if this is 0/False." /> | |
368 <param argument="PRINTLIKES" checked="false" type="boolean" label="Print current likelihood to screen every rep" truevalue="1" falsevalue="0" /> | |
369 <param argument="INTERMEDSAVE" value="0" type="integer" label="Number of saves to file during run" /> | |
370 | |
371 <param argument="ECHODATA" checked="false" type="boolean" label="Print some of data file to screen to check that the data entry is correct." truevalue="1" falsevalue="0" /> <!-- NOTE(review): the extraparams template comment lists default:1 for ECHODATA but this is unchecked; confirm intended. The next 3 params are for collecting the distribution of Q. --> | |
372 <param argument="ANCESTDIST" checked="false" type="boolean" label="Collect data about the distribution of ancestry coefficients (Q) for each individual" truevalue="1" falsevalue="0" /> | |
373 <param argument="NUMBOXES" value="1000" type="integer" label="The distribution of Q values is stored as a histogram with this number of boxes." /> | |
374 <param argument="ANCESTPINT" value="0.90" type="float" label="The size of the displayed probability interval on Q (values between 0.0--1.0)" /> | |
375 | |
376 | |
377 | |
378 <!-- MISCELLANEOUS --> | |
379 | |
380 <param argument="COMPUTEPROB" checked="true" type="boolean" label="Estimate the probability of the Data under the model." help="This is used when choosing the best number of subpopulations." truevalue="1" falsevalue="0" /> | |
381 <param argument="ADMBURNIN" value="500" type="integer" label="Initial period of burnin with admixture model" help="[only relevant for linkage model] see Documentation" /> | |
382 <param argument="ALPHAPROPSD" value="0.025" type="float" label="SD of proposal for updating alpha" /> | |
383 <param argument="STARTATPOPINFO" checked="false" type="boolean" label="Use given populations as the initial condition for population origins." help="(Need POPDATA==1). It is assumed that the PopData in the input file are between 1 and k where k is less or equal MAXPOPS." truevalue="1" falsevalue="0" /> | |
384 <conditional name="randomize_cond"> <!-- SEED is only user-editable when RANDOMIZE is 0 (No); with RANDOMIZE 1 a hidden SEED of 2245 is still passed to the extraparams template --> | |
385 <param argument="RANDOMIZE" type="select" label="Use new random seed for each run"> | |
386 <option value="1" selected="True">Yes</option> | |
387 <option value="0">No</option> | |
388 </param> | |
389 <when value="1"> | |
390 <param argument="SEED" value="2245" type="hidden" label="Seed value for random number generator" help="(must set RANDOMIZE=0)" /> | |
391 </when> | |
392 <when value="0"> | |
393 <param argument="SEED" value="2245" type="integer" label="Seed value for random number generator" help="(must set RANDOMIZE=0)" /> | |
394 </when> | |
395 </conditional> | |
396 <param argument="METROFREQ" value="10" type="integer" label="Frequency of using Metropolis step to update Q under admixture model" help="(ie use the metr. move every i steps). If this is set to 0, it is never used. (Proposal for each q^(i) sampled from prior. The goal is to improve mixing for small alpha.)" /> | |
397 <param argument="REPORTHITRATE" checked="false" type="boolean" label="Report hit rate if using METROFREQ" truevalue="1" falsevalue="0" /> | |
398 </section> | |
399 </inputs> | |
400 <outputs> | |
401 <data name="out_mainparams" format="txt" label="run_K_${main.MAXPOPS}.mainparams" /> | |
402 <data name="out_extraparams" format="txt" label="run_K_${main.MAXPOPS}.extraparams" /> | |
403 <collection name="out" type="list" label="run_K_${main.MAXPOPS}.out"> | |
404 <discover_datasets pattern="__name__" format="tabular" directory="out" /> | |
405 </collection> | |
406 <collection name="log" type="list" label="run_K_${main.MAXPOPS}.log"> | |
407 <discover_datasets pattern="__name__" format="tabular" directory="log" /> | |
408 </collection> | |
409 </outputs> | |
410 <tests> | |
411 <test> | |
412 <!-- https://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/html/structure-data.html --> | |
413 <param name="infile" value="testdata1" /> | |
414 <param name="nb_run" value="2" /> | |
415 <section name="main"> | |
416 <param name="NUMINDS" value="200" /> | |
417 <param name="MAXPOPS" value="2" /> | |
418 <param name="LABEL" value="1" /> | |
419 <param name="POPDATA" value="1" /> | |
420 <param name="NUMLOCI" value="5" /> | |
421 <param name="LOCDATA" value="1" /> | |
422 <param name="PLOIDY" value="2" /> | |
423 <param name="MISSING" value="-999" /> | |
424 <param name="ONEROWPERIND" value="0" /> | |
425 <param name="MARKERNAMES" value="0" /> | |
426 </section> | |
427 <section name="extra"> | |
428 <conditional name="randomize_cond"> | |
429 <param name="RANDOMIZE" value="0" /> | |
430 </conditional> | |
431 </section> | |
432 <output_collection name="out" type="list"> | |
433 <element name="run1_K_2.out" value="testdata1_f" lines_diff="6" /> | |
434 <element name="run2_K_2.out" value="testdata1_f" lines_diff="6" /> | |
435 </output_collection> | |
436 <output_collection name="log" type="list"> | |
437 <element name="run1_K_2.log"> | |
438 <assert_contents> | |
439 <has_line line="Final results printed to file outfile_f" /> | |
440 </assert_contents> | |
441 </element> | |
442 <element name="run2_K_2.log"> | |
443 <assert_contents> | |
444 <has_line line="Final results printed to file outfile_f" /> | |
445 </assert_contents> | |
446 </element> | |
447 </output_collection> | |
448 </test> | |
449 </tests> | |
450 <help><![CDATA[ | |
451 **Introduction** | |
452 | |
453 The program structure_ implements a model-based clustering method for inferring population structure | |
454 using genotype data consisting of unlinked markers. The method was introduced in a paper | |
455 by Pritchard, Stephens and Donnelly (2000a) and extended in sequels by Falush, Stephens and | |
456 Pritchard (2003a, 2007). Applications of our method include demonstrating the presence of population | |
457 structure, identifying distinct genetic populations, assigning individuals to populations, and | |
458 identifying migrants and admixed individuals. | |
459 | |
460 Briefly, we assume a model in which there are K populations (where K may be unknown), | |
461 each of which is characterized by a set of allele frequencies at each locus. Individuals in the | |
462 sample are assigned (probabilistically) to populations, or jointly to two or more populations if their | |
463 genotypes indicate that they are admixed. It is assumed that within populations, the loci are at | |
464 Hardy-Weinberg equilibrium, and linkage equilibrium. Loosely speaking, individuals are assigned | |
465 to populations in such a way as to achieve this. | |
466 | |
467 Our model does not assume a particular mutation process, and it can be applied to most of the | |
468 commonly used genetic markers including microsatellites, SNPs and RFLPs. The model assumes | |
469 that markers are not in linkage disequilibrium (LD) within subpopulations, so we can’t handle | |
470 markers that are extremely close together. Starting with version 2.0, we can now deal with weakly | |
471 linked markers. | |
472 | |
473 While the computational approaches implemented here are fairly powerful, some care is needed | |
474 in running the program in order to ensure sensible answers. For example, it is not possible to | |
475 determine suitable run-lengths theoretically, and this requires some experimentation on the part of | |
476 the user. This document describes the use and interpretation of the software and supplements the | |
477 published papers, which provide more formal descriptions and evaluations of the methods. | |
478 | |
479 .. _structure: https://web.stanford.edu/group/pritchardlab/structure.html | |
480 | |
481 **Documentation** | |
482 | |
483 Please see the full Structure documentation_ | |
484 | |
485 .. _documentation: https://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_doc.pdf | |
486 | |
487 **Upstream** | |
488 | |
489 Inputs can be produced from: | |
490 | |
491 - Microsatellite analysis | |
492 - RADSeq analysis (eg: using populations_ from Stacks suite) | |
493 | |
494 .. _populations: http://catchenlab.life.illinois.edu/stacks/manual/#export | |
495 | |
496 **Input** | |
497 | |
498 ======= === ===== ===== ===== ===== ===== | |
499 loc_a loc_b loc_c loc_d loc_e | |
500 ======= === ===== ===== ===== ===== ===== | |
501 George 1 -9 145 66 0 92 | |
502 George 1 -9 -9 64 0 94 | |
503 Paula 1 106 142 68 1 92 | |
504 Paula 1 106 148 64 0 94 | |
505 Matthew 2 110 145 -9 0 92 | |
506 Matthew 2 110 148 66 1 -9 | |
507 Bob 2 108 142 64 1 94 | |
508 Bob 2 -9 142 -9 0 94 | |
509 Anja 1 112 142 -9 1 -9 | |
510 Anja 1 114 142 66 1 94 | |
511 Peter 1 -9 145 66 0 -9 | |
512 Peter 1 110 145 -9 1 -9 | |
513 Carsten 2 108 145 62 0 -9 | |
514 Carsten 2 110 145 64 1 92 | |
515 ======= === ===== ===== ===== ===== ===== | |
516 | |
517 You will find other sample data sets: here_ | |
518 | |
519 .. _here: https://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/html/structure-data.html | |
520 | |
521 **Downstream** | |
522 | |
523 - Clumpp_ | |
524 - Distruct_ | |
525 - Structure-harvester_ | |
526 | |
527 .. _Clumpp: https://rosenberglab.stanford.edu/clumpp.html | |
528 .. _Distruct: https://rosenberglab.stanford.edu/distruct.html | |
529 .. _Structure-harvester: http://taylor0.biology.ucla.edu/structureHarvester/ | |
530 | |
531 ]]></help> | |
532 <citations> | |
533 <citation type="doi">10.1111/j.1471-8286.2007.01758.x</citation> | |
534 <citation type="doi">10.1111/j.1755-0998.2009.02591.x</citation> | |
535 </citations> | |
536 </tool> |