comparison structure.xml @ 0:a1574aada200 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/structure commit b4d0a8f3dfee920840c77befdf626c52a5d617cb
author iuc
date Wed, 15 Nov 2017 16:31:24 -0500
parents
children 64e681a1cad5
comparison
equal deleted inserted replaced
-1:000000000000 0:a1574aada200
1 <tool id="structure" name="Structure" version="2.3.4">
2 <description>using multi-locus genotype data to investigate population structure</description>
3 <requirements>
4 <requirement type="package" version="2.3.4">structure</requirement>
5 </requirements>
6 <version_command><![CDATA[
7 structure | grep -E -o 'Version.+'
8 ]]></version_command>
9 <command detect_errors="exit_code"><![CDATA[
10 mv '$mainparams' '$out_mainparams' &&
11 mv '$extraparams' '$out_extraparams' &&
12
13 mkdir out log
14
15 #for $run in range(1, int($nb_run) + 1):
16 && structure -i '$infile' -o outfile -m '$out_mainparams' -e '$out_extraparams' > 'log/run${run}_K_${main.MAXPOPS}.log'
17 && mv 'outfile_f' 'out/run${run}_K_${main.MAXPOPS}.out'
18 #end for
19
20 ]]></command>
21 <configfiles>
22 <configfile name="mainparams"><![CDATA[
23 KEY PARAMETERS FOR THE PROGRAM structure. YOU WILL NEED TO SET THESE
24 IN ORDER TO RUN THE PROGRAM. VARIOUS OPTIONS CAN BE ADJUSTED IN THE
25 FILE extraparams.
26
27
28 "(int)" means that this takes an integer value.
29 "(B)" means that this variable is Boolean
30 (ie insert 1 for True, and 0 for False)
31 "(str)" means that this is a string (but not enclosed in quotes!)
32
33
34 Basic Program Parameters
35
36 #define MAXPOPS $main.MAXPOPS // default:2 // (int) number of populations assumed
37 #define BURNIN $main.BURNIN // default:10000 // (int) length of burnin period
38 #define NUMREPS $main.NUMREPS // default:20000 // (int) number of MCMC reps after burnin
39
40 Input/Output files
41
42 #define INFILE $infile // (str) name of input data file
43 #define OUTFILE outfile //(str) name of output data file
44
45 Data file format
46
47 #define NUMINDS $main.NUMINDS // default:100 // (int) number of diploid individuals in data file
48 #define NUMLOCI $main.NUMLOCI // default:100 // (int) number of loci in data file
49 #define PLOIDY $main.PLOIDY // default:2 // (int) ploidy of data
50 #define MISSING $main.MISSING // default:-9 // (int) value given to missing genotype data
51 #define ONEROWPERIND $main.ONEROWPERIND // default:0 // (B) store data for individuals in a single line
52
53
54 #define LABEL $main.LABEL // default:1 // (B) Input file contains individual labels
55 #define POPDATA $main.POPDATA // default:1 // (B) Input file contains a population identifier
56 #define POPFLAG ${extra.usepopinfo_cond.POPFLAG} // default:0 // (B) Input file contains a flag which says
57 whether to use popinfo when USEPOPINFO==1
58 #define LOCDATA $main.LOCDATA // default:0 // (B) Input file contains a location identifier
59
60 #define PHENOTYPE $main.PHENOTYPE // default:0 // (B) Input file contains phenotype information
61 #define EXTRACOLS $main.EXTRACOLS // default:0 // (int) Number of additional columns of data
62 before the genotype data start.
63
64 #define MARKERNAMES $main.MARKERNAMES // default:1 // (B) data file contains row of marker names
65 #define RECESSIVEALLELES $main.recessivealleles_cond.RECESSIVEALLELES // default:0 // (B) data file contains dominant markers (eg AFLPs)
66 // and a row to indicate which alleles are recessive
67 #define MAPDISTANCES $main.MAPDISTANCES // default:0 // (B) data file contains row of map distances
68 // between loci
69
70
71 Advanced data file options
72
73 #define PHASED $main.PHASED // default:0 // (B) Data are in correct phase (relevant for linkage model only)
74 #define PHASEINFO $main.PHASEINFO // default:0 // (B) the data for each individual contains a line
75 indicating phase (linkage model)
76 #define MARKOVPHASE $main.MARKOVPHASE // default:0 // (B) the phase info follows a Markov model.
77 #define NOTAMBIGUOUS $main.recessivealleles_cond.NOTAMBIGUOUS // default:-999 // (int) for use in some analyses of polyploid data
78
79
80
81 Command line options:
82
83 -m mainparams
84 -e extraparams
85 -s stratparams
86 -K MAXPOPS
87 -L NUMLOCI
88 -N NUMINDS
89 -i input file
90 -o output file
91 -D SEED
92
93 ]]></configfile>
94 <configfile name="extraparams"><![CDATA[
95 EXTRA PARAMS FOR THE PROGRAM structure. THESE PARAMETERS CONTROL HOW THE
96 PROGRAM RUNS. ATTRIBUTES OF THE DATAFILE AS WELL AS K AND RUNLENGTH ARE
97 SPECIFIED IN mainparams.
98
99 "(int)" means that this takes an integer value.
100 "(d)" means that this is a double (ie, a Real number such as 3.14).
101 "(B)" means that this variable is Boolean
102 (ie insert 1 for True, and 0 for False).
103
104 PROGRAM OPTIONS
105
106 #define NOADMIX $extra.NOADMIX // default:0 // (B) Use no admixture model (0=admixture model, 1=no-admix)
107 #define LINKAGE $extra.LINKAGE // default:0 // (B) Use the linkage model model
108 #define USEPOPINFO $extra.usepopinfo_cond.USEPOPINFO // default:0 // (B) Use prior population information to pre-assign individuals
109 to clusters
110 #define LOCPRIOR $extra.LOCPRIOR // default:0 //(B) Use location information to improve weak data
111
112 #define FREQSCORR $extra.FREQSCORR // default:1 // (B) allele frequencies are correlated among pops
113 #define ONEFST $extra.ONEFST // default:0 // (B) assume same value of Fst for all subpopulations.
114
115 #define INFERALPHA $extra.inferalpha_cond.INFERALPHA // default:1 // (B) Infer ALPHA (the admixture parameter)
116 #define POPALPHAS $extra.POPALPHAS // default:0 // (B) Individual alpha for each population
117 #define ALPHA $extra.inferalpha_cond.ALPHA // default:1.0 // (d) Dirichlet parameter for degree of admixture
118 (this is the initial value if INFERALPHA==1).
119
120 #define INFERLAMBDA $extra.inferlambda_cond.INFERLAMBDA // default:0 // (B) Infer LAMBDA (the allele frequencies parameter)
121 #define POPSPECIFICLAMBDA $extra.inferlambda_cond.POPSPECIFICLAMBDA // default:0 //(B) infer a separate lambda for each pop
122 (only if INFERLAMBDA=1).
123 #define LAMBDA $extra.LAMBDA // default:1.0 // (d) Dirichlet parameter for allele frequencies
124
125
126
127
128 PRIORS
129
130 #define FPRIORMEAN $extra.FPRIORMEAN // default:0.01 // (d) Prior mean and SD of Fst for pops.
131 #define FPRIORSD $extra.FPRIORSD // default:0.05 // (d) The prior is a Gamma distribution with these parameters
132
133 #define UNIFPRIORALPHA $extra.unifprioralpha_cond.UNIFPRIORALPHA // default:1 // (B) use a uniform prior for alpha;
134 otherwise gamma prior
135 #define ALPHAMAX $extra.ALPHAMAX // default:10.0 // (d) max value of alpha if uniform prior
136 #define ALPHAPRIORA $extra.unifprioralpha_cond.ALPHAPRIORA // default:1.0 // (only if UNIFPRIORALPHA==0): alpha has a gamma
137 prior with mean A*B, and
138 #define ALPHAPRIORB $extra.unifprioralpha_cond.ALPHAPRIORB // default:2.0 // variance A*B^2.
139
140
141 #define LOG10RMIN $extra.LOG10RMIN // default:-4.0 //(d) Log10 of minimum allowed value of r under linkage model
142 #define LOG10RMAX $extra.LOG10RMAX // default:1.0 //(d) Log10 of maximum allowed value of r
143 #define LOG10RPROPSD $extra.LOG10RPROPSD // default:0.1 //(d) standard deviation of log r in update
144 #define LOG10RSTART $extra.LOG10RSTART // default:-2.0 //(d) initial value of log10 r
145
146
147 USING PRIOR POPULATION INFO (USEPOPINFO)
148
149 #define GENSBACK $extra.GENSBACK // default:2 //(int) For use when inferring whether an indiv-
150 idual is an immigrant, or has an immigrant an-
151 cestor in the past GENSBACK generations. eg, if
152 GENSBACK==2, it tests for immigrant ancestry
153 back to grandparents.
154 #define MIGRPRIOR $extra.usepopinfo_cond.MIGRPRIOR // default:0.01 //(d) prior prob that an individual is a migrant
155 (used only when USEPOPINFO==1). This should
156 be small, eg 0.01 or 0.1.
157 #define PFROMPOPFLAGONLY $extra.PFROMPOPFLAGONLY // default:0 // (B) only use individuals with POPFLAG=1 to update P.
158 This is to enable use of a reference set of
159 individuals for clustering additional "test"
160 individuals.
161
162 LOCPRIOR MODEL FOR USING LOCATION INFORMATION
163
164 #define LOCISPOP $extra.LOCISPOP // default:1 //(B) use POPDATA for location information
165 #define LOCPRIORINIT $extra.LOCPRIORINIT // default:1.0 //(d) initial value for r, the location prior
166 #define MAXLOCPRIOR $extra.MAXLOCPRIOR // default:20.0 //(d) max allowed value for r
167
168
169
170
171 OUTPUT OPTIONS
172
173 #define PRINTNET $extra.PRINTNET // default:1 // (B) Print the "net nucleotide distance" to screen during the run
174 #define PRINTLAMBDA $extra.PRINTLAMBDA // default:1 // (B) Print current value(s) of lambda to screen
175 #define PRINTQSUM $extra.PRINTQSUM // default:1 // (B) Print summary of current population membership to screen
176
177 #define SITEBYSITE $extra.SITEBYSITE // default:0 // (B) whether or not to print site by site results.
178 (Linkage model only) This is a large file!
179 #define PRINTQHAT $extra.PRINTQHAT // default:0 // (B) Q-hat printed to a separate file. Turn this
180 on before using STRAT.
181 #define UPDATEFREQ $extra.UPDATEFREQ // default:100 // (int) frequency of printing update on the screen.
182 Set automatically if this is 0.
183 #define PRINTLIKES $extra.PRINTLIKES // default:0 // (B) print current likelihood to screen every rep
184 #define INTERMEDSAVE $extra.INTERMEDSAVE // default:0 // (int) number of saves to file during run
185
186 #define ECHODATA $extra.ECHODATA // default:1 // (B) Print some of data file to screen to check
187 that the data entry is correct.
188 (NEXT 3 ARE FOR COLLECTING DISTRIBUTION OF Q:)
189 #define ANCESTDIST $extra.ANCESTDIST // default:0 // (B) collect data about the distribution of an-
190 cestry coefficients (Q) for each individual
191 #define NUMBOXES $extra.NUMBOXES // default:1000 // (int) the distribution of Q values is stored as
192 a histogram with this number of boxes.
193 #define ANCESTPINT $extra.ANCESTPINT // default:0.90 // (d) the size of the displayed probability
194 interval on Q (values between 0.0--1.0)
195
196
197
198 MISCELLANEOUS
199
200 #define COMPUTEPROB $extra.COMPUTEPROB // default:1 // (B) Estimate the probability of the Data under
201 the model. This is used when choosing the
202 best number of subpopulations.
203 #define ADMBURNIN $extra.ADMBURNIN // default:500 // (int) [only relevant for linkage model]:
204 Initial period of burnin with admixture model (see Readme)
205 #define ALPHAPROPSD $extra.ALPHAPROPSD // default:0.025 // (d) SD of proposal for updating alpha
206 #define STARTATPOPINFO $extra.STARTATPOPINFO // default:0 // Use given populations as the initial condition
207 for population origins. (Need POPDATA==1). It
208 is assumed that the PopData in the input file
209 are between 1 and k where k<=MAXPOPS.
210 #define RANDOMIZE $extra.randomize_cond.RANDOMIZE // default:1 // (B) use new random seed for each run
211 #define SEED $extra.randomize_cond.SEED // default:2245 // (int) seed value for random number generator
212 (must set RANDOMIZE=0)
213 #define METROFREQ $extra.METROFREQ // default:10 // (int) Frequency of using Metropolis step to update
214 Q under admixture model (ie use the metr. move every
215 i steps). If this is set to 0, it is never used.
216 (Proposal for each q^(i) sampled from prior. The
217 goal is to improve mixing for small alpha.)
218 #define REPORTHITRATE $extra.REPORTHITRATE // default:0 // (B) report hit rate if using METROFREQ
219
220 ]]></configfile>
221 </configfiles>
222 <inputs>
223 <param name="infile" type="data" label="Genotype data" format="tabular" />
224 <param name="nb_run" value="1" type="integer" label="Number of runs" min="1" max="10" help="Note that the runs are sequential. Please launch separate runs if it's too long" />
225 <section name="main" title="mainparams" expanded="True">
226 <!--Basic Program Parameters-->
227 <param argument="MAXPOPS" value="" type="integer" label="Number of populations assumed" help="or [K]"/>
228 <param argument="BURNIN" value="10000" type="integer" label="Length of burnin period" />
229 <param argument="NUMREPS" value="20000" type="integer" label="Number of MCMC reps after burnin" />
230
231 <!--Data file format-->
232 <param argument="NUMINDS" value="" type="integer" label="Number of diploid individuals in data file" help="or [N]"/>
233 <param argument="NUMLOCI" value="" type="integer" label="Number of loci in data file" help="or [L]"/>
234 <param argument="PLOIDY" value="2" type="integer" label="Ploidy of data" />
235 <param argument="MISSING" value="-9" type="integer" label="Value given to missing genotype data" />
236 <param argument="ONEROWPERIND" checked="False" type="boolean" label="Store data for individuals in a single line" truevalue="1" falsevalue="0" help=" E.g., for diploid data, this would mean that the two alleles for each locus are in consecutive order in the same row, rather than being arranged in the same column, in two consecutive rows "/>
237
238
239 <param argument="LABEL" checked="true" type="boolean" label="Input file contains individual labels" truevalue="1" falsevalue="0" />
240 <param argument="POPDATA" checked="true" type="boolean" label="Input file contains a user-defined population-of-origin for each individual" truevalue="1" falsevalue="0" />
241 <param argument="LOCDATA" checked="false" type="boolean" label="Input file contains a location identifier" truevalue="1" falsevalue="0" />
242
243 <param argument="PHENOTYPE" checked="false" type="boolean" label="Input file contains phenotype information" truevalue="1" falsevalue="0" />
244 <param argument="EXTRACOLS" value="0" type="integer" label="Number of additional columns of data before the genotype data start." />
245
246 <param argument="MARKERNAMES" checked="true" type="boolean" label="Data file contains row of marker names" truevalue="1" falsevalue="0" />
247 <conditional name="recessivealleles_cond">
248 <param argument="RECESSIVEALLELES" type="select" label="Data file contains dominant markers (eg AFLPs) and a row to indicate which alleles are recessive" >
249 <option value="0" selected="True">No</option>
250 <option value="1">Yes</option>
251 </param>
252 <when value="0">
253 <param argument="NOTAMBIGUOUS" value="-999" type="hidden" label="Defines the code indicating that genotype data at a marker are unambiguous." help="For use with polyploids when RECESSIVEALLELES=1/True. Must not match MISSING or any allele value in the data." />
254 </when>
255 <when value="1">
256 <param argument="NOTAMBIGUOUS" value="-999" type="integer" label="Defines the code indicating that genotype data at a marker are unambiguous." help="For use with polyploids when RECESSIVEALLELES=1/True. Must not match MISSING or any allele value in the data." />
257 </when>
258 </conditional>
259 <param argument="MAPDISTANCES" checked="false" type="boolean" label="Data file contains row of map distances between loci" truevalue="1" falsevalue="0" />
260
261
262 <!--Advanced data file options-->
263
264 <param argument="PHASED" checked="false" type="boolean" label="Data are in correct phase (relevant for linkage model only)" truevalue="1" falsevalue="0" />
265 <param argument="PHASEINFO" checked="false" type="boolean" label="The data for each individual contains a line indicating phase (linkage model)" truevalue="1" falsevalue="0" />
266 <param argument="MARKOVPHASE" checked="false" type="boolean" label="The phase info follows a Markov model." truevalue="1" falsevalue="0" />
267 </section>
268 <section name="extra" title="extraparams" expanded="False">
269
270 <param argument="NOADMIX" checked="false" type="boolean" label="Use no admixture model" help="(0/False=admixture model, 1/True=no-admix)" truevalue="1" falsevalue="0" />
271 <param argument="LINKAGE" checked="false" type="boolean" label="Use the linkage model" truevalue="1" falsevalue="0" /> <!-- rendered as 1/0 into the LINKAGE define of the extraparams configfile -->
272 <conditional name="usepopinfo_cond">
273 <param argument="USEPOPINFO" type="select" label="Use prior population information to pre-assign individuals to clusters">
274 <option value="0" selected="True">No</option>
275 <option value="1">Yes</option>
276 </param>
277 <when value="0">
278 <param argument="POPFLAG" value="0" type="hidden" label="Input file contains a flag which says whether to use popinfo" help="[mainparams] when USEPOPINFO is 1/True" />
279 <param argument="MIGRPRIOR" value="0.01" type="hidden" label="Prior prob that an individual is a migrant" help="(used only when USEPOPINFO==1/True). This should be small, eg 0.01 or 0.1." />
280 </when>
281 <when value="1">
282 <param argument="POPFLAG" checked="false" type="boolean" label="Input file contains a flag which says whether to use popinfo" help="[mainparams] when USEPOPINFO is 1/True" truevalue="1" falsevalue="0" />
283 <param argument="MIGRPRIOR" value="0.01" type="float" label="Prior prob that an individual is a migrant" help="(used only when USEPOPINFO==1/True). This should be small, eg 0.01 or 0.1." />
284 </when>
285 </conditional>
286 <param argument="LOCPRIOR" checked="false" type="boolean" label="Use location information to improve weak data" truevalue="1" falsevalue="0" />
287
288 <param argument="FREQSCORR" checked="true" type="boolean" label="Allele frequencies are correlated among pops" truevalue="1" falsevalue="0" />
289 <param argument="ONEFST" checked="false" type="boolean" label="Assume same value of Fst for all subpopulations" truevalue="1" falsevalue="0" />
290
291 <conditional name="inferalpha_cond">
292 <param argument="INFERALPHA" type="select" label="Infer ALPHA (the admixture parameter)">
293 <option value="1" selected="True">Yes</option>
294 <option value="0">No</option>
295 </param>
296 <when value="1">
297 <param argument="ALPHA" value="1.0" type="float" label="Dirichlet parameter for degree of admixture" help="this is the initial value if INFERALPHA is 1/True." />
298 </when>
299 <when value="0">
300 <param argument="ALPHA" value="1.0" type="hidden" label="Dirichlet parameter for degree of admixture" help="this is the initial value if INFERALPHA is 1/True." />
301 </when>
302 </conditional>
303 <param argument="POPALPHAS" checked="false" type="boolean" label="Individual alpha for each population" truevalue="1" falsevalue="0" />
304
305 <conditional name="inferlambda_cond">
306 <param argument="INFERLAMBDA" type="select" label="Infer LAMBDA (the allele frequencies parameter)">
307 <option value="0" selected="True">No</option>
308 <option value="1">Yes</option>
309 </param>
310 <when value="0">
311 <param argument="POPSPECIFICLAMBDA" value="0" type="hidden" label="Infer a separate lambda for each pop" help="(only if INFERLAMBDA=1/True)." />
312 </when>
313 <when value="1">
314 <param argument="POPSPECIFICLAMBDA" checked="false" type="boolean" label="Infer a separate lambda for each pop" help="(only if INFERLAMBDA=1/True)." truevalue="1" falsevalue="0" />
315 </when>
316 </conditional>
317 <param argument="LAMBDA" value="1.0" type="float" label="Dirichlet parameter for allele frequencies" />
318
319
320 <!-- PRIORS -->
321
322 <param argument="FPRIORMEAN" value="0.01" type="float" label="The Prior (Gamma distribution) mean of Fst for pops." />
323 <param argument="FPRIORSD" value="0.05" type="float" label="The Prior (Gamma distribution) Standard Deviation of Fst for pops." />
324
325 <conditional name="unifprioralpha_cond">
326 <param argument="UNIFPRIORALPHA" type="select" label="Use a uniform prior for alpha; otherwise gamma prior">
327 <option value="1" selected="True">Yes</option>
328 <option value="0">No</option>
329 </param>
330 <when value="1">
331 <param argument="ALPHAPRIORA" value="1.0" type="hidden" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)" />
332 <param argument="ALPHAPRIORB" value="2.0" type="hidden" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)" />
333 </when>
334 <when value="0">
335 <param argument="ALPHAPRIORA" value="1.0" type="float" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)"/>
336 <param argument="ALPHAPRIORB" value="2.0" type="float" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)"/>
337 </when>
338 </conditional>
339 <param argument="ALPHAMAX" value="10.0" type="float" label="Max value of alpha if uniform prior" />
340
341
342 <param argument="LOG10RMIN" value="-4.0" type="float" label="Log10 of minimum allowed value of r under linkage model" />
343 <param argument="LOG10RMAX" value="1.0" type="float" label="Log10 of maximum allowed value of r" />
344 <param argument="LOG10RPROPSD" value="0.1" type="float" label="Standard deviation of log r in update" />
345 <param argument="LOG10RSTART" value="-2.0" type="float" label="Initial value of log10 r" />
346
347
348 <!-- USING PRIOR POPULATION INFO (USEPOPINFO) -->
349
350 <param argument="GENSBACK" value="2" type="integer" label="For use when inferring whether an individual is an immigrant, or has an immigrant ancestor in the past GENSBACK generations." help="eg, if GENSBACK==2, it tests for immigrant ancestry back to grandparents." /> <!-- integer written to the GENSBACK define of the extraparams configfile -->
351 <param argument="PFROMPOPFLAGONLY" checked="false" type="boolean" label="Only use individuals with POPFLAG=1 to update P." help="This is to enable use of a reference set of individuals for clustering additional 'test' individuals." truevalue="1" falsevalue="0" />
352
353 <!-- LOCPRIOR MODEL FOR USING LOCATION INFORMATION -->
354
355 <param argument="LOCISPOP" checked="true" type="boolean" label="Use POPDATA for location information" truevalue="1" falsevalue="0" />
356 <param argument="LOCPRIORINIT" value="1.0" type="float" label="Initial value for r, the location prior" />
357 <param argument="MAXLOCPRIOR" value="20.0" type="float" label="Max allowed value for r" />
358
359 <!-- OUTPUT OPTIONS -->
360
361 <param argument="PRINTNET" checked="true" type="boolean" label="Print the 'net nucleotide distance' to screen during the run" truevalue="1" falsevalue="0" />
362 <param argument="PRINTLAMBDA" checked="true" type="boolean" label="Print current value(s) of lambda to screen" truevalue="1" falsevalue="0" />
363 <param argument="PRINTQSUM" checked="true" type="boolean" label="Print summary of current population membership to screen" truevalue="1" falsevalue="0" />
364
365 <param argument="SITEBYSITE" checked="false" type="boolean" label="whether or not to print site by site results." help="(Linkage model only) This is a large file!" truevalue="1" falsevalue="0" />
366 <param argument="PRINTQHAT" checked="false" type="boolean" label="Q-hat printed to a separate file." help="Turn this on before using STRAT." truevalue="1" falsevalue="0" />
367 <param argument="UPDATEFREQ" value="100" type="integer" label="Frequency of printing update on the screen." help="Set automatically if this is 0/False." />
368 <param argument="PRINTLIKES" checked="false" type="boolean" label="Print current likelihood to screen every rep" truevalue="1" falsevalue="0" />
369 <param argument="INTERMEDSAVE" value="0" type="integer" label="Number of saves to file during run" />
370
371 <param argument="ECHODATA" checked="false" type="boolean" label="Print some of data file to screen to check that the data entry is correct." truevalue="1" falsevalue="0" /> <!-- The next 3 parameters (ANCESTDIST, NUMBOXES, ANCESTPINT) are for collecting the distribution of Q; that heading was previously shown, misleadingly, as this parameter's help text -->
372 <param argument="ANCESTDIST" checked="false" type="boolean" label="Collect data about the distribution of ancestry coefficients (Q) for each individual" truevalue="1" falsevalue="0" />
373 <param argument="NUMBOXES" value="1000" type="integer" label="The distribution of Q values is stored as a histogram with this number of boxes." />
374 <param argument="ANCESTPINT" value="0.90" type="float" label="The size of the displayed probability interval on Q (values between 0.0--1.0)" />
375
376
377
378 <!-- MISCELLANEOUS -->
379
380 <param argument="COMPUTEPROB" checked="true" type="boolean" label="Estimate the probability of the Data under the model." help="This is used when choosing the best number of subpopulations." truevalue="1" falsevalue="0" />
381 <param argument="ADMBURNIN" value="500" type="integer" label="Initial period of burnin with admixture model" help="[only relevant for linkage model] see Documentation" />
382 <param argument="ALPHAPROPSD" value="0.025" type="float" label="SD of proposal for updating alpha" />
383 <param argument="STARTATPOPINFO" checked="false" type="boolean" label="Use given populations as the initial condition for population origins." help="(Need POPDATA==1). It is assumed that the PopData in the input file are between 1 and k where k is less or equal MAXPOPS." truevalue="1" falsevalue="0" />
384 <conditional name="randomize_cond"> <!-- RANDOMIZE selects whether SEED is user-editable; both values are always rendered into extraparams -->
385 <param argument="RANDOMIZE" type="select" label="Use new random seed for each run">
386 <option value="1" selected="True">Yes</option>
387 <option value="0">No</option>
388 </param>
389 <when value="1"> <!-- random seed per run: SEED is kept as a hidden placeholder so the configfile reference still resolves -->
390 <param argument="SEED" value="2245" type="hidden" label="Seed value for random number generator" help="(must set RANDOMIZE=0)" />
391 </when>
392 <when value="0"> <!-- fixed seed: expose SEED to the user -->
393 <param argument="SEED" value="2245" type="integer" label="seed value for random number generator" help="(must set RANDOMIZE=0)" />
394 </when>
395 </conditional>
396 <param argument="METROFREQ" value="10" type="integer" label="Frequency of using Metropolis step to update Q under admixture model" help="(ie use the metr. move every i steps). If this is set to 0, it is never used. (Proposal for each q^(i) sampled from prior. The goal is to improve mixing for small alpha.)" />
397 <param argument="REPORTHITRATE" checked="false" type="boolean" label="Report hit rate if using METROFREQ" truevalue="1" falsevalue="0" />
398 </section>
399 </inputs>
400 <outputs> <!-- Two parameter files plus two discovered collections (results and logs), all labelled with the chosen K (main.MAXPOPS) -->
401 <data name="out_mainparams" format="txt" label="run_K_${main.MAXPOPS}.mainparams" /> <!-- the rendered mainparams configfile, moved to this path by the command -->
402 <data name="out_extraparams" format="txt" label="run_K_${main.MAXPOPS}.extraparams" /> <!-- the rendered extraparams configfile, moved to this path by the command -->
403 <collection name="out" type="list" label="run_K_${main.MAXPOPS}.out">
404 <discover_datasets pattern="__name__" format="tabular" directory="out" /> <!-- one element per file in out/, named after the file (run<N>_K_<MAXPOPS>.out, one per run) -->
405 </collection>
406 <collection name="log" type="list" label="run_K_${main.MAXPOPS}.log">
407 <discover_datasets pattern="__name__" format="tabular" directory="log" /> <!-- one element per captured stdout file in log/ (run<N>_K_<MAXPOPS>.log). NOTE(review): these look like free-form text rather than tables; confirm whether format="txt" would be more appropriate -->
408 </collection>
409 </outputs>
410 <tests>
411 <test>
412 <!-- https://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/html/structure-data.html -->
413 <param name="infile" value="testdata1" />
414 <param name="nb_run" value="2" />
415 <section name="main">
416 <param name="NUMINDS" value="200" />
417 <param name="MAXPOPS" value="2" />
418 <param name="LABEL" value="1" />
419 <param name="POPDATA" value="1" />
420 <param name="NUMLOCI" value="5" />
421 <param name="LOCDATA" value="1" />
422 <param name="PLOIDY" value="2" />
423 <param name="MISSING" value="-999" />
424 <param name="ONEROWPERIND" value="0" />
425 <param name="MARKERNAMES" value="0" />
426 </section>
427 <section name="extra">
428 <conditional name="randomize_cond">
429 <param name="RANDOMIZE" value="0" />
430 </conditional>
431 </section>
432 <output_collection name="out" type="list">
433 <element name="run1_K_2.out" value="testdata1_f" lines_diff="6" />
434 <element name="run2_K_2.out" value="testdata1_f" lines_diff="6" />
435 </output_collection>
436 <output_collection name="log" type="list">
437 <element name="run1_K_2.log">
438 <assert_contents>
439 <has_line line="Final results printed to file outfile_f" />
440 </assert_contents>
441 </element>
442 <element name="run2_K_2.log">
443 <assert_contents>
444 <has_line line="Final results printed to file outfile_f" />
445 </assert_contents>
446 </element>
447 </output_collection>
448 </test>
449 </tests>
450 <help><![CDATA[
451 **Introduction**
452
453 The program structure_ implements a model-based clustering method for inferring population structure
454 using genotype data consisting of unlinked markers. The method was introduced in a paper
455 by Pritchard, Stephens and Donnelly (2000a) and extended in sequels by Falush, Stephens and
456 Pritchard (2003a, 2007). Applications of our method include demonstrating the presence of population
457 structure, identifying distinct genetic populations, assigning individuals to populations, and
458 identifying migrants and admixed individuals.
459
460 Briefly, we assume a model in which there are K populations (where K may be unknown),
461 each of which is characterized by a set of allele frequencies at each locus. Individuals in the
462 sample are assigned (probabilistically) to populations, or jointly to two or more populations if their
463 genotypes indicate that they are admixed. It is assumed that within populations, the loci are at
464 Hardy-Weinberg equilibrium, and linkage equilibrium. Loosely speaking, individuals are assigned
465 to populations in such a way as to achieve this.
466
467 Our model does not assume a particular mutation process, and it can be applied to most of the
468 commonly used genetic markers including microsatellites, SNPs and RFLPs. The model assumes
469 that markers are not in linkage disequilibrium (LD) within subpopulations, so we can’t handle
470 markers that are extremely close together. Starting with version 2.0, we can now deal with weakly
471 linked markers.
472
473 While the computational approaches implemented here are fairly powerful, some care is needed
474 in running the program in order to ensure sensible answers. For example, it is not possible to
475 determine suitable run-lengths theoretically, and this requires some experimentation on the part of
476 the user. This document describes the use and interpretation of the software and supplements the
477 published papers, which provide more formal descriptions and evaluations of the methods.
478
479 .. _structure: https://web.stanford.edu/group/pritchardlab/structure.html
480
481 **Documentation**
482
483 Please see the full Structure documentation_
484
485 .. _documentation: https://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_doc.pdf
486
487 **Upstream**
488
489 Inputs can be produced from:
490
491 - Microsatellite analysis
492 - RADSeq analysis (eg: using populations_ from Stacks suite)
493
494 .. _populations: http://catchenlab.life.illinois.edu/stacks/manual/#export
495
496 **Input**
497
498 ======= === ===== ===== ===== ===== =====
499 loc_a loc_b loc_c loc_d loc_e
500 ======= === ===== ===== ===== ===== =====
501 George 1 -9 145 66 0 92
502 George 1 -9 -9 64 0 94
503 Paula 1 106 142 68 1 92
504 Paula 1 106 148 64 0 94
505 Matthew 2 110 145 -9 0 92
506 Matthew 2 110 148 66 1 -9
507 Bob 2 108 142 64 1 94
508 Bob 2 -9 142 -9 0 94
509 Anja 1 112 142 -9 1 -9
510 Anja 1 114 142 66 1 94
511 Peter 1 -9 145 66 0 -9
512 Peter 1 110 145 -9 1 -9
513 Carsten 2 108 145 62 0 -9
514 Carsten 2 110 145 64 1 92
515 ======= === ===== ===== ===== ===== =====
516
517 You will find other sample data sets: here_
518
519 .. _here: https://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/html/structure-data.html
520
521 **Downstream**
522
523 - Clumpp_
524 - Distruct_
525 - Structure-harvester_
526
527 .. _Clumpp: https://rosenberglab.stanford.edu/clumpp.html
528 .. _Distruct: https://rosenberglab.stanford.edu/distruct.html
529 .. _Structure-harvester: http://taylor0.biology.ucla.edu/structureHarvester/
530
531 ]]></help>
532 <citations>
533 <citation type="doi">10.1111/j.1471-8286.2007.01758.x</citation>
534 <citation type="doi">10.1111/j.1755-0998.2009.02591.x</citation>
535 </citations>
536 </tool>