sharplabtool: tools/rgenetics/rgLDIndep.xml annotate

annotate tools/rgenetics/rgLDIndep.xml @ 0:9071e359b9a3

Uploaded

author	xuebing
date	Fri, 09 Mar 2012 19:37:19 -0500
parents
children

rev	line source
0 9071e359b9a3 Uploaded xuebing parents: diff changeset	1 <tool id="rgLDIndep1" name="LD Independent:">
9071e359b9a3 Uploaded xuebing parents: diff changeset	2 <code file="rgLDIndep_code.py"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	3
9071e359b9a3 Uploaded xuebing parents: diff changeset	4 <description>filter high LD pairs - decrease redundancy</description>
9071e359b9a3 Uploaded xuebing parents: diff changeset	5
9071e359b9a3 Uploaded xuebing parents: diff changeset	6 <command interpreter="python">
9071e359b9a3 Uploaded xuebing parents: diff changeset	7 rgLDIndep.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title1' '$mind'
9071e359b9a3 Uploaded xuebing parents: diff changeset	8 '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1'
9071e359b9a3 Uploaded xuebing parents: diff changeset	9 '$out_file1.files_path' '$window' '$step' '$r2'
9071e359b9a3 Uploaded xuebing parents: diff changeset	10 </command>
9071e359b9a3 Uploaded xuebing parents: diff changeset	11
9071e359b9a3 Uploaded xuebing parents: diff changeset	12 <inputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	13 <param name="input_file" type="data" label="RGenetics genotype data from your current history"
9071e359b9a3 Uploaded xuebing parents: diff changeset	14 size="80" format="pbed" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	15 <param name="title1" type="text" size="80" label="Descriptive title for cleaned genotype file" value="LD_Independent"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	16 <param name="r2" type="float" value = "0.1"
9071e359b9a3 Uploaded xuebing parents: diff changeset	17 label="r2 threshold: Select only pairs at or below this r^2 threshold (eg 0.1)"
9071e359b9a3 Uploaded xuebing parents: diff changeset	18 help="LD threshold defining LD independent markers" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	19 <param name="window" type="integer" value = "40" label="Window: Window size to limit LD pairwise"
9071e359b9a3 Uploaded xuebing parents: diff changeset	20 help = "Bigger is better but time taken blows up exponentially as the window grows!" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	21 <param name="step" type="integer" value = "30" label="Step: Move window this far and recompute"
9071e359b9a3 Uploaded xuebing parents: diff changeset	22 help = "Smaller is better but of course, time increases..." />
9071e359b9a3 Uploaded xuebing parents: diff changeset	23 <param name="geno" type="float" label="Maximum Missing Fraction: Markers" value="1.0" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	24 <param name="mind" type="float" value="1.0" label="Maximum Missing Fraction: Subjects"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	25 <param name="mef" type="float" label="Maximum Mendel Error Rate: Family" value="1.0"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	26 <param name="mei" type="float" label="Maximum Mendel Error Rate: Marker" value="1.0"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	27 <param name="hwe" type="float" value="0.0" label="Smallest HWE p value (set to 0 for all)" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	28 <param name="maf" type="float" value="0.0"
9071e359b9a3 Uploaded xuebing parents: diff changeset	29 label="Smallest Allowable Minor Allele Frequency (set to 0.0 for all)"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	30
9071e359b9a3 Uploaded xuebing parents: diff changeset	31 </inputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	32
9071e359b9a3 Uploaded xuebing parents: diff changeset	33 <outputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	34 <data format="pbed" name="out_file1" metadata_source="input_file" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	35 </outputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	36 <tests>
9071e359b9a3 Uploaded xuebing parents: diff changeset	37 <test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	38
9071e359b9a3 Uploaded xuebing parents: diff changeset	39 <param name='input_file' value='tinywga' ftype='pbed' >
9071e359b9a3 Uploaded xuebing parents: diff changeset	40 <metadata name='base_name' value='tinywga' />
9071e359b9a3 Uploaded xuebing parents: diff changeset	41 <composite_data value='tinywga.bim' />
9071e359b9a3 Uploaded xuebing parents: diff changeset	42 <composite_data value='tinywga.bed' />
9071e359b9a3 Uploaded xuebing parents: diff changeset	43 <composite_data value='tinywga.fam' />
9071e359b9a3 Uploaded xuebing parents: diff changeset	44 <edit_attributes type='name' value='tinywga' />
9071e359b9a3 Uploaded xuebing parents: diff changeset	45 </param>
9071e359b9a3 Uploaded xuebing parents: diff changeset	46 <param name='title1' value='rgLDIndeptest1' />
9071e359b9a3 Uploaded xuebing parents: diff changeset	47 <param name="mind" value="1" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	48 <param name="geno" value="1" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	49 <param name="hwe" value="0" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	50 <param name="maf" value="0" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	51 <param name="mef" value="1" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	52 <param name="mei" value="1" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	53 <param name="window" value="10000" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	54 <param name="step" value="5000" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	55 <param name="r2" value="0.1" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	56 <output name='out_file1' file='rgtestouts/rgLDIndep/rgLDIndeptest1.pbed' ftype='pbed' compare="diff" lines_diff='7'>
9071e359b9a3 Uploaded xuebing parents: diff changeset	57 <extra_files type="file" name='rgLDIndeptest1.bim' value="rgtestouts/rgLDIndep/rgLDIndeptest1.bim" compare="sim_size" delta="1000"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	58 <extra_files type="file" name='rgLDIndeptest1.fam' value="rgtestouts/rgLDIndep/rgLDIndeptest1.fam" compare="diff" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	59 <extra_files type="file" name='rgLDIndeptest1.bed' value="rgtestouts/rgLDIndep/rgLDIndeptest1.bed" compare="sim_size" delta = "1000" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	60 </output>
9071e359b9a3 Uploaded xuebing parents: diff changeset	61 </test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	62 </tests>
9071e359b9a3 Uploaded xuebing parents: diff changeset	63 <help>
9071e359b9a3 Uploaded xuebing parents: diff changeset	64
9071e359b9a3 Uploaded xuebing parents: diff changeset	65 .. class:: infomark
9071e359b9a3 Uploaded xuebing parents: diff changeset	66
9071e359b9a3 Uploaded xuebing parents: diff changeset	67 Attribution
9071e359b9a3 Uploaded xuebing parents: diff changeset	68
9071e359b9a3 Uploaded xuebing parents: diff changeset	69 This tool relies on Plink from Shaun Purcell. For full documentation, please see his web site
9071e359b9a3 Uploaded xuebing parents: diff changeset	70 at http://pngu.mgh.harvard.edu/~purcell/plink/ where there is excellent documentation describing
9071e359b9a3 Uploaded xuebing parents: diff changeset	71 the parameters you can set here.
9071e359b9a3 Uploaded xuebing parents: diff changeset	72
9071e359b9a3 Uploaded xuebing parents: diff changeset	73 Rgenetics merely exposes them, wrapping Plink so you can use it in Galaxy.
9071e359b9a3 Uploaded xuebing parents: diff changeset	74
9071e359b9a3 Uploaded xuebing parents: diff changeset	75 Summary
9071e359b9a3 Uploaded xuebing parents: diff changeset	76
9071e359b9a3 Uploaded xuebing parents: diff changeset	77 In addition to filtering some marker and sample quality measures,
9071e359b9a3 Uploaded xuebing parents: diff changeset	78 this tool reduces the amount of overlapping information, by removing
9071e359b9a3 Uploaded xuebing parents: diff changeset	79 most of the duplicate information contained in linkage disequilibrium. This is
9071e359b9a3 Uploaded xuebing parents: diff changeset	80 a lossy process and for some methods, signal may be lost. However, this makes
9071e359b9a3 Uploaded xuebing parents: diff changeset	81 the dataset far more compact (eg 10% of the original storage size) while still
9071e359b9a3 Uploaded xuebing parents: diff changeset	82 being highly informative and less biased for some (note NOT all!) statistical methods.
9071e359b9a3 Uploaded xuebing parents: diff changeset	83 This is the Clean tool with additional data reduction via Plink LD pruning.
9071e359b9a3 Uploaded xuebing parents: diff changeset	84 Use the Clean tool if you don't want LD pruning - which you don't for most statistical testing.
9071e359b9a3 Uploaded xuebing parents: diff changeset	85 For ancestry and relatedness, you may well want LD pruned data as it has
9071e359b9a3 Uploaded xuebing parents: diff changeset	86 some specific desirable properties.
9071e359b9a3 Uploaded xuebing parents: diff changeset	87
9071e359b9a3 Uploaded xuebing parents: diff changeset	88 LD
9071e359b9a3 Uploaded xuebing parents: diff changeset	89
9071e359b9a3 Uploaded xuebing parents: diff changeset	90 Pairwise Linkage disequilibrium (LD) measures the extent to which the genotype at one locus
9071e359b9a3 Uploaded xuebing parents: diff changeset	91 predicts the state of another locus at the level of an entire population.
9071e359b9a3 Uploaded xuebing parents: diff changeset	92 When population LD between a pair of markers is high,
9071e359b9a3 Uploaded xuebing parents: diff changeset	93 knowing an individual's genotype at one locus allows confident prediction of the genotype at the other.
9071e359b9a3 Uploaded xuebing parents: diff changeset	94 In other words, high LD means information redundancy between markers. For some
9071e359b9a3 Uploaded xuebing parents: diff changeset	95 purposes, removing some of this redundancy can improve the performance of some analyses.
9071e359b9a3 Uploaded xuebing parents: diff changeset	96 Executing this tool will create a new genotype dataset in your current history containing
9071e359b9a3 Uploaded xuebing parents: diff changeset	97 LD independent markers - most of the genetic information is retained but without as much redundancy.
9071e359b9a3 Uploaded xuebing parents: diff changeset	98
9071e359b9a3 Uploaded xuebing parents: diff changeset	99 Set a pairwise LD threshold (eg r^2 = 0.2) and the (smaller) resulting dataset will have no
9071e359b9a3 Uploaded xuebing parents: diff changeset	100 pairs of markers with r^2 greater than 0.2. Additional filters are available to remove markers
9071e359b9a3 Uploaded xuebing parents: diff changeset	101 below a specific minor allele frequency, or above a specific level of missingness,
9071e359b9a3 Uploaded xuebing parents: diff changeset	102 and to remove subjects using similar criteria. Subjects and markers for family data can be
9071e359b9a3 Uploaded xuebing parents: diff changeset	103 filtered by proportions of Mendelian errors in observed transmission.
9071e359b9a3 Uploaded xuebing parents: diff changeset	104
9071e359b9a3 Uploaded xuebing parents: diff changeset	105 -----
9071e359b9a3 Uploaded xuebing parents: diff changeset	106
9071e359b9a3 Uploaded xuebing parents: diff changeset	107 Syntax
9071e359b9a3 Uploaded xuebing parents: diff changeset	108
9071e359b9a3 Uploaded xuebing parents: diff changeset	109 - Genotype data is the input pedfile chosen from available library files
9071e359b9a3 Uploaded xuebing parents: diff changeset	110 - New name is the name to use for the filtered output file
9071e359b9a3 Uploaded xuebing parents: diff changeset	111 - Missfrac threshold: subjects is the threshold for missingness by subject. Subjects with more than this fraction missing will be excluded from the import
9071e359b9a3 Uploaded xuebing parents: diff changeset	112 - Missfrac threshold: markers is the threshold for missingness by marker. Markers with more than this fraction missing will be excluded from the import
9071e359b9a3 Uploaded xuebing parents: diff changeset	113 - MaxMendel Individuals is the threshold for Mendel errors by subject. Subjects with more than this fraction of Mendelian errors in transmission will be excluded (for family data only)
9071e359b9a3 Uploaded xuebing parents: diff changeset	114 - MaxMendel Families is the threshold for Mendel errors by family. Families with more than this fraction of Mendelian errors in transmission will be excluded (for family data only)
9071e359b9a3 Uploaded xuebing parents: diff changeset	115 - HWE is the threshold for HWE test p values below which the marker will not be imported. Set this to -1 and all markers will be imported regardless of HWE p value
9071e359b9a3 Uploaded xuebing parents: diff changeset	116 - MAF is the threshold for minor allele frequency - SNPs with lower MAF will be excluded
9071e359b9a3 Uploaded xuebing parents: diff changeset	117 - r^2 is the pairwise LD threshold as r^2. Lower -> less marker redundancy -> fewer markers
9071e359b9a3 Uploaded xuebing parents: diff changeset	118 - Window is the window width for LD threshold. Bigger -> slower -> more complete
9071e359b9a3 Uploaded xuebing parents: diff changeset	119 - Step is the distance to move the window along the genome. Should be window or less.
9071e359b9a3 Uploaded xuebing parents: diff changeset	120
9071e359b9a3 Uploaded xuebing parents: diff changeset	121 -----
9071e359b9a3 Uploaded xuebing parents: diff changeset	122
9071e359b9a3 Uploaded xuebing parents: diff changeset	123 Disclaimer
9071e359b9a3 Uploaded xuebing parents: diff changeset	124
9071e359b9a3 Uploaded xuebing parents: diff changeset	125 This tool relies on Plink from Shaun Purcell. For full documentation, please see his web site
9071e359b9a3 Uploaded xuebing parents: diff changeset	126 at http://pngu.mgh.harvard.edu/~purcell/plink/ where there is excellent documentation describing
9071e359b9a3 Uploaded xuebing parents: diff changeset	127 the parameters you can set here. Rgenetics merely exposes them, and wraps Plink so you can use it in Galaxy.
9071e359b9a3 Uploaded xuebing parents: diff changeset	128
9071e359b9a3 Uploaded xuebing parents: diff changeset	129 This tool is designed to create genotype data files with more or less LD independent sets of markers. These
9071e359b9a3 Uploaded xuebing parents: diff changeset	130 reduced genotype data files are particularly useful for purposes such as evaluating
9071e359b9a3 Uploaded xuebing parents: diff changeset	131 ancestry (eg eigenstrat) or relatedness (eg rgGRR).
9071e359b9a3 Uploaded xuebing parents: diff changeset	132
9071e359b9a3 Uploaded xuebing parents: diff changeset	133 LD pruning decreases redundancy among the genotype data by removing one of each pair of markers
9071e359b9a3 Uploaded xuebing parents: diff changeset	134 in strong LD (above the r^2 threshold) over successive genomic windows (the Window parameter),
9071e359b9a3 Uploaded xuebing parents: diff changeset	135 skipping (the Step parameter) bases between windows. The defaults should produce useable outputs.
9071e359b9a3 Uploaded xuebing parents: diff changeset	136
9071e359b9a3 Uploaded xuebing parents: diff changeset	137 This might be more efficient for rgGRR and
9071e359b9a3 Uploaded xuebing parents: diff changeset	138 eigenstrat...The core quote is
9071e359b9a3 Uploaded xuebing parents: diff changeset	139
9071e359b9a3 Uploaded xuebing parents: diff changeset	140 "This generates the same output files as the first version;
9071e359b9a3 Uploaded xuebing parents: diff changeset	141 the only difference is that a simple pairwise threshold is used.
9071e359b9a3 Uploaded xuebing parents: diff changeset	142 The first two parameters (50 and 5) are the same as above (window size and step);
9071e359b9a3 Uploaded xuebing parents: diff changeset	143 the third parameter represents the r^2 threshold.
9071e359b9a3 Uploaded xuebing parents: diff changeset	144 Note: this represents the pairwise SNP-SNP metric now, not the
9071e359b9a3 Uploaded xuebing parents: diff changeset	145 multiple correlation coefficient; also note, this is based on the
9071e359b9a3 Uploaded xuebing parents: diff changeset	146 genotypic correlation, i.e. it does not involve phasing.
9071e359b9a3 Uploaded xuebing parents: diff changeset	147 "
9071e359b9a3 Uploaded xuebing parents: diff changeset	148
9071e359b9a3 Uploaded xuebing parents: diff changeset	149 -----
9071e359b9a3 Uploaded xuebing parents: diff changeset	150
9071e359b9a3 Uploaded xuebing parents: diff changeset	151
9071e359b9a3 Uploaded xuebing parents: diff changeset	152
9071e359b9a3 Uploaded xuebing parents: diff changeset	153 This Galaxy tool was written by Ross Lazarus for the Rgenetics project.
9071e359b9a3 Uploaded xuebing parents: diff changeset	154 It uses Plink for most calculations, plus some custom python code - for full Plink attribution,
9071e359b9a3 Uploaded xuebing parents: diff changeset	155 source code and documentation, please see http://pngu.mgh.harvard.edu/~purcell/plink/
9071e359b9a3 Uploaded xuebing parents: diff changeset	156
9071e359b9a3 Uploaded xuebing parents: diff changeset	157 </help>
9071e359b9a3 Uploaded xuebing parents: diff changeset	158 </tool>

Mercurial > repos > xuebing > sharplabtool

annotate tools/rgenetics/rgLDIndep.xml @ 0:9071e359b9a3