Mercurial > repos > xuebing > sharplabtool
diff tools/rgenetics/rgGLM.xml @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/rgenetics/rgGLM.xml Fri Mar 09 19:37:19 2012 -0500 @@ -0,0 +1,146 @@ +<tool id="rgGLM1" name="Linear Models:" version="0.2"> + <description>for genotype data</description> + <code file="rgGLM_code.py"/> + <command interpreter="python"> + rgGLM.py '$i.extra_files_path/$i.metadata.base_name' '$phef.extra_files_path/$phef.metadata.base_name' + "$title" '$predvar' '$covar' '$out_file1' '$logf' '$i.metadata.base_name' + '$inter' '$cond' '$gender' '$mind' '$geno' '$maf' '$logistic' '$gffout' + </command> + + <inputs> + <page> + <param name='title' label='Title for outputs' type='text' value='GLM' size="80" /> + <param name="i" type="data" format="pbed" label="Genotype file" size="80" /> + <param name="phef" type="data" format="pphe" label="Phenotype file" size="80" + help="Dependent variable and covariates will be chosen from this file on the next page"/> + <param name="logistic" type="text" value = "0" label="1=Use a logistic model (trait must be 1/2 coded like affection)" + help="Please read the Plink documentation about this option" /> + <param name="gender" type="text" value = "0" label="1=Add a gender term to model" /> + <param name='inter' label='1=Build an interaction model - please read the docs carefully before using this' + type='text' value='0' size="1" /> + <param name="cond" type="text" area='true' size='15x20' value = "" + label="condition on this whitespace delimited rs (snp id) list" /> + <param name="mind" type="float" value = "0.1" label="Remove subjects with missing genotypes gt (eg 0.1)" + help = "Set to 1 to include all subjects in the input file" /> + <param name="geno" type="float" value = "0.1" label="Remove markers with missing genotypes gt (eg 0.1)" + help = "Set to 1 to include all markers in the input file" /> + <param name="maf" type="float" value = "0.01" label="Remove markers with MAF lt (eg 0.01) " + help = "Set to 0 to include all markers in the input file"/> + </page> + <page> + 
<param name="predvar" size="80" type="select" label="Dependent Trait" + dynamic_options="get_phecols(phef=phef,selectOne=1)" display="radio" multiple="false" + help="Model this characteristic in terms of subject snp genotypes - eg rare allele dosage for additive model" /> + <param name="covar" size="80" type="select" label="Covariates" + dynamic_options="get_phecols(phef=phef,selectOne=0)" multiple="true" display="checkboxes" + help="Use these phenotypes as covariates in models of snp dosage effects on the dependent trait"/> + </page> + </inputs> + + <outputs> + <data format="tabular" name="out_file1" label="${title}_rgGLM.xls"/> + <data format="txt" name="logf" label="${title}_rgGLMlog.txt" /> + <data format="gff" name="gffout" label="${title}_rgGLM.gff"/> + </outputs> +<tests> + <test> + <param name='i' value='tinywga' ftype='pbed' > + <metadata name='base_name' value='tinywga' /> + <composite_data value='tinywga.bim' /> + <composite_data value='tinywga.bed' /> + <composite_data value='tinywga.fam' /> + <edit_attributes type='name' value='tinywga' /> + </param> + <param name='phef' value='tinywga' ftype='pphe' > + <metadata name='base_name' value='tinywga' /> + <composite_data value='tinywga.pphe' /> + <edit_attributes type='name' value='tinywga' /> + </param> + <param name='title' value='rgGLMtest1' /> + <param name='predvar' value='c1' /> + <param name='covar' value='None' /> + <param name='inter' value='0' /> + <param name='cond' value='' /> + <param name='gender' value='0' /> + <param name='mind' value='1.0' /> + <param name='geno' value='1.0' /> + <param name='maf' value='0.0' /> + <param name='logistic' value='0' /> + <output name='out_file1' file='rgGLMtest1_GLM.xls' ftype='tabular' compare="diff" /> + <output name='logf' file='rgGLMtest1_GLM_log.txt' ftype='txt' compare="diff" lines_diff='36'/> + <output name='gffout' file='rgGLMtest1_GLM_topTable.gff' compare="diff" ftype='gff' /> + </test> +</tests> +<help> + +.. 
class:: infomark + +**Syntax** + +Note this is a two form tool - you will choose the dependent trait and covariates +on the second page based on the phenotype file you choose on the first page + +- **Genotype file** is the input Plink format compressed genotype (pbed) file +- **Phenotype file** is the input Plink phenotype (pphe) file with FAMID IID followed by phenotypes +- **Dependent variable** is the term on the left of the model and is chosen from the pphe columns on the second page +- **Logistic** if you are (eg) using disease status as the outcome variable (case/control) - otherwise the model is linear. +- **Covariates** are covariate terms on the right of the model, also chosen on the second page +- **Interactions** will add interactions - please be careful how you interpret these - see the Plink documentation. +- **Gender** will add gender as a model term - described in the Plink documentation +- **Condition** will condition the model on one or more specific SNP rs ids as a whitespace delimited sequence +- **Format** determines how your data will be returned to your Galaxy workspace + +----- + +.. class:: infomark + +**Summary** + +This tool will test GLM models for SNP predicting a dependent phenotype +variable with adjustment for specified covariates. + +If you don't see the genotype or phenotype data set you want here, it can be imported using +one of the methods available from the rg get data tool group. + +Output format can be UCSC .bed if you want to see one column of your +results as a fully fledged UCSC genome browser track. A map file containing the chromosome and offset for each marker is +required for writing this kind of output. +Alternatively you can use .gg for the UCSC Genome Graphs tool which has all of the advantages +of the .bed track, plus a neat, visual front end that displays a lot of useful clues. +Either of these is a very useful way of quickly getting a look +at your data in full genomic context. 
+ +Finally, if you can't live without +spreadsheet data, choose the .xls tab delimited format. It's not a stupid binary Excel file. Just a plain old tab +delimited +one with a header. Fortunately Excel is dumb enough to open these without much protest. + +----- + +.. class:: infomark + +**Attribution** + +This Galaxy tool relies on Plink (see Plinksrc_) to test GLM models. + +So, we rely on the author (Shaun Purcell) for the documentation you need specific to those settings - they are very nicely documented - see +DOC_ + +Tool and Galaxy datatypes originally designed and written for the Rgenetics +series of whole genome scale statistical genetics tools by Ross Lazarus (ross.lazarus@gmail.com) + +Copyright Ross Lazarus March 2007 +This Galaxy wrapper is released under the LGPL_ but is about as useful as a chocolate teapot without Plink which is GPL. + +I'm no lawyer, but it looks like you got GPL if you use this software. Good luck. + +.. _Plinksrc: http://pngu.mgh.harvard.edu/~purcell/plink/ + +.. _LGPL: http://www.gnu.org/copyleft/lesser.html + +.. _DOC: http://pngu.mgh.harvard.edu/~purcell/plink/anal.shtml#glm + +</help> +</tool> + +