0
|
1 <tool id="rgQC1" name="QC reports:">
|
|
2
|
|
3 <description>Marker and Subject measures</description>
|
|
4
|
|
<!--
  Runs rgQC.py under the python interpreter. Values passed:
    -i  the input dataset's extra_files_path joined with its base_name metadata
    -o  the user-supplied report title
    -s  the path of the html_file output dataset
    -p  the files_path directory belonging to html_file
  The meaning of each flag is defined by rgQC.py, which is not shown here.
  The CDATA section keeps the template text literal; the text itself is unchanged.
-->
<command interpreter="python"><![CDATA[
rgQC.py -i '$input_file.extra_files_path/$input_file.metadata.base_name' -o "$title"
-s '$html_file' -p '$html_file.files_path'
]]></command>
|
|
9
|
|
<inputs>
  <!-- Genotype dataset to be checked; only datasets of format "pbed" are offered. -->
  <param name="input_file"
         type="data"
         format="pbed"
         size="80"
         label="RGenetics genotype file in compressed Plink format"/>
  <!-- Free-text title; it is passed to rgQC.py and also used in the output label. -->
  <param name="title"
         type="text"
         size="80"
         value="RgQC report"
         label="Descriptive report title"/>
</inputs>
|
|
15
|
|
<outputs>
  <!-- Single HTML report dataset; metadata is taken from input_file and the
       history label is the user-supplied title followed by ".html". -->
  <data name="html_file"
        format="html"
        metadata_source="input_file"
        label="${title}.html"/>
</outputs>
|
|
19
|
|
<tests>
  <test>
    <!-- Composite pbed input assembled from the tinywga .bim/.bed/.fam files. -->
    <param name="input_file" value="tinywga" ftype="pbed">
      <metadata name="base_name" value="tinywga"/>
      <composite_data value="tinywga.bim"/>
      <composite_data value="tinywga.bed"/>
      <composite_data value="tinywga.fam"/>
      <edit_attributes type="name" value="tinywga"/>
    </param>
    <param name="title" value="rgQCtest1"/>
    <!--
      The HTML report is compared with a tolerance of 300 differing lines.
      Most extra files are compared with diff, some with a lines_diff tolerance;
      the PDF/JPG files and ldp_tinywga.bim are compared by size (sim_size) within delta.
    -->
    <output name="html_file" file="rgtestouts/rgQC/rgQCtest1.html" ftype="html" lines_diff="300">
      <!-- NOTE(review): a param nested inside output is unusual; confirm the test framework honours it. -->
      <param name="dbkey" value="hg18"/>
      <extra_files type="file" name="tinywga_All_Paged.pdf" value="rgtestouts/rgQC/tinywga_All_Paged.pdf" compare="sim_size" delta="100000"/>
      <extra_files type="file" name="tinywga.log" value="rgtestouts/rgQC/tinywga.log" compare="diff" lines_diff="15"/>
      <extra_files type="file" name="tinywga.frq" value="rgtestouts/rgQC/tinywga.frq" compare="diff"/>
      <extra_files type="file" name="tinywga.het" value="rgtestouts/rgQC/tinywga.het" compare="diff" lines_diff="90"/>
      <extra_files type="file" name="tinywga.hwe" value="rgtestouts/rgQC/tinywga.hwe" compare="diff" lines_diff="90"/>
      <extra_files type="file" name="tinywga.imendel" value="rgtestouts/rgQC/tinywga.imendel" compare="diff"/>
      <extra_files type="file" name="tinywga.imiss" value="rgtestouts/rgQC/tinywga.imiss" compare="diff"/>
      <extra_files type="file" name="tinywga.lmendel" value="rgtestouts/rgQC/tinywga.lmendel" compare="diff"/>
      <extra_files type="file" name="tinywga.lmiss" value="rgtestouts/rgQC/tinywga.lmiss" compare="diff"/>
      <extra_files type="file" name="tinywga_All_3x3.pdf" value="rgtestouts/rgQC/tinywga_All_3x3.pdf" compare="sim_size" delta="100000"/>
      <extra_files type="file" name="ldp_tinywga.bed" value="rgtestouts/rgQC/ldp_tinywga.bed" compare="diff" lines_diff="10"/>
      <extra_files type="file" name="ldp_tinywga.bim" value="rgtestouts/rgQC/ldp_tinywga.bim" compare="sim_size" delta="1000"/>
      <extra_files type="file" name="ldp_tinywga.fam" value="rgtestouts/rgQC/ldp_tinywga.fam" compare="diff"/>
      <extra_files type="file" name="ldp_tinywga.log" value="rgtestouts/rgQC/ldp_tinywga.log" compare="diff" lines_diff="20"/>
      <extra_files type="file" name="Ranked_Marker_HWE.xls" value="rgtestouts/rgQC/Ranked_Marker_HWE.xls" compare="diff"/>
      <extra_files type="file" name="Ranked_Marker_MAF.xls" value="rgtestouts/rgQC/Ranked_Marker_MAF.xls" compare="diff"/>
      <extra_files type="file" name="Ranked_Marker_Missing_Genotype.xls" value="rgtestouts/rgQC/Ranked_Marker_Missing_Genotype.xls" compare="diff" lines_diff="5"/>
      <extra_files type="file" name="Ranked_Subject_Missing_Genotype.xls" value="rgtestouts/rgQC/Ranked_Subject_Missing_Genotype.xls" compare="diff" lines_diff="40"/>
      <extra_files type="file" name="tinywga_fracmiss_cum.jpg" value="rgtestouts/rgQC/tinywga_fracmiss_cum.jpg" compare="sim_size" delta="20000"/>
      <extra_files type="file" name="tinywga_fracmiss_cum.pdf" value="rgtestouts/rgQC/tinywga_fracmiss_cum.pdf" compare="sim_size" delta="100000"/>
    </output>
  </test>
</tests>
|
|
55 <help>
|
|
56
|
|
57 .. class:: infomark
|
|
58
|
|
59 **Summary**
|
|
60
|
|
This tool prepares an extensive and comprehensive series of reports for quality control checking of SNP genotypes from any
genotyping experiment. It was designed for family-based data, so it includes optional reports on Mendelian errors by
subject and by marker.
|
|
64
|
|
The outputs include histograms and boxplots for missingness, MAF, Mendelian error counts and HWE by marker, and the ones that make sense by
subject. The report is built as a single web page containing links to the summary marker and subject files.
|
|
67
|
|
The F (inbreeding) statistic is calculated using a somewhat LD-independent group of genotypes.
The Plink option used is --indep-pairwise 40 20 0.5 until we make it configurable.
High heterozygosity might mean a contaminated sample (more than one source of DNA). Low heterozygosity might mean inbreeding, as in strains
of mice.
|
|
72
|
|
73 If the data file you want is missing from the option list above,
|
|
74 you will first need to "import" it so it will be available here. Files available in the system library
|
|
75 can be imported by selecting and completing the "Import ped/map" choice from the Get Data tool group at the top of the Galaxy
|
|
76 menu. Your system administrator will be responsible for adding files to the system library.
|
|
77
|
|
78 -----
|
|
79
|
|
80 .. class:: infomark
|
|
81
|
|
82 **Syntax**
|
|
83
|
|
- **Genotype file** is the input genotype file in compressed Plink (pbed) format
- **Descriptive report title** is a string used to label the HTML report output
|
|
86
|
|
87 -----
|
|
88
|
|
89 **Attribution**
|
|
90
|
|
This Galaxy tool was written by Ross Lazarus for the Rgenetics project.
The current version uses Plink for most calculations and R for plotting. For full Plink attribution, source code and documentation,
please see http://pngu.mgh.harvard.edu/~purcell/plink/ while R attribution and source code can be found at http://r-project.org
|
|
94
|
|
95 Shaun Purcell provides the documentation you need specific to those settings, at
|
|
96 http://pngu.mgh.harvard.edu/~purcell/plink/anal.shtml#glm
|
|
97
|
|
Tool and Galaxy datatypes originally designed and written for the Rgenetics
series of whole genome scale statistical genetics tools by Ross Lazarus (ross.lazarus@gmail.com).
Shaun Purcell created and maintains Plink, while a cast of many maintain R.
|
|
101
|
|
102 Please acknowledge your use of this tool, Galaxy, R and Plink in your publications and let
|
|
103 us know so we can keep track. These tools all rely on highly competitive grant funding
|
|
104 so your letting us know about publications is important to our ongoing support.
|
|
105
|
|
106 </help>
|
|
107
|
|
108
|
|
109
|
|
110 </tool>
|