diff aggregate_gd_indivs.xml @ 13:fdb4240fb565

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:34:31 -0400
parents
children f04f40a36cc8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aggregate_gd_indivs.xml	Fri Sep 28 11:34:31 2012 -0400
@@ -0,0 +1,86 @@
+<tool id="gd_sum_gd_snp" name="Aggregate Individuals" version="1.0.0">
+  <description>: Append summary columns for a population</description>
+
+  <command interpreter="python">
+    modify_snp_table.py "$input" "$p1_input" "$output" "-1" "-1" "-1" "-1"
+    #for $indiv_col, $indiv_name in zip($input.dataset.metadata.individual_columns, $input.dataset.metadata.individual_names)
+        #set $col_spec = '%s:%s' % ($indiv_col, $indiv_name)
+        "$col_spec"
+    #end for
+  </command>
+
+  <inputs>
+    <param type="data" format="gd_snp" name="input" label="SNP dataset" /> <!-- gd_snp table the summary columns are appended to -->
+    <param type="data" format="gd_indivs" name="p1_input" label="Population individuals" /> <!-- individuals that make up the population -->
+  </inputs>
+
+  <outputs>
+    <data format="gd_snp" name="output" metadata_source="input" /> <!-- result table; metadata is taken from the "input" dataset -->
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <!--
+        Only "input" and "p1_input" are set: they are the sole params this
+        tool declares, and the command line hard-codes its four "-1" arguments.
+        NOTE(review): confirm the expected file below matches those "-1" values.
+      -->
+      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_ and gd_indivs_ formats.
+The output dataset is in gd_snp_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user specifies that some of the individuals in a gd_snp dataset form a
+"population", by supplying a list that has been previously created using the
+Specify Individuals tool.  The program appends a
+new "entity" (set of four columns) to the gd_snp table, analogous to the columns
+for an individual but containing summary data for the population as a group.
+These four columns give the total counts for the two alleles, the "genotype" for
+the population, and the maximum quality value, taken over all individuals in the
+population.  If all defined genotypes in the population are 2 (agree with the
+reference), then the population's genotype is 2, and similarly for 0; otherwise
+the genotype is 1 (unless every individual has an undefined genotype, in which
+case it is -1).
+
+-----
+
+**Example**
+
+- input gd_snp::
+
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0
+    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30    1  0  2  30   Y  22  +99.   0
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0
+    etc.
+
+- input individuals::
+
+    9   PB1
+    13  PB2
+    17  PB3
+
+- output::
+
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0   29  0  2  72
+    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30    1  0  2  30   Y  22  +99.   0    3  0  2  30
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0   13  0  2  42
+    etc.
+
+  </help>
+</tool>