Mercurial > repos > fangly > copyrighter

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Galaxy_readme.txt	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,5 @@
+This is an XML wrapper that provides a GUI for CopyRighter in Galaxy (http://galaxy.psu.edu/).
+
+Place these files in your Galaxy directory. More information at http://wiki.g2.bx.psu.edu/FrontPage.
+
+Note: You can define a default CopyRighter trait database in Galaxy by defining a builtin dataset in the 'trait_db' data table.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/copyrighter.xml	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,137 @@
+<tool id="copyrighter" name="CopyRighter" version="0.45">
+  <!-- Tool metadata: one-line description, the external `copyrighter` binary this wrapper requires, and the command run to report that binary's version. -->
+  <description>trait bias corrector for microbial profiles</description>
+
+  <requirements>
+    <requirement type="binary">copyrighter</requirement>
+  </requirements>
+
+  <version_string>copyrighter --version</version_string>
+  <!-- Cheetah template for the command line. The "verbose" boolean renders as its truevalue/falsevalue string ("1" or "0"), both of which are non-empty, so it must be compared explicitly; a bare truthiness test would always pass -v. -->
+  <command>
+    copyrighter
+      -i $input
+      -d $database.value
+      #if str($lookup):
+        -l $lookup
+      #end if
+      #if str($total) != "None":
+        -t $total
+      #end if
+      #if str($verbose) == "1":
+        -v
+      #end if
+  </command>
+  <!-- Tool form parameters. Both branches of the "database" conditional define a param named "value", so the command can reference $database.value whichever source is selected; "total" is optional and the absolute/combined outputs are filtered on it. -->
+  <inputs>
+    <param name="input" type="data" format="txt" label="Input community file" help="Text file obtained from 16S rRNA microarray, 16S rRNA amplicon sequencing or metagenomic sequencing, in biom, QIIME, GAAS, Unifrac, or generic (tabular site-by-species) format. The file must contain read counts (not percentages) and taxa must have UNALTERED taxonomic assignments." />
+    <conditional name="database">
+      <param name="specify" type="select" label="Trait database" help="Tab-delimited file of traits: 16S copy number, genome length, ...">
+        <option value="builtin">Built-in file</option>
+        <option value="uploaded">Uploaded file</option>
+      </param>
+      <when value="builtin">
+        <param name="value" type="select" label="Built-in file">
+          <options from_data_table="trait_db" />
+          <validator type="no_options" message="No built-in trait database is available"/>
+        </param>
+      </when>
+      <when value="uploaded">
+        <param name="value" type="data" format="tabular" label="Uploaded file" />
+      </when>
+    </conditional>
+    <param name="lookup" type="select" display="radio" value="desc" label="Lookup method" help="What to match when looking up the trait value of a taxon.">
+      <option value="desc">OTU name</option>
+      <option value="id">OTU ID (if recorded in your input community file)</option>
+    </param>
+    <param name="total" type="data" format="tabular" optional="true" label="Total abundance file" help="Tab-delimited file containing the total microbial abundance of each community, e.g. 16S rRNA quantitative PCR numbers to be corrected by the average 16S rRNA copy number." />
+    <param name="verbose" type="boolean" checked="no" truevalue="1" falsevalue="0" format="txt" label="Verbose" help="Display trait value assignments." />
+  </inputs>
+  <!-- Result datasets, each taken from a named file in the job working directory (from_work_dir). "relative" has no filter; "absolute" and "combined" are kept only when a total abundance file was supplied (see their filter expressions). -->
+  <outputs>
+    <data format="txt" name="relative" from_work_dir="out_copyrighted.txt" label="${tool.name} from ${on_string} (relative)"/>
+    <data format="tabular" name="absolute" from_work_dir="out_copyrighted_total.tsv" label="${tool.name} from ${on_string} (absolute)">
+      <filter>str(total) != "None"</filter>
+    </data>
+    <data format="txt" name="combined" from_work_dir="out_copyrighted_combined.txt" label="${tool.name} from ${on_string} (combined)">
+      <filter>str(total) != "None"</filter>
+    </data>
+  </outputs>
+  <!-- Failure detection: an exit code in the range "1:" (1 or higher), or stderr output matching error|exception|invalid, marks the job as fatal. -->
+  <stdio>
+    <exit_code range="1:"  level="fatal" />
+    <regex match="error|exception|invalid" source="stderr" level="fatal" />
+  </stdio>
+  <!-- Functional tests. Every active test selects the "uploaded" trait database branch with test_data/in_db.tsv; the last test (built-in database) is commented out, presumably because it needs a configured trait_db data table (confirm before re-enabling). -->
+  <tests>
+    <test>
+      <param name="input" value="test_data/in.qiime" />
+      <param name="specify" value="uploaded"/>
+      <param name="value" value="test_data/in_db.tsv"/>
+      <output name="relative" file="test_data/out.qiime"/>
+    </test>
+    <test>
+      <param name="input" value="test_data/in.biom"/>
+      <param name="specify"  value="uploaded"/>
+      <param name="value" value="test_data/in_db.tsv"/>
+      <output name="relative" file="test_data/out.biom" lines_diff="2"/>
+    </test>
+    <test>
+      <param name="input" value="test_data/in.qiime"/>
+      <param name="specify"  value="uploaded"/>
+      <param name="value" value="test_data/in_db.tsv"/>
+      <param name="verbose" value="yes"/>
+      <output name="relative" file="test_data/out.qiime"/>
+    </test>
+    <test>
+      <param name="input" value="test_data/in.biom"/>
+      <param name="specify"  value="uploaded"/>
+      <param name="value" value="test_data/in_db.tsv"/>
+      <param name="lookup" value="id"/>
+      <output name="relative" file="test_data/out2.biom" lines_diff="2"/>
+    </test>
+    <test>
+      <param name="input" value="test_data/in.qiime"/>
+      <param name="specify"  value="uploaded"/>
+      <param name="value" value="test_data/in_db.tsv"/>
+      <param name="total" value="test_data/in_total.tsv"/>
+      <output name="relative" file="test_data/out.qiime"/>
+      <output name="absolute" file="test_data/out_total.tsv"/>
+      <output name="combined" file="test_data/out_combined.qiime"/>
+    </test>
+    <!--<test>
+      <param name="input" value="test_data/in.biom"/>
+      <param name="specify"  value="builtin"/>
+      <param name="value" value="test_data/in_db.tsv"/>
+      <output name="relative" file="test_data/out.biom" lines_diff="2"/>
+    </test>-->
+  </tests>
+
+  <help>
+**What CopyRighter does**
+
+The genome of Bacteria and Archaea often contains several copies of the
+16S rRNA gene. This can lead to significant biases when estimating the
+composition of microbial communities using 16S rRNA amplicons or
+microarrays or their total abundance using 16S rRNA quantitative PCR,
+since species with a large number of copies will contribute
+disproportionally more 16S amplicons than species with a unique copy.
+Fortunately, it is possible to infer the copy number of unsequenced
+microbial species, based on that of close relatives that have been fully
+sequenced. Using this information, CopyRighter corrects microbial
+relative abundance by applying a weight proportional to the inverse of
+the estimated copy number to each species.
+
+In metagenomic surveys, a similar problem arises due to genome length
+variations between species, and can be corrected by CopyRighter as well.
+
+In all cases, a community file is used as input and a corrected community
+file with trait-corrected (16S rRNA gene copy number or genome length)
+relative abundances is generated. Total abundance can optionally be
+provided, corrected and combined with relative abundance estimates to
+get the absolute abundance of each species. Also the average trait value
+in each community is reported on standard output.
+  </help>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/in.biom	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,46 @@
+{
+ "id": "Human microbiomes",
+ "comment": "This is an optional comment",
+ "format": "Biological Observation Matrix 0.9.1-dev",
+ "format_url": "http://biom-format.org/documentation/format_versions/biom-1.0.html",
+ "type": "OTU table",
+ "generated_by": "QIIME revision 1.4.0-dev",
+ "date": "2011-12-19T19:00:00",
+ "rows":[
+    {"id":"0", "metadata":{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}},
+    {"id":"1", "metadata":{"taxonomy":["k__Bacteria", "p__Cyanobacteria", "c__Nostocophycideae", "o__Nostocales", "f__Nostocaceae", "g__Dolichospermum", "s__"]}},
+    {"id":"2", "metadata":{"taxonomy":["k__Archaea", "p__Euryarchaeota", "c__Methanomicrobia", "o__Methanosarcinales", "f__Methanosarcinaceae", "g__Methanosarcina", "s__mazei"]}},
+    {"id":"3", "metadata":{"taxonomy":["k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Halanaerobiales", "f__Halanaerobiaceae", "g__Halanaerobium", "s__"]}},
+    {"id":"4", "metadata":{"taxonomy":[]}}
+    ],
+ "columns":[
+    {"id":"Sample1", "metadata":{
+                             "BarcodeSequence":"CGCTTATCGAGA",
+                             "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                             "BODY_SITE":"gut",
+                             "Description":"human gut"}},
+    {"id":"Sample2", "metadata":{
+                             "BarcodeSequence":"CATACCAGTAGC",
+                             "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                             "BODY_SITE":"gut",
+                             "Description":"human gut"}},
+    {"id":"Sample3", "metadata":{
+                             "BarcodeSequence":"CTCTCTACCTGT",
+                             "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                             "BODY_SITE":"gut",
+                             "Description":"human skin"}}
+            ],
+ "matrix_type": "sparse",
+ "matrix_element_type": "int",
+ "shape": [5, 3],
+ "data":[[0,2,4],
+         [1,0,5],
+         [1,1,3],
+         [2,2,3],
+         [3,0,2],
+         [3,1,2],
+         [3,2,2],
+         [4,1,1],
+         [4,2,1]
+        ]
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/in.qiime	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,7 @@
+# QIIME v1.3.0 OTU table
+#OTU ID	Sample1	Sample2	Sample3	Consensus Lineage
+0	0	0	4	k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
+1	5	3	0	k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__
+2	0	0	3	k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei
+3	2	2	2	k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__
+4	0	1	1	No blast hit
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/in_db.tsv	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,12 @@
+# ID	16S rRNA count
+0	4
+1	3.31
+2	4.5
+3	4.98
+
+# tax_string	16S rRNA count
+k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei	3
+k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__	3.85824942205532
+k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__	4
+k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__	6.80611715914982
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/in_total.tsv	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,3 @@
+Sample1	142
+Sample2	1.31e3
+Sample3	215.3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out.biom	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,137 @@
+{
+   "generated_by" : "Bio::Community version 0.1",
+   "matrix_type" : "sparse",
+   "date" : "2013-07-27T03:34:40",
+   "data" : [
+      [
+         0,
+         0,
+         72.1591861673745
+      ],
+      [
+         1,
+         0,
+         27.8408138326255
+      ],
+      [
+         0,
+         1,
+         50.7189439619348
+      ],
+      [
+         2,
+         1,
+         16.6666666666667
+      ],
+      [
+         1,
+         1,
+         32.6143893713985
+      ],
+      [
+         2,
+         2,
+         10
+      ],
+      [
+         1,
+         2,
+         21.5547529141356
+      ],
+      [
+         3,
+         2,
+         25.3357412575932
+      ],
+      [
+         4,
+         2,
+         43.1095058282712
+      ]
+   ],
+   "rows" : [
+      {
+         "metadata" : {
+            "taxonomy" : [
+               "k__Bacteria",
+               "p__Cyanobacteria",
+               "c__Nostocophycideae",
+               "o__Nostocales",
+               "f__Nostocaceae",
+               "g__Dolichospermum",
+               "s__"
+            ]
+         },
+         "id" : "1"
+      },
+      {
+         "metadata" : {
+            "taxonomy" : [
+               "k__Bacteria",
+               "p__Firmicutes",
+               "c__Clostridia",
+               "o__Halanaerobiales",
+               "f__Halanaerobiaceae",
+               "g__Halanaerobium",
+               "s__"
+            ]
+         },
+         "id" : "3"
+      },
+      {
+         "metadata" : null,
+         "id" : "4"
+      },
+      {
+         "metadata" : {
+            "taxonomy" : [
+               "k__Bacteria",
+               "p__Proteobacteria",
+               "c__Gammaproteobacteria",
+               "o__Enterobacteriales",
+               "f__Enterobacteriaceae",
+               "g__Escherichia",
+               "s__"
+            ]
+         },
+         "id" : "0"
+      },
+      {
+         "metadata" : {
+            "taxonomy" : [
+               "k__Archaea",
+               "p__Euryarchaeota",
+               "c__Methanomicrobia",
+               "o__Methanosarcinales",
+               "f__Methanosarcinaceae",
+               "g__Methanosarcina",
+               "s__mazei"
+            ]
+         },
+         "id" : "2"
+      }
+   ],
+   "matrix_element_type" : "float",
+   "format_url" : "http://biom-format.org/documentation/format_versions/biom-1.0.html",
+   "format" : "Biological Observation Matrix 1.0",
+   "columns" : [
+      {
+         "metadata" : null,
+         "id" : "Sample1"
+      },
+      {
+         "metadata" : null,
+         "id" : "Sample2"
+      },
+      {
+         "metadata" : null,
+         "id" : "Sample3"
+      }
+   ],
+   "shape" : [
+      5,
+      3
+   ],
+   "id" : "",
+   "type" : "OTU table"
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out.qiime	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,7 @@
+# QIIME v1.3.0 OTU table
+#OTU ID	Sample1	Sample2	Sample3	Consensus Lineage
+1	72.1591861673745	50.7189439619348	0	k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__
+3	27.8408138326255	32.6143893713985	21.5547529141356	k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__
+4	0	16.6666666666667	10	No blast hit
+0	0	0	25.3357412575932	k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
+2	0	0	43.1095058282712	k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out2.biom	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,137 @@
+{
+   "generated_by" : "Bio::Community version 0.1",
+   "matrix_type" : "sparse",
+   "date" : "2013-07-27T03:32:37",
+   "data" : [
+      [
+         0,
+         0,
+         78.997461928934
+      ],
+      [
+         1,
+         0,
+         21.002538071066
+      ],
+      [
+         0,
+         1,
+         57.7458256029685
+      ],
+      [
+         2,
+         1,
+         16.6666666666667
+      ],
+      [
+         1,
+         1,
+         25.5875077303649
+      ],
+      [
+         2,
+         2,
+         10
+      ],
+      [
+         1,
+         2,
+         17.4757281553398
+      ],
+      [
+         3,
+         2,
+         43.5145631067961
+      ],
+      [
+         4,
+         2,
+         29.0097087378641
+      ]
+   ],
+   "rows" : [
+      {
+         "metadata" : {
+            "taxonomy" : [
+               "k__Bacteria",
+               "p__Cyanobacteria",
+               "c__Nostocophycideae",
+               "o__Nostocales",
+               "f__Nostocaceae",
+               "g__Dolichospermum",
+               "s__"
+            ]
+         },
+         "id" : "1"
+      },
+      {
+         "metadata" : {
+            "taxonomy" : [
+               "k__Bacteria",
+               "p__Firmicutes",
+               "c__Clostridia",
+               "o__Halanaerobiales",
+               "f__Halanaerobiaceae",
+               "g__Halanaerobium",
+               "s__"
+            ]
+         },
+         "id" : "3"
+      },
+      {
+         "metadata" : null,
+         "id" : "4"
+      },
+      {
+         "metadata" : {
+            "taxonomy" : [
+               "k__Bacteria",
+               "p__Proteobacteria",
+               "c__Gammaproteobacteria",
+               "o__Enterobacteriales",
+               "f__Enterobacteriaceae",
+               "g__Escherichia",
+               "s__"
+            ]
+         },
+         "id" : "0"
+      },
+      {
+         "metadata" : {
+            "taxonomy" : [
+               "k__Archaea",
+               "p__Euryarchaeota",
+               "c__Methanomicrobia",
+               "o__Methanosarcinales",
+               "f__Methanosarcinaceae",
+               "g__Methanosarcina",
+               "s__mazei"
+            ]
+         },
+         "id" : "2"
+      }
+   ],
+   "matrix_element_type" : "float",
+   "format_url" : "http://biom-format.org/documentation/format_versions/biom-1.0.html",
+   "format" : "Biological Observation Matrix 1.0",
+   "columns" : [
+      {
+         "metadata" : null,
+         "id" : "Sample1"
+      },
+      {
+         "metadata" : null,
+         "id" : "Sample2"
+      },
+      {
+         "metadata" : null,
+         "id" : "Sample3"
+      }
+   ],
+   "shape" : [
+      5,
+      3
+   ],
+   "id" : "",
+   "type" : "OTU table"
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out_combined.qiime	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,7 @@
+# QIIME v1.3.0 OTU table
+#OTU ID	Sample1	Sample2	Sample3	Consensus Lineage
+1	26.2887543891705	169.766111090629	0	k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__
+3	10.1428571428571	109.166666666667	10.765	k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__
+4	0	55.7865555514592	4.99425813085536	No blast hit
+0	0	0	12.6533231776982	k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
+2	0	0	21.53	k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out_total.tsv	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,3 @@
+Sample1	36.4316115320277
+Sample2	334.719333308755
+Sample3	49.9425813085536
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trait_db.loc.sample	Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,20 @@
+# File of pre-defined trait databases for CopyRighter
+#
+# This file defines the locations of available database files of microbial
+# traits needed to run CopyRighter. Edit this file by adding the name and
+# location of the databases you want (tab-delimited!) and move the file to the
+# tool-data/ directory. Then add this to your tool_data_table_conf.xml file:
+#
+#    <!-- Locations of database files for CopyRighter -->
+#    <table name="trait_db" comment_char="#">
+#        <columns>id, name, value</columns>
+#        <file path="tool-data/trait_db.loc" />
+#    </table>
+#
+
+# id	name	value (path to the database file)
+ssu_img40_gg201210	Short ribosomal subunit (16S) data (IMG 4.0, Greengenes 2012/10)	/path/to/ssu_img40_gg201210.txt
+genlength_img40_gg201210	Genome length data (IMG 4.0, Greengenes 2012/10)	/path/to/genlength_img40_gg201210.txt
+test_invalid	Invalid database	/path/to/db.txt
+
+