# HG changeset patch
# User fangly
# Date 1375095156 14400
# Node ID 7a7ecf9b9df77089d548902d60f7e408802813a4
Initial upload
diff -r 000000000000 -r 7a7ecf9b9df7 Galaxy_readme.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Galaxy_readme.txt Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,5 @@
+This is an XML wrapper that provides a GUI for CopyRighter in Galaxy (http://galaxy.psu.edu/).
+
+Place these files in your Galaxy directory. More information at http://wiki.g2.bx.psu.edu/FrontPage.
+
+Note: You can define a default CopyRighter trait database in Galaxy by defining a builtin dataset in the 'trait_db' data table.
diff -r 000000000000 -r 7a7ecf9b9df7 copyrighter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/copyrighter.xml Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,137 @@
+
+
+ trait bias corrector for microbial profiles
+
+
+ copyrighter
+
+
+ copyrighter --version
+
+
+ copyrighter
+ -i $input
+ -d $database.value
+ #if str($lookup):
+ -l $lookup
+ #end if
+ #if str($total) != "None":
+ -t $total
+ #end if
+ #if str($verbose):
+ -v
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ str(total) != "None"
+
+
+ str(total) != "None"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What CopyRighter does**
+
+The genome of Bacteria and Archaea often contains several copies of the
+16S rRNA gene. This can lead to significant biases when estimating the
+composition of microbial communities using 16S rRNA amplicons or
+microarrays or their total abundance using 16S rRNA quantitative PCR,
+since species with a large number of copies will contribute
+disproportionately more 16S amplicons than species with a single copy.
+Fortunately, it is possible to infer the copy number of unsequenced
+microbial species, based on that of close relatives that have been fully
+sequenced. Using this information, CopyRighter corrects microbial
+relative abundance by applying a weight proportional to the inverse of
+the estimated copy number to each species.
+
+In metagenomic surveys, a similar problem arises due to genome length
+variations between species, and can be corrected by CopyRighter as well.
+
+In all cases, a community file is used as input and a corrected community
+file with trait-corrected (16S rRNA gene copy number or genome length)
+relative abundances is generated. Total abundance can optionally be
+provided, corrected and combined with relative abundance estimates to
+get the absolute abundance of each species. Also, the average trait value
+in each community is reported on standard output.
+
+
+
+
diff -r 000000000000 -r 7a7ecf9b9df7 test_data/in.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/in.biom Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,46 @@
+{
+ "id": "Human microbiomes",
+ "comment": "This is an optional comment",
+ "format": "Biological Observation Matrix 0.9.1-dev",
+ "format_url": "http://biom-format.org/documentation/format_versions/biom-1.0.html",
+ "type": "OTU table",
+ "generated_by": "QIIME revision 1.4.0-dev",
+ "date": "2011-12-19T19:00:00",
+ "rows":[
+ {"id":"0", "metadata":{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}},
+ {"id":"1", "metadata":{"taxonomy":["k__Bacteria", "p__Cyanobacteria", "c__Nostocophycideae", "o__Nostocales", "f__Nostocaceae", "g__Dolichospermum", "s__"]}},
+ {"id":"2", "metadata":{"taxonomy":["k__Archaea", "p__Euryarchaeota", "c__Methanomicrobia", "o__Methanosarcinales", "f__Methanosarcinaceae", "g__Methanosarcina", "s__mazei"]}},
+ {"id":"3", "metadata":{"taxonomy":["k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Halanaerobiales", "f__Halanaerobiaceae", "g__Halanaerobium", "s__"]}},
+ {"id":"4", "metadata":{"taxonomy":[]}}
+ ],
+ "columns":[
+ {"id":"Sample1", "metadata":{
+ "BarcodeSequence":"CGCTTATCGAGA",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"gut",
+ "Description":"human gut"}},
+ {"id":"Sample2", "metadata":{
+ "BarcodeSequence":"CATACCAGTAGC",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"gut",
+ "Description":"human gut"}},
+ {"id":"Sample3", "metadata":{
+ "BarcodeSequence":"CTCTCTACCTGT",
+ "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+ "BODY_SITE":"gut",
+ "Description":"human skin"}}
+ ],
+ "matrix_type": "sparse",
+ "matrix_element_type": "int",
+ "shape": [5, 3],
+ "data":[[0,2,4],
+ [1,0,5],
+ [1,1,3],
+ [2,2,3],
+ [3,0,2],
+ [3,1,2],
+ [3,2,2],
+ [4,1,1],
+ [4,2,1]
+ ]
+}
diff -r 000000000000 -r 7a7ecf9b9df7 test_data/in.qiime
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/in.qiime Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,7 @@
+# QIIME v1.3.0 OTU table
+#OTU ID Sample1 Sample2 Sample3 Consensus Lineage
+0 0 0 4 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
+1 5 3 0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__
+2 0 0 3 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei
+3 2 2 2 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__
+4 0 1 1 No blast hit
diff -r 000000000000 -r 7a7ecf9b9df7 test_data/in_db.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/in_db.tsv Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,12 @@
+# ID 16S rRNA count
+0 4
+1 3.31
+2 4.5
+3 4.98
+
+# tax_string 16S rRNA count
+k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei 3
+k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__ 3.85824942205532
+k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__ 4
+k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__ 6.80611715914982
+
diff -r 000000000000 -r 7a7ecf9b9df7 test_data/in_total.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/in_total.tsv Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,3 @@
+Sample1 142
+Sample2 1.31e3
+Sample3 215.3
diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out.biom Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,137 @@
+{
+ "generated_by" : "Bio::Community version 0.1",
+ "matrix_type" : "sparse",
+ "date" : "2013-07-27T03:34:40",
+ "data" : [
+ [
+ 0,
+ 0,
+ 72.1591861673745
+ ],
+ [
+ 1,
+ 0,
+ 27.8408138326255
+ ],
+ [
+ 0,
+ 1,
+ 50.7189439619348
+ ],
+ [
+ 2,
+ 1,
+ 16.6666666666667
+ ],
+ [
+ 1,
+ 1,
+ 32.6143893713985
+ ],
+ [
+ 2,
+ 2,
+ 10
+ ],
+ [
+ 1,
+ 2,
+ 21.5547529141356
+ ],
+ [
+ 3,
+ 2,
+ 25.3357412575932
+ ],
+ [
+ 4,
+ 2,
+ 43.1095058282712
+ ]
+ ],
+ "rows" : [
+ {
+ "metadata" : {
+ "taxonomy" : [
+ "k__Bacteria",
+ "p__Cyanobacteria",
+ "c__Nostocophycideae",
+ "o__Nostocales",
+ "f__Nostocaceae",
+ "g__Dolichospermum",
+ "s__"
+ ]
+ },
+ "id" : "1"
+ },
+ {
+ "metadata" : {
+ "taxonomy" : [
+ "k__Bacteria",
+ "p__Firmicutes",
+ "c__Clostridia",
+ "o__Halanaerobiales",
+ "f__Halanaerobiaceae",
+ "g__Halanaerobium",
+ "s__"
+ ]
+ },
+ "id" : "3"
+ },
+ {
+ "metadata" : null,
+ "id" : "4"
+ },
+ {
+ "metadata" : {
+ "taxonomy" : [
+ "k__Bacteria",
+ "p__Proteobacteria",
+ "c__Gammaproteobacteria",
+ "o__Enterobacteriales",
+ "f__Enterobacteriaceae",
+ "g__Escherichia",
+ "s__"
+ ]
+ },
+ "id" : "0"
+ },
+ {
+ "metadata" : {
+ "taxonomy" : [
+ "k__Archaea",
+ "p__Euryarchaeota",
+ "c__Methanomicrobia",
+ "o__Methanosarcinales",
+ "f__Methanosarcinaceae",
+ "g__Methanosarcina",
+ "s__mazei"
+ ]
+ },
+ "id" : "2"
+ }
+ ],
+ "matrix_element_type" : "float",
+ "format_url" : "http://biom-format.org/documentation/format_versions/biom-1.0.html",
+ "format" : "Biological Observation Matrix 1.0",
+ "columns" : [
+ {
+ "metadata" : null,
+ "id" : "Sample1"
+ },
+ {
+ "metadata" : null,
+ "id" : "Sample2"
+ },
+ {
+ "metadata" : null,
+ "id" : "Sample3"
+ }
+ ],
+ "shape" : [
+ 5,
+ 3
+ ],
+ "id" : "",
+ "type" : "OTU table"
+}
diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out.qiime
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out.qiime Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,7 @@
+# QIIME v1.3.0 OTU table
+#OTU ID Sample1 Sample2 Sample3 Consensus Lineage
+1 72.1591861673745 50.7189439619348 0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__
+3 27.8408138326255 32.6143893713985 21.5547529141356 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__
+4 0 16.6666666666667 10 No blast hit
+0 0 0 25.3357412575932 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
+2 0 0 43.1095058282712 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei
diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out2.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out2.biom Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,137 @@
+{
+ "generated_by" : "Bio::Community version 0.1",
+ "matrix_type" : "sparse",
+ "date" : "2013-07-27T03:32:37",
+ "data" : [
+ [
+ 0,
+ 0,
+ 78.997461928934
+ ],
+ [
+ 1,
+ 0,
+ 21.002538071066
+ ],
+ [
+ 0,
+ 1,
+ 57.7458256029685
+ ],
+ [
+ 2,
+ 1,
+ 16.6666666666667
+ ],
+ [
+ 1,
+ 1,
+ 25.5875077303649
+ ],
+ [
+ 2,
+ 2,
+ 10
+ ],
+ [
+ 1,
+ 2,
+ 17.4757281553398
+ ],
+ [
+ 3,
+ 2,
+ 43.5145631067961
+ ],
+ [
+ 4,
+ 2,
+ 29.0097087378641
+ ]
+ ],
+ "rows" : [
+ {
+ "metadata" : {
+ "taxonomy" : [
+ "k__Bacteria",
+ "p__Cyanobacteria",
+ "c__Nostocophycideae",
+ "o__Nostocales",
+ "f__Nostocaceae",
+ "g__Dolichospermum",
+ "s__"
+ ]
+ },
+ "id" : "1"
+ },
+ {
+ "metadata" : {
+ "taxonomy" : [
+ "k__Bacteria",
+ "p__Firmicutes",
+ "c__Clostridia",
+ "o__Halanaerobiales",
+ "f__Halanaerobiaceae",
+ "g__Halanaerobium",
+ "s__"
+ ]
+ },
+ "id" : "3"
+ },
+ {
+ "metadata" : null,
+ "id" : "4"
+ },
+ {
+ "metadata" : {
+ "taxonomy" : [
+ "k__Bacteria",
+ "p__Proteobacteria",
+ "c__Gammaproteobacteria",
+ "o__Enterobacteriales",
+ "f__Enterobacteriaceae",
+ "g__Escherichia",
+ "s__"
+ ]
+ },
+ "id" : "0"
+ },
+ {
+ "metadata" : {
+ "taxonomy" : [
+ "k__Archaea",
+ "p__Euryarchaeota",
+ "c__Methanomicrobia",
+ "o__Methanosarcinales",
+ "f__Methanosarcinaceae",
+ "g__Methanosarcina",
+ "s__mazei"
+ ]
+ },
+ "id" : "2"
+ }
+ ],
+ "matrix_element_type" : "float",
+ "format_url" : "http://biom-format.org/documentation/format_versions/biom-1.0.html",
+ "format" : "Biological Observation Matrix 1.0",
+ "columns" : [
+ {
+ "metadata" : null,
+ "id" : "Sample1"
+ },
+ {
+ "metadata" : null,
+ "id" : "Sample2"
+ },
+ {
+ "metadata" : null,
+ "id" : "Sample3"
+ }
+ ],
+ "shape" : [
+ 5,
+ 3
+ ],
+ "id" : "",
+ "type" : "OTU table"
+}
diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out_combined.qiime
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out_combined.qiime Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,7 @@
+# QIIME v1.3.0 OTU table
+#OTU ID Sample1 Sample2 Sample3 Consensus Lineage
+1 26.2887543891705 169.766111090629 0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__
+3 10.1428571428571 109.166666666667 10.765 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__
+4 0 55.7865555514592 4.99425813085536 No blast hit
+0 0 0 12.6533231776982 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__
+2 0 0 21.53 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei
diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out_total.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/out_total.tsv Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,3 @@
+Sample1 36.4316115320277
+Sample2 334.719333308755
+Sample3 49.9425813085536
diff -r 000000000000 -r 7a7ecf9b9df7 trait_db.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/trait_db.loc.sample Mon Jul 29 06:52:36 2013 -0400
@@ -0,0 +1,20 @@
+# File of pre-defined trait databases for CopyRighter
+#
+# This file defines the names and locations of available database files of microbial
+# traits needed to run CopyRighter. Edit this file by adding the name and
+# location of the databases you want (tab-delimited!) and move the file to the
+# tool-data/ directory. Then add this to your tool_data_table_conf.xml file:
+#
+#
+#
+#
+
+# id name path
+ssu_img40_gg201210 Short ribosomal subunit (16S) data (IMG 4.0, Greengenes 2012/10) /path/to/ssu_img40_gg201210.txt
+genlength_img40_gg201210 Genome length data (IMG 4.0, Greengenes 2012/10) /path/to/genlength_img40_gg201210.txt
+test_invalid Invalid database /path/to/db.txt
+
+