# HG changeset patch # User fangly # Date 1375095156 14400 # Node ID 7a7ecf9b9df77089d548902d60f7e408802813a4 Initial upload diff -r 000000000000 -r 7a7ecf9b9df7 Galaxy_readme.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Galaxy_readme.txt Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,5 @@ +This is an XML wrapper that provides a GUI for CopyRgihter in Galaxy (http://galaxy.psu.edu/). + +Place these files in your Galaxy directory. More information at http://wiki.g2.bx.psu.edu/FrontPage. + +Note: You can define a default CopyRighter trait database in Galaxy by defining a builtin dataset in the 'trait_db' data table. diff -r 000000000000 -r 7a7ecf9b9df7 copyrighter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/copyrighter.xml Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,137 @@ + + + trait bias corrector for microbial profiles + + + copyrighter + + + copyrighter --version + + + copyrighter + -i $input + -d $database.value + #if str($lookup): + -l $lookup + #end if + #if str($total) != "None": + -t $total + #end if + #if str($verbose): + -v + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + str(total) != "None" + + + str(total) != "None" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What CopyRighter does** + +The genome of Bacteria and Archaea often contains several copies of the +16S rRNA gene. This can lead to significant biases when estimating the +composition of microbial communities using 16S rRNA amplicons or +microarrays or their total abundance using 16S rRNA quantitative PCR, +since species with a large number of copies will contribute +disproportionally more 16S amplicons than species with a unique copy. +Fortunately, it is possible to infer the copy number of unsequenced +microbial species, based on that of close relatives that have been fully +sequenced. 
Using this information, CopyRighter corrects microbial +relative abundance by applying a weight proportional to the inverse of +the estimated copy number to each species. + +In metagenomic surveys, a similar problem arises due to genome length +variations between species, and can be corrected by CopyRighter as well. + +In all cases, a community file is used as input and a corrected community +file with trait-corrected (16S rRNA gene copy number or genome length) +relative abundances is generated. Total abundance can optionally be +provided, corrected and combined with relative abundance estimates to +get the absolute abundance of each species. Also the average trait value +in each community is reported on standard output. + + + + diff -r 000000000000 -r 7a7ecf9b9df7 test_data/in.biom --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/in.biom Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,46 @@ +{ + "id": "Human microbiomes", + "comment": "This is an optional comment", + "format": "Biological Observation Matrix 0.9.1-dev", + "format_url": "http://biom-format.org/documentation/format_versions/biom-1.0.html", + "type": "OTU table", + "generated_by": "QIIME revision 1.4.0-dev", + "date": "2011-12-19T19:00:00", + "rows":[ + {"id":"0", "metadata":{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}}, + {"id":"1", "metadata":{"taxonomy":["k__Bacteria", "p__Cyanobacteria", "c__Nostocophycideae", "o__Nostocales", "f__Nostocaceae", "g__Dolichospermum", "s__"]}}, + {"id":"2", "metadata":{"taxonomy":["k__Archaea", "p__Euryarchaeota", "c__Methanomicrobia", "o__Methanosarcinales", "f__Methanosarcinaceae", "g__Methanosarcina", "s__mazei"]}}, + {"id":"3", "metadata":{"taxonomy":["k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Halanaerobiales", "f__Halanaerobiaceae", "g__Halanaerobium", "s__"]}}, + {"id":"4", "metadata":{"taxonomy":[]}} + ], + "columns":[ + {"id":"Sample1", 
"metadata":{ + "BarcodeSequence":"CGCTTATCGAGA", + "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT", + "BODY_SITE":"gut", + "Description":"human gut"}}, + {"id":"Sample2", "metadata":{ + "BarcodeSequence":"CATACCAGTAGC", + "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT", + "BODY_SITE":"gut", + "Description":"human gut"}}, + {"id":"Sample3", "metadata":{ + "BarcodeSequence":"CTCTCTACCTGT", + "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT", + "BODY_SITE":"gut", + "Description":"human skin"}} + ], + "matrix_type": "sparse", + "matrix_element_type": "int", + "shape": [5, 3], + "data":[[0,2,4], + [1,0,5], + [1,1,3], + [2,2,3], + [3,0,2], + [3,1,2], + [3,2,2], + [4,1,1], + [4,2,1] + ] +} diff -r 000000000000 -r 7a7ecf9b9df7 test_data/in.qiime --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/in.qiime Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,7 @@ +# QIIME v1.3.0 OTU table +#OTU ID Sample1 Sample2 Sample3 Consensus Lineage +0 0 0 4 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__ +1 5 3 0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__ +2 0 0 3 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei +3 2 2 2 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__ +4 0 1 1 No blast hit diff -r 000000000000 -r 7a7ecf9b9df7 test_data/in_db.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/in_db.tsv Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,12 @@ +# ID 16S rRNA count +0 4 +1 3.31 +2 4.5 +3 4.98 + +# tax_string 16S rRNA count +k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei 3 +k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__ 3.85824942205532 +k__Bacteria; p__Firmicutes; 
c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__ 4 +k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__ 6.80611715914982 + diff -r 000000000000 -r 7a7ecf9b9df7 test_data/in_total.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/in_total.tsv Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,3 @@ +Sample1 142 +Sample2 1.31e3 +Sample3 215.3 diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out.biom --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out.biom Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,137 @@ +{ + "generated_by" : "Bio::Community version 0.1", + "matrix_type" : "sparse", + "date" : "2013-07-27T03:34:40", + "data" : [ + [ + 0, + 0, + 72.1591861673745 + ], + [ + 1, + 0, + 27.8408138326255 + ], + [ + 0, + 1, + 50.7189439619348 + ], + [ + 2, + 1, + 16.6666666666667 + ], + [ + 1, + 1, + 32.6143893713985 + ], + [ + 2, + 2, + 10 + ], + [ + 1, + 2, + 21.5547529141356 + ], + [ + 3, + 2, + 25.3357412575932 + ], + [ + 4, + 2, + 43.1095058282712 + ] + ], + "rows" : [ + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Cyanobacteria", + "c__Nostocophycideae", + "o__Nostocales", + "f__Nostocaceae", + "g__Dolichospermum", + "s__" + ] + }, + "id" : "1" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Firmicutes", + "c__Clostridia", + "o__Halanaerobiales", + "f__Halanaerobiaceae", + "g__Halanaerobium", + "s__" + ] + }, + "id" : "3" + }, + { + "metadata" : null, + "id" : "4" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Proteobacteria", + "c__Gammaproteobacteria", + "o__Enterobacteriales", + "f__Enterobacteriaceae", + "g__Escherichia", + "s__" + ] + }, + "id" : "0" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Archaea", + "p__Euryarchaeota", + "c__Methanomicrobia", + "o__Methanosarcinales", + "f__Methanosarcinaceae", + "g__Methanosarcina", + "s__mazei" + ] + }, + "id" : "2" + } + ], + "matrix_element_type" : 
"float", + "format_url" : "http://biom-format.org/documentation/format_versions/biom-1.0.html", + "format" : "Biological Observation Matrix 1.0", + "columns" : [ + { + "metadata" : null, + "id" : "Sample1" + }, + { + "metadata" : null, + "id" : "Sample2" + }, + { + "metadata" : null, + "id" : "Sample3" + } + ], + "shape" : [ + 5, + 3 + ], + "id" : "", + "type" : "OTU table" +} diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out.qiime --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out.qiime Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,7 @@ +# QIIME v1.3.0 OTU table +#OTU ID Sample1 Sample2 Sample3 Consensus Lineage +1 72.1591861673745 50.7189439619348 0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__ +3 27.8408138326255 32.6143893713985 21.5547529141356 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__ +4 0 16.6666666666667 10 No blast hit +0 0 0 25.3357412575932 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__ +2 0 0 43.1095058282712 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out2.biom --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out2.biom Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,137 @@ +{ + "generated_by" : "Bio::Community version 0.1", + "matrix_type" : "sparse", + "date" : "2013-07-27T03:32:37", + "data" : [ + [ + 0, + 0, + 78.997461928934 + ], + [ + 1, + 0, + 21.002538071066 + ], + [ + 0, + 1, + 57.7458256029685 + ], + [ + 2, + 1, + 16.6666666666667 + ], + [ + 1, + 1, + 25.5875077303649 + ], + [ + 2, + 2, + 10 + ], + [ + 1, + 2, + 17.4757281553398 + ], + [ + 3, + 2, + 43.5145631067961 + ], + [ + 4, + 2, + 29.0097087378641 + ] + ], + "rows" : [ + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Cyanobacteria", + 
"c__Nostocophycideae", + "o__Nostocales", + "f__Nostocaceae", + "g__Dolichospermum", + "s__" + ] + }, + "id" : "1" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Firmicutes", + "c__Clostridia", + "o__Halanaerobiales", + "f__Halanaerobiaceae", + "g__Halanaerobium", + "s__" + ] + }, + "id" : "3" + }, + { + "metadata" : null, + "id" : "4" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Proteobacteria", + "c__Gammaproteobacteria", + "o__Enterobacteriales", + "f__Enterobacteriaceae", + "g__Escherichia", + "s__" + ] + }, + "id" : "0" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Archaea", + "p__Euryarchaeota", + "c__Methanomicrobia", + "o__Methanosarcinales", + "f__Methanosarcinaceae", + "g__Methanosarcina", + "s__mazei" + ] + }, + "id" : "2" + } + ], + "matrix_element_type" : "float", + "format_url" : "http://biom-format.org/documentation/format_versions/biom-1.0.html", + "format" : "Biological Observation Matrix 1.0", + "columns" : [ + { + "metadata" : null, + "id" : "Sample1" + }, + { + "metadata" : null, + "id" : "Sample2" + }, + { + "metadata" : null, + "id" : "Sample3" + } + ], + "shape" : [ + 5, + 3 + ], + "id" : "", + "type" : "OTU table" +} diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out_combined.qiime --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out_combined.qiime Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,7 @@ +# QIIME v1.3.0 OTU table +#OTU ID Sample1 Sample2 Sample3 Consensus Lineage +1 26.2887543891705 169.766111090629 0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__ +3 10.1428571428571 109.166666666667 10.765 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__ +4 0 55.7865555514592 4.99425813085536 No blast hit +0 0 0 12.6533231776982 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__ +2 0 0 21.53 k__Archaea; 
p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei diff -r 000000000000 -r 7a7ecf9b9df7 test_data/out_total.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out_total.tsv Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,3 @@ +Sample1 36.4316115320277 +Sample2 334.719333308755 +Sample3 49.9425813085536 diff -r 000000000000 -r 7a7ecf9b9df7 trait_db.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trait_db.loc.sample Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,20 @@ +# File of pre-defined trait databases for CopyRighter +# +# This file defines the names and locations of available database files of microbial +# traits needed to run CopyRighter. Edit this file by adding the name and +# location of the databases you want (tab-delimited!) and move the file to the +# tool-data/ directory. Then add this to your tool_data_table_conf.xml file: +# +# +# +# id, name, value +# +#
+# + +# id name path +ssu_img40_gg201210 Small ribosomal subunit (16S) data (IMG 4.0, Greengenes 2012/10) /path/to/ssu_img40_gg201210.txt +genlength_img40_gg201210 Genome length data (IMG 4.0, Greengenes 2012/10) /path/to/genlength_img40_gg201210.txt +test_invalid Invalid database /path/to/db.txt + +