Mercurial > repos > fangly > copyrighter
changeset 0:7a7ecf9b9df7 draft
Initial upload
author | fangly |
---|---|
date | Mon, 29 Jul 2013 06:52:36 -0400 |
parents | |
children | 658ad8b92341 |
files | Galaxy_readme.txt copyrighter.xml test_data/in.biom test_data/in.qiime test_data/in_db.tsv test_data/in_total.tsv test_data/out.biom test_data/out.qiime test_data/out2.biom test_data/out_combined.qiime test_data/out_total.tsv trait_db.loc.sample |
diffstat | 12 files changed, 521 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Galaxy_readme.txt Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,5 @@ +This is an XML wrapper that provides a GUI for CopyRighter in Galaxy (http://galaxy.psu.edu/). + +Place these files in your Galaxy directory. More information at http://wiki.g2.bx.psu.edu/FrontPage. + +Note: You can define a default CopyRighter trait database in Galaxy by defining a builtin dataset in the 'trait_db' data table.
<!-- copyrighter.xml: Galaxy tool definition wrapping the copyrighter command-line program. -->
<tool id="copyrighter" name="CopyRighter" version="0.45">

  <description>trait bias corrector for microbial profiles</description>

  <requirements>
    <requirement type="binary">copyrighter</requirement>
  </requirements>

  <version_string>copyrighter --version</version_string>

  <!--
    Command line built from the form parameters declared in <inputs>.
    * Dataset paths are single-quoted so that paths containing spaces
      (e.g. a built-in database location) reach the program as one argument.
    * The total-abundance option is only passed when the optional dataset
      was supplied; an unset optional data parameter renders as "None".
    * The verbose parameter renders as its truevalue/falsevalue ("1"/"0").
      Both are non-empty strings, so it must be compared explicitly; a bare
      truthiness test on str($verbose) would always enable the flag.
  -->
  <command>
    copyrighter
      -i '$input'
      -d '$database.value'
      #if str($lookup):
        -l $lookup
      #end if
      #if str($total) != "None":
        -t '$total'
      #end if
      #if str($verbose) == "1":
        -v
      #end if
  </command>

  <inputs>
    <param name="input"
           type="data"
           format="txt"
           label="Input community file"
           help="Text file obtained from 16S rRNA microarray, 16S rRNA amplicon sequencing or metagenomic sequencing, in biom, QIIME, GAAS, Unifrac, or generic (tabular site-by-species) format. The file must contain read counts (not percentages) and taxa must have UNALTERED taxonomic assignments." />

    <!-- The trait database is either picked from the 'trait_db' data table or taken from the history. -->
    <conditional name="database">
      <param name="specify"
             type="select"
             label="Trait database"
             help="Tab-delimited file of traits: 16S copy number, genome length, ...">
        <option value="builtin">Built-in file</option>
        <option value="uploaded">Uploaded file</option>
      </param>
      <when value="builtin">
        <param name="value" type="select" label="Built-in file">
          <options from_data_table="trait_db" />
          <validator type="no_options" message="No built-in trait database is available"/>
        </param>
      </when>
      <when value="uploaded">
        <param name="value" type="data" format="tabular" label="Uploaded file" />
      </when>
    </conditional>

    <param name="lookup"
           type="select"
           display="radio"
           value="desc"
           label="Lookup method"
           help="What to match when looking up the trait value of a taxon.">
      <option value="desc">OTU name</option>
      <option value="id">OTU ID (if recorded in your input community file)</option>
    </param>

    <param name="total"
           type="data"
           format="tabular"
           optional="true"
           label="Total abundance file"
           help="Tab-delimited file containing the total microbial abundance of each community, e.g. 16S rRNA quantitative PCR numbers to be corrected by the average 16S rRNA copy number." />

    <param name="verbose"
           type="boolean"
           checked="false"
           truevalue="1"
           falsevalue="0"
           label="Verbose"
           help="Display trait value assignments." />
  </inputs>

  <outputs>
    <data format="txt"
          name="relative"
          from_work_dir="out_copyrighted.txt"
          label="${tool.name} from ${on_string} (relative)"/>
    <!-- The absolute and combined outputs are only created when a total abundance file was given. -->
    <data format="tabular"
          name="absolute"
          from_work_dir="out_copyrighted_total.tsv"
          label="${tool.name} from ${on_string} (absolute)">
      <filter>str(total) != "None"</filter>
    </data>
    <data format="txt"
          name="combined"
          from_work_dir="out_copyrighted_combined.txt"
          label="${tool.name} from ${on_string} (combined)">
      <filter>str(total) != "None"</filter>
    </data>
  </outputs>

  <!-- A job fails on any non-zero exit code or when stderr mentions an error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
    <regex match="error|exception|invalid" source="stderr" level="fatal" />
  </stdio>

  <tests>
    <!-- QIIME input, lookup by OTU name -->
    <test>
      <param name="input" value="test_data/in.qiime" />
      <param name="specify" value="uploaded"/>
      <param name="value" value="test_data/in_db.tsv"/>
      <output name="relative" file="test_data/out.qiime"/>
    </test>
    <!-- biom input, lookup by OTU name -->
    <test>
      <param name="input" value="test_data/in.biom"/>
      <param name="specify" value="uploaded"/>
      <param name="value" value="test_data/in_db.tsv"/>
      <output name="relative" file="test_data/out.biom" lines_diff="2"/>
    </test>
    <!-- QIIME input with verbose reporting enabled -->
    <test>
      <param name="input" value="test_data/in.qiime"/>
      <param name="specify" value="uploaded"/>
      <param name="value" value="test_data/in_db.tsv"/>
      <param name="verbose" value="yes"/>
      <output name="relative" file="test_data/out.qiime"/>
    </test>
    <!-- biom input, lookup by OTU ID -->
    <test>
      <param name="input" value="test_data/in.biom"/>
      <param name="specify" value="uploaded"/>
      <param name="value" value="test_data/in_db.tsv"/>
      <param name="lookup" value="id"/>
      <output name="relative" file="test_data/out2.biom" lines_diff="2"/>
    </test>
    <!-- QIIME input with a total abundance file: all three outputs are produced -->
    <test>
      <param name="input" value="test_data/in.qiime"/>
      <param name="specify" value="uploaded"/>
      <param name="value" value="test_data/in_db.tsv"/>
      <param name="total" value="test_data/in_total.tsv"/>
      <output name="relative" file="test_data/out.qiime"/>
      <output name="absolute" file="test_data/out_total.tsv"/>
      <output name="combined" file="test_data/out_combined.qiime"/>
    </test>
    <!--<test>
      <param name="input" value="test_data/in.biom"/>
      <param name="specify" value="builtin"/>
      <param name="value" value="test_data/in_db.tsv"/>
      <output name="relative" file="test_data/out.biom" lines_diff="2"/>
    </test>-->
  </tests>

  <help>
**What CopyRighter does**

The genome of Bacteria and Archaea often contains several copies of the
16S rRNA gene. This can lead to significant biases when estimating the
composition of microbial communities using 16S rRNA amplicons or
microarrays or their total abundance using 16S rRNA quantitative PCR,
since species with a large number of copies will contribute
disproportionally more 16S amplicons than species with a unique copy.
Fortunately, it is possible to infer the copy number of unsequenced
microbial species, based on that of close relatives that have been fully
sequenced. Using this information, CopyRighter corrects microbial
relative abundance by applying a weight proportional to the inverse of
the estimated copy number to each species.

In metagenomic surveys, a similar problem arises due to genome length
variations between species, and can be corrected by CopyRighter as well.

In all cases, a community file is used as input and a corrected community
file with trait-corrected (16S rRNA gene copy number or genome length)
relative abundances is generated. Total abundance can optionally be
provided, corrected and combined with relative abundance estimates to
get the absolute abundance of each species. Also the average trait value
in each community is reported on standard output.
  </help>

</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/in.biom Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,46 @@ +{ + "id": "Human microbiomes", + "comment": "This is an optional comment", + "format": "Biological Observation Matrix 0.9.1-dev", + "format_url": "http://biom-format.org/documentation/format_versions/biom-1.0.html", + "type": "OTU table", + "generated_by": "QIIME revision 1.4.0-dev", + "date": "2011-12-19T19:00:00", + "rows":[ + {"id":"0", "metadata":{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}}, + {"id":"1", "metadata":{"taxonomy":["k__Bacteria", "p__Cyanobacteria", "c__Nostocophycideae", "o__Nostocales", "f__Nostocaceae", "g__Dolichospermum", "s__"]}}, + {"id":"2", "metadata":{"taxonomy":["k__Archaea", "p__Euryarchaeota", "c__Methanomicrobia", "o__Methanosarcinales", "f__Methanosarcinaceae", "g__Methanosarcina", "s__mazei"]}}, + {"id":"3", "metadata":{"taxonomy":["k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Halanaerobiales", "f__Halanaerobiaceae", "g__Halanaerobium", "s__"]}}, + {"id":"4", "metadata":{"taxonomy":[]}} + ], + "columns":[ + {"id":"Sample1", "metadata":{ + "BarcodeSequence":"CGCTTATCGAGA", + "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT", + "BODY_SITE":"gut", + "Description":"human gut"}}, + {"id":"Sample2", "metadata":{ + "BarcodeSequence":"CATACCAGTAGC", + "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT", + "BODY_SITE":"gut", + "Description":"human gut"}}, + {"id":"Sample3", "metadata":{ + "BarcodeSequence":"CTCTCTACCTGT", + "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT", + "BODY_SITE":"gut", + "Description":"human skin"}} + ], + "matrix_type": "sparse", + "matrix_element_type": "int", + "shape": [5, 3], + "data":[[0,2,4], + [1,0,5], + [1,1,3], + [2,2,3], + [3,0,2], + [3,1,2], + [3,2,2], + [4,1,1], + [4,2,1] + ] +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/in.qiime Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,7 @@ +# QIIME v1.3.0 OTU table +#OTU ID Sample1 Sample2 Sample3 Consensus Lineage +0 0 0 4 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__ +1 5 3 0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__ +2 0 0 3 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei +3 2 2 2 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__ +4 0 1 1 No blast hit
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/in_db.tsv Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,12 @@ +# ID 16S rRNA count +0 4 +1 3.31 +2 4.5 +3 4.98 + +# tax_string 16S rRNA count +k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei 3 +k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__ 3.85824942205532 +k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__ 4 +k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__ 6.80611715914982 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/in_total.tsv Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,3 @@ +Sample1 142 +Sample2 1.31e3 +Sample3 215.3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out.biom Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,137 @@ +{ + "generated_by" : "Bio::Community version 0.1", + "matrix_type" : "sparse", + "date" : "2013-07-27T03:34:40", + "data" : [ + [ + 0, + 0, + 72.1591861673745 + ], + [ + 1, + 0, + 27.8408138326255 + ], + [ + 0, + 1, + 50.7189439619348 + ], + [ + 2, + 1, + 16.6666666666667 + ], + [ + 1, + 1, + 32.6143893713985 + ], + [ + 2, + 2, + 10 + ], + [ + 1, + 2, + 21.5547529141356 + ], + [ + 3, + 2, + 25.3357412575932 + ], + [ + 4, + 2, + 43.1095058282712 + ] + ], + "rows" : [ + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Cyanobacteria", + "c__Nostocophycideae", + "o__Nostocales", + "f__Nostocaceae", + "g__Dolichospermum", + "s__" + ] + }, + "id" : "1" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Firmicutes", + "c__Clostridia", + "o__Halanaerobiales", + "f__Halanaerobiaceae", + "g__Halanaerobium", + "s__" + ] + }, + "id" : "3" + }, + { + "metadata" : null, + "id" : "4" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Proteobacteria", + "c__Gammaproteobacteria", + "o__Enterobacteriales", + "f__Enterobacteriaceae", + "g__Escherichia", + "s__" + ] + }, + "id" : "0" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Archaea", + "p__Euryarchaeota", + "c__Methanomicrobia", + "o__Methanosarcinales", + "f__Methanosarcinaceae", + "g__Methanosarcina", + "s__mazei" + ] + }, + "id" : "2" + } + ], + "matrix_element_type" : "float", + "format_url" : "http://biom-format.org/documentation/format_versions/biom-1.0.html", + "format" : "Biological Observation Matrix 1.0", + "columns" : [ + { + "metadata" : null, + "id" : "Sample1" + }, + { + "metadata" : null, + "id" : "Sample2" + }, + { + "metadata" : null, + "id" : "Sample3" + } + ], + "shape" : [ + 5, + 3 + ], + "id" : "", + "type" : "OTU table" +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out.qiime Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,7 @@ +# QIIME v1.3.0 OTU table +#OTU ID Sample1 Sample2 Sample3 Consensus Lineage +1 72.1591861673745 50.7189439619348 0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__ +3 27.8408138326255 32.6143893713985 21.5547529141356 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__ +4 0 16.6666666666667 10 No blast hit +0 0 0 25.3357412575932 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__ +2 0 0 43.1095058282712 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out2.biom Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,137 @@ +{ + "generated_by" : "Bio::Community version 0.1", + "matrix_type" : "sparse", + "date" : "2013-07-27T03:32:37", + "data" : [ + [ + 0, + 0, + 78.997461928934 + ], + [ + 1, + 0, + 21.002538071066 + ], + [ + 0, + 1, + 57.7458256029685 + ], + [ + 2, + 1, + 16.6666666666667 + ], + [ + 1, + 1, + 25.5875077303649 + ], + [ + 2, + 2, + 10 + ], + [ + 1, + 2, + 17.4757281553398 + ], + [ + 3, + 2, + 43.5145631067961 + ], + [ + 4, + 2, + 29.0097087378641 + ] + ], + "rows" : [ + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Cyanobacteria", + "c__Nostocophycideae", + "o__Nostocales", + "f__Nostocaceae", + "g__Dolichospermum", + "s__" + ] + }, + "id" : "1" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Firmicutes", + "c__Clostridia", + "o__Halanaerobiales", + "f__Halanaerobiaceae", + "g__Halanaerobium", + "s__" + ] + }, + "id" : "3" + }, + { + "metadata" : null, + "id" : "4" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Bacteria", + "p__Proteobacteria", + "c__Gammaproteobacteria", + "o__Enterobacteriales", + "f__Enterobacteriaceae", + "g__Escherichia", + "s__" + ] + }, + "id" : "0" + }, + { + "metadata" : { + "taxonomy" : [ + "k__Archaea", + "p__Euryarchaeota", + "c__Methanomicrobia", + "o__Methanosarcinales", + "f__Methanosarcinaceae", + "g__Methanosarcina", + "s__mazei" + ] + }, + "id" : "2" + } + ], + "matrix_element_type" : "float", + "format_url" : "http://biom-format.org/documentation/format_versions/biom-1.0.html", + "format" : "Biological Observation Matrix 1.0", + "columns" : [ + { + "metadata" : null, + "id" : "Sample1" + }, + { + "metadata" : null, + "id" : "Sample2" + }, + { + "metadata" : null, + "id" : "Sample3" + } + ], + "shape" : [ + 5, + 3 + ], + "id" : "", + "type" : "OTU table" +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out_combined.qiime Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,7 @@ +# QIIME v1.3.0 OTU table +#OTU ID Sample1 Sample2 Sample3 Consensus Lineage +1 26.2887543891705 169.766111090629 0 k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum; s__ +3 10.1428571428571 109.166666666667 10.765 k__Bacteria; p__Firmicutes; c__Clostridia; o__Halanaerobiales; f__Halanaerobiaceae; g__Halanaerobium; s__ +4 0 55.7865555514592 4.99425813085536 No blast hit +0 0 0 12.6533231776982 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__Escherichia; s__ +2 0 0 21.53 k__Archaea; p__Euryarchaeota; c__Methanomicrobia; o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina; s__mazei
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/out_total.tsv Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,3 @@ +Sample1 36.4316115320277 +Sample2 334.719333308755 +Sample3 49.9425813085536
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trait_db.loc.sample Mon Jul 29 06:52:36 2013 -0400 @@ -0,0 +1,20 @@ +# File of pre-defined trait databases for CopyRighter +# +# This file defines the names and locations of available database files of microbial +# traits needed to run CopyRighter. Edit this file by adding the name and +# location of the databases you want (tab-delimited!) and move the file to the +# tool-data/ directory. Then add this to your tool_data_table_conf.xml file: +# +# <!-- Locations of database files for CopyRighter --> +# <table name="trait_db" comment_char="#"> +# <columns>id, name, value</columns> +# <file path="tool-data/trait_db.loc" /> +# </table> +# + +# id name path +ssu_img40_gg201210 Short ribosomal subunit (16S) data (IMG 4.0, Greengenes 2012/10) /path/to/ssu_img40_gg201210.txt +genlength_img40_gg201210 Genome length data (IMG 4.0, Greengenes 2012/10) /path/to/genlength_img40_gg201210.txt +test_invalid Invalid database /path/to/db.txt + +