# HG changeset patch # User iuc # Date 1499719135 14400 # Node ID 242462fc608b588089474c61ca63283d93952cb6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit d41cdeeec9fd0eb5612df0d5af63a979d1c9ec87 diff -r 000000000000 -r 242462fc608b README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,7 @@ +QIIME 1.9.1 Galaxy Wrapper +-------------------------- + +Note: Many of these tools output html files that will not display properly +unless sanitization is turned off in Galaxy. This can be done globally via the +`sanitize_all_html` option in `galaxy.ini` or on a per-tool basis using the +`sanitize_whitelist_file` in `galaxy.ini`. diff -r 000000000000 -r 242462fc608b collapse_samples.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/collapse_samples.xml Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,90 @@ + + Collapse samples in a BIOM table and mapping file + + macros.xml + + + collapse_samples.py --version + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `_ collapse samples in a BIOM table and mapping file. + +The values in the BIOM table are collapsed in one of several different ways (see possible values of the --collapse_mode parameter). + +The values in the mapping file are collapsed by grouping the values if they differ for the grouped samples, and by providing the single value if they don’t differ for the grouped samples. + +The outputs of `collapse_samples.py `_ are a collapsed `biom file `_ and a collapsed mapping file. + +More information about this tool is available on +`QIIME documentation `_. 
+ ]]> + + + + diff -r 000000000000 -r 242462fc608b generate_test_data.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/generate_test_data.sh Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +# make_otu_table +make_otu_table.py \ + --otu_map_fp 'test-data/make_otu_table/otu_map.txt' \ + --taxonomy 'test-data/make_otu_table/tax_assignments.txt' \ + --exclude_otus_fp 'test-data/make_otu_table/chimeric_seqs.txt' \ + --mapping_fp 'test-data/make_otu_table/mapping_file.txt' \ + --output_biom_fp 'test-data/make_otu_table/OTU_table_chimeric.biom' +biom convert \ + -i 'test-data/make_otu_table/OTU_table_chimeric.biom' \ + -o 'test-data/make_otu_table/OTU_table_chimeric.biom' \ + --to-json + +make_otu_table.py \ + --otu_map_fp 'test-data/make_otu_table/otu_map.txt' \ + --taxonomy 'test-data/make_otu_table/tax_assignments.txt' \ + --exclude_otus_fp 'test-data/make_otu_table/pynast_failures.fna' \ + --mapping_fp 'test-data/make_otu_table/mapping_file.txt' \ + --output_biom_fp 'test-data/make_otu_table/OTU_table_pynast.biom' +biom convert \ + -i 'test-data/make_otu_table/OTU_table_pynast.biom' \ + -o 'test-data/make_otu_table/OTU_table_pynast.biom' \ + --to-json + +# collapse_samples +collapse_samples.py \ + --input_biom_fp 'test-data/collapse_samples/table.biom' \ + --mapping_fp 'test-data/collapse_samples/map.txt' \ + --collapse_mode 'sum' \ + --collapse_fields 'SampleType' \ + --output_biom_fp 'test-data/collapse_samples/collapsed_sum_SampleType_table.biom' \ + --output_mapping_fp 'test-data/collapse_samples/collapsed_sum_SampleType_map.txt' +biom convert \ + -i 'test-data/collapse_samples/collapsed_sum_SampleType_table.biom' \ + -o 'test-data/collapse_samples/collapsed_sum_SampleType_table.biom' \ + --to-json + +collapse_samples.py \ + --input_biom_fp 'test-data/collapse_samples/table.biom' \ + --mapping_fp 'test-data/collapse_samples/map.txt' \ + --collapse_mode 'first' \ + --collapse_fields 'subject','year' \ + --normalize \ + --output_biom_fp 
'test-data/collapse_samples/collapsed_first_2fields_table.biom' \ + --output_mapping_fp 'test-data/collapse_samples/collapsed_first_2fields_map.txt' +biom convert \ + -i 'test-data/collapse_samples/collapsed_first_2fields_table.biom' \ + -o 'test-data/collapse_samples/collapsed_first_2fields_table.biom' \ + --to-json \ No newline at end of file diff -r 000000000000 -r 242462fc608b macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,70 @@ + + + 1.9.1 + + + qiime + + + + + 10.1038/nmeth.f.303 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 242462fc608b test-data/collapse_samples/collapsed_first_2fields_map.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/collapse_samples/collapsed_first_2fields_map.txt Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,3 @@ +#SampleID original-sample-ids BarcodeSequence LinkerPrimerSequence SampleType month day replicate-group days_since_epoch Description +1.2008 (f1, f2, f5, f6, p1, not16S.1) (ACACTGTTCATG, ACCAGACGATGC, ACCAGACGATGC, ACCAGACGATGC, AACGCACGCTAG, ATACTATTGCGC) GTGCCAGCMGCCGCGGTAA (feces, feces, feces, feces, L_palm, Other) 10 (22, 23, 23, 23, 21, 22) (1, 1, 2, 2, 2, 3) (14174, 14175, 14175, 14175, 14173, 14174) (fecal1, fecal2, derived from f3 with some changes to sequences to add one new otu, derived from f4 with some changes to sequences to add one new otu, palm1, contains one randomly generated sequence, randomly generated sequence plus some variants, these should not map to 16S) +2.2008 (f3, f4, p2, t1, t2) (ACCAGACGATGC, ACCAGACGATGC, ACACTGTTCATG, AGTGAGAGAAGC, ATACTATTGCGC) GTGCCAGCMGCCGCGGTAA (feces, feces, L_palm, Tongue, Tongue) 10 (23, 23, 22, 21, 22) (1, 1, 2, 2, 2) (14175, 14175, 14174, 14173, 14174) (identical sequences to fecal2, all sequences identical, map to GG 295053 at 97 percent id, palm2, tongue1, contains one randomly generated 
sequence, tongue2) \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/collapse_samples/collapsed_first_2fields_table.biom --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/collapse_samples/collapsed_first_2fields_table.biom Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,1 @@ +{"id": "No Table ID","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","matrix_type": "sparse","generated_by": "BIOM-Format 2.1.5","date": "2017-05-19T14:27:09.894251","type": "OTU table","matrix_element_type": "float","shape": [14, 2],"data": [[0,0,0.81818181818181823],[0,1,0.90909090909090906],[2,1,0.045454545454545456],[13,0,0.18181818181818182],[13,1,0.045454545454545456]],"rows": [{"id": "295053", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "42684", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria"]}},{"id": "None11", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None10", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None7", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None6", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None5", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None4", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None3", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None2", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None1", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "879972", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None9", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None8", "metadata": {"taxonomy": ["k__Bacteria"]}}],"columns": [{"id": "2.2008", "metadata": {"collapsed_ids": ["f3", "f4", "p2", "t1", "t2"]}},{"id": "1.2008", "metadata": {"collapsed_ids": ["f2", "f1", "p1", "not16S.1"]}}]} \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/collapse_samples/collapsed_sum_SampleType_map.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/test-data/collapse_samples/collapsed_sum_SampleType_map.txt Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,5 @@ +#SampleID original-sample-ids BarcodeSequence LinkerPrimerSequence year month day subject replicate-group days_since_epoch Description +L_palm (p1, p2) (AACGCACGCTAG, ACACTGTTCATG) GTGCCAGCMGCCGCGGTAA 2008 10 (21, 22) (1, 2) 2 (14173, 14174) (palm1, contains one randomly generated sequence, palm2) +Other not16S.1 ATACTATTGCGC GTGCCAGCMGCCGCGGTAA 2008 10 22 1 3 14174 randomly generated sequence plus some variants, these should not map to 16S +Tongue (t1, t2) (AGTGAGAGAAGC, ATACTATTGCGC) GTGCCAGCMGCCGCGGTAA 2008 10 (21, 22) 2 2 (14173, 14174) (tongue1, contains one randomly generated sequence, tongue2) +feces (f1, f2, f3, f4, f5, f6) (ACACTGTTCATG, ACCAGACGATGC, ACCAGACGATGC, ACCAGACGATGC, ACCAGACGATGC, ACCAGACGATGC) GTGCCAGCMGCCGCGGTAA 2008 10 (22, 23, 23, 23, 23, 23) (1, 1, 2, 2, 1, 1) (1, 1, 1, 1, 2, 2) (14174, 14175, 14175, 14175, 14175, 14175) (fecal1, fecal2, identical sequences to fecal2, all sequences identical, map to GG 295053 at 97 percent id, derived from f3 with some changes to sequences to add one new otu, derived from f4 with some changes to sequences to add one new otu) \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/collapse_samples/collapsed_sum_SampleType_table.biom --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/collapse_samples/collapsed_sum_SampleType_table.biom Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,1 @@ +{"id": "No Table ID","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","matrix_type": "sparse","generated_by": "BIOM-Format 2.1.5","date": "2017-05-19T14:27:05.204426","type": "OTU table","matrix_element_type": "float","shape": [14, 4],"data": [[0,1,78.0],[0,3,4.0],[1,3,1.0],[2,1,1.0],[2,3,2.0],[3,0,1.0],[4,3,1.0],[5,2,20.0],[6,3,1.0],[7,3,2.0],[8,0,5.0],[8,3,1.0],[9,2,2.0],[10,3,1.0],[11,0,5.0],[11,3,29.0],[12,0,34.0],[12,3,3.0],[13,1,9.0]],"rows": [{"id": 
"295053", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "42684", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria"]}},{"id": "None11", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None10", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None7", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None6", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None5", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None4", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None3", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None2", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None1", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "879972", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None9", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None8", "metadata": {"taxonomy": ["k__Bacteria"]}}],"columns": [{"id": "Tongue", "metadata": {"collapsed_ids": ["t1", "t2"]}},{"id": "feces", "metadata": {"collapsed_ids": ["f2", "f1", "f3", "f4"]}},{"id": "Other", "metadata": {"collapsed_ids": ["not16S.1"]}},{"id": "L_palm", "metadata": {"collapsed_ids": ["p2", "p1"]}}]} \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/collapse_samples/map.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/collapse_samples/map.txt Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,12 @@ +#SampleID BarcodeSequence LinkerPrimerSequence SampleType year month day subject replicate-group days_since_epoch Description +f1 ACACTGTTCATG GTGCCAGCMGCCGCGGTAA feces 2008 10 22 1 1 14174 fecal1 +f2 ACCAGACGATGC GTGCCAGCMGCCGCGGTAA feces 2008 10 23 1 1 14175 fecal2 +f3 ACCAGACGATGC GTGCCAGCMGCCGCGGTAA feces 2008 10 23 2 1 14175 identical sequences to fecal2 +f4 ACCAGACGATGC GTGCCAGCMGCCGCGGTAA feces 2008 10 23 2 1 14175 all sequences identical, map to GG 295053 at 97 percent id +f5 ACCAGACGATGC GTGCCAGCMGCCGCGGTAA feces 2008 10 23 1 2 14175 derived from f3 with some changes to sequences to add one new otu +f6 ACCAGACGATGC 
GTGCCAGCMGCCGCGGTAA feces 2008 10 23 1 2 14175 derived from f4 with some changes to sequences to add one new otu +p1 AACGCACGCTAG GTGCCAGCMGCCGCGGTAA L_palm 2008 10 21 1 2 14173 palm1, contains one randomly generated sequence +p2 ACACTGTTCATG GTGCCAGCMGCCGCGGTAA L_palm 2008 10 22 2 2 14174 palm2 +t1 AGTGAGAGAAGC GTGCCAGCMGCCGCGGTAA Tongue 2008 10 21 2 2 14173 tongue1, contains one randomly generated sequence +t2 ATACTATTGCGC GTGCCAGCMGCCGCGGTAA Tongue 2008 10 22 2 2 14174 tongue2 +not16S.1 ATACTATTGCGC GTGCCAGCMGCCGCGGTAA Other 2008 10 22 1 3 14174 randomly generated sequence plus some variants, these should not map to 16S \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/collapse_samples/table.biom --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/collapse_samples/table.biom Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,1 @@ +{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","type": "OTU table","generated_by": "QIIME 1.6.0-dev","date": "2013-02-09T09:30:11.550590","matrix_type": "sparse","matrix_element_type": "int","shape": [14, 9],"data": [[0,0,20],[0,1,18],[0,2,18],[0,3,22],[0,4,4],[1,4,1],[2,0,1],[2,4,1],[2,5,1],[3,6,1],[4,4,1],[5,7,20],[6,4,1],[7,4,1],[7,5,1],[8,4,1],[8,6,2],[8,8,3],[9,7,2],[10,5,1],[11,4,9],[11,5,20],[11,6,1],[11,8,4],[12,4,3],[12,6,19],[12,8,15],[13,0,1],[13,1,4],[13,2,4]],"rows": [{"id": "295053", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "42684", "metadata": {"taxonomy": ["k__Bacteria", "p__Proteobacteria"]}},{"id": "None11", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None10", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None7", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None6", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None5", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None4", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None3", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None2", "metadata": 
{"taxonomy": ["k__Bacteria"]}},{"id": "None1", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "879972", "metadata": {"taxonomy": ["k__Bacteria"]}},{"id": "None9", "metadata": {"taxonomy": ["Unclassified"]}},{"id": "None8", "metadata": {"taxonomy": ["k__Bacteria"]}}],"columns": [{"id": "f2", "metadata": null},{"id": "f1", "metadata": null},{"id": "f3", "metadata": null},{"id": "f4", "metadata": null},{"id": "p2", "metadata": null},{"id": "p1", "metadata": null},{"id": "t1", "metadata": null},{"id": "not16S.1", "metadata": null},{"id": "t2", "metadata": null}]} \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/make_otu_table/OTU_table_chimeric.biom --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/make_otu_table/OTU_table_chimeric.biom Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,1 @@ +{"id": "No Table ID","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","matrix_type": "sparse","generated_by": "BIOM-Format 2.1.5","date": "2017-05-19T14:25:37.660678","type": "OTU table","matrix_element_type": "float","shape": [2, 2],"data": [[0,0,1.0],[1,1,3.0]],"rows": [{"id": "1", "metadata": {"taxonomy": ["A", "B", "C", "D"]}},{"id": "x", "metadata": {"taxonomy": ["A", "B", "C"]}}],"columns": [{"id": "ABC", "metadata": {"LinkerPrimerSequence": "AAAAAA", "BarcodeSequence": "ATGC", "Description": "First Sample"}},{"id": "GHI", "metadata": {"LinkerPrimerSequence": "AAAAAA", "BarcodeSequence": "CATG", "Description": "Third Sample"}}]} \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/make_otu_table/OTU_table_pynast.biom --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/make_otu_table/OTU_table_pynast.biom Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,1 @@ +{"id": "No Table ID","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","matrix_type": "sparse","generated_by": "BIOM-Format 2.1.5","date": "2017-05-19T14:26:19.586158","type": "OTU 
table","matrix_element_type": "float","shape": [2, 2],"data": [[0,0,1.0],[1,1,3.0]],"rows": [{"id": "1", "metadata": {"taxonomy": ["A", "B", "C", "D"]}},{"id": "x", "metadata": {"taxonomy": ["A", "B", "C"]}}],"columns": [{"id": "ABC", "metadata": {"LinkerPrimerSequence": "AAAAAA", "BarcodeSequence": "ATGC", "Description": "First Sample"}},{"id": "GHI", "metadata": {"LinkerPrimerSequence": "AAAAAA", "BarcodeSequence": "CATG", "Description": "Third Sample"}}]} \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/make_otu_table/chimeric_seqs.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/make_otu_table/chimeric_seqs.txt Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,3 @@ +0 some comment +42 not a real otu id +z \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/make_otu_table/mapping_file.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/make_otu_table/mapping_file.txt Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,5 @@ +#SampleID BarcodeSequence LinkerPrimerSequence Description +ABC ATGC AAAAAA First Sample +XYZ TGCA AAAAAA Fourth Sample +GHI CATG AAAAAA Third Sample +DEF GCAT AAAAAA Second Sample diff -r 000000000000 -r 242462fc608b test-data/make_otu_table/otu_map.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/make_otu_table/otu_map.txt Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,4 @@ +0 ABC_0 DEF_1 +1 ABC_1 +x GHI_2 GHI_3 GHI_77 +z DEF_3 XYZ_1 \ No newline at end of file diff -r 000000000000 -r 242462fc608b test-data/make_otu_table/pynast_failures.fna --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/make_otu_table/pynast_failures.fna Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,6 @@ +>0 something +ACCG +>42 some comment +CCGGTT +>z +CCGG diff -r 000000000000 -r 242462fc608b test-data/make_otu_table/tax_assignments.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/make_otu_table/tax_assignments.txt Mon Jul 10 16:38:55 2017 -0400 @@ -0,0 +1,4 @@ +0 A;B;C +1 
A;B;C;D +x A;B;C +z A;B \ No newline at end of file