# HG changeset patch # User eschen42 # Date 1516043199 18000 # Node ID 38ccf6722d540fbb2f0a0e884098095564a90f0c # Parent 2cdf7d5982c87cf5709b71da6cc0bdc41f65db59 planemo upload for repository https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper/tree/master commit a06ae79d25b31d02217b934b9cd61a5aba3f640f diff -r 2cdf7d5982c8 -r 38ccf6722d54 test-data/expected_dataMatrix.tsv --- a/test-data/expected_dataMatrix.tsv Thu Sep 07 17:41:09 2017 -0400 +++ b/test-data/expected_dataMatrix.tsv Mon Jan 15 14:06:39 2018 -0500 @@ -1,16 +1,16 @@ HU_017 HU_034 HU_078 HU_091 HU_093 HU_099 HU_130 HU_134 HU_138 -HMDB03193 76043 44943 173175 242549 57066 559869 339188 471368 262271 -HMDB01101 30689 52217 229568 4763576 3878773 976436 608298 1605075 72021 -HMDB01101.1 6877586 3158 4763576 3878773 976436 831937 1605075 72021 442510 -HMDB10348 47259 60885 168264 176500 76457 610110 279156 524468 451573 -HMDB59717 357351 301983 1028110 1530493 270027 1378535 808334 1132813 871209 -HMDB13189 2644620 1661412 2755434 593863 837865 3526136 1608814 3446611 1941527 +HMDB00191 560002 575790 785428 645785 591569 960658 639437 1092885 1409045 +HMDB00208 747080 595872 3143654 4059767 1433702 5593888 2477288 3346077 4230072 +HMDB00251 368600 94936 293988 352855 767894 268331 310918 1248919 577184 HMDB00299 250551 456162 808657 614370 250403 768004 504108 1014041 1362408 -HMDB00191 560002 575790 785428 645785 591569 960658 639437 1092885 1409045 +HMDB00512 0 280560 556003 590779 209285 342532 569970 525240 246282 HMDB00518 0 85944 129886 175800 13154 230242 440223 315368 10657 HMDB00715 1252089 905408 5140022 2658555 814523 2558923 4184204 3865723 3236644 HMDB01032 2569205 1604999 26222916 257139 675754 59906109 31151730 18648127 14989438 -HMDB00208 747080 595872 3143654 4059767 1433702 5593888 2477288 3346077 4230072 +HMDB01101 30689 52217 229568 4763576 3878773 976436 608298 1605075 72021 +HMDB01101.1 6877586 3158 4763576 3878773 976436 831937 1605075 72021 442510 +HMDB03193 76043 44943 
173175 242549 57066 559869 339188 471368 262271 HMDB04824 374028 539206 959381 605191 310260 1253319 477995 825691 1157093 -HMDB00512 0 280560 556003 590779 209285 342532 569970 525240 246282 -HMDB00251 368600 94936 293988 352855 767894 268331 310918 1248919 577184 +HMDB10348 47259 60885 168264 176500 76457 610110 279156 524468 451573 +HMDB13189 2644620 1661412 2755434 593863 837865 3526136 1608814 3446611 1941527 +HMDB59717 357351 301983 1028110 1530493 270027 1378535 808334 1132813 871209 diff -r 2cdf7d5982c8 -r 38ccf6722d54 test-data/expected_variableMetadata.tsv --- a/test-data/expected_variableMetadata.tsv Thu Sep 07 17:41:09 2017 -0400 +++ b/test-data/expected_variableMetadata.tsv Mon Jan 15 14:06:39 2018 -0500 @@ -1,16 +1,16 @@ -variableMetadata name -HMDB03193 Testosterone_glucuronide -HMDB01101 p-Anisic_acid -HMDB01101.1 p-Anisic_acid_2 -HMDB10348 Dehydroepiandrosterone_3-glucuronide -HMDB59717 Glu-Val -HMDB13189 3-Indole_carboxylic_acid_glucuronide -HMDB00299 Xanthosine -HMDB00191 L-Aspartic_acid -HMDB00518 Chenodeoxycholic_acid -HMDB00715 Kynurenic_acid -HMDB01032 Dehydroepiandrosterone_sulfate -HMDB00208 Oxoglutaric_acid -HMDB04824 N2,N2-Dimethylguanosine -HMDB00512 N-Acetyl-L-phenylalanine -HMDB00251 Taurine +variableMetadata name mz rt +HMDB00191 loquor 650 600 +HMDB00208 loquimini 873 476 +HMDB00251 pasamur 500 423 +HMDB00299 bantur 700 500 +HMDB00512 pantur 900 543 +HMDB00518 loquoris 870 250 +HMDB00715 loquitur 725 900 +HMDB01032 loquimur 550 425 +HMDB01101 bar 150 300 +HMDB01101.1 baz 200 225 +HMDB03193 foo 100 200 +HMDB04824 loquantur 950 522 +HMDB10348 batur 300 275 +HMDB13189 baris 800 325 +HMDB59717 bamur 125 400 diff -r 2cdf7d5982c8 -r 38ccf6722d54 test-data/input_variableMetadata.tsv --- a/test-data/input_variableMetadata.tsv Thu Sep 07 17:41:09 2017 -0400 +++ b/test-data/input_variableMetadata.tsv Mon Jan 15 14:06:39 2018 -0500 @@ -1,17 +1,17 @@ -variableMetadata name -HMDB03193 Testosterone_glucuronide -HMDB01101 p-Anisic_acid -HMDB01101 
p-Anisic_acid_2 -HMDB10348 Dehydroepiandrosterone_3-glucuronide -HMDB59717 Glu-Val -HMDB00822 p-Hydroxymandelic_acid -HMDB13189 3-Indole_carboxylic_acid_glucuronide -HMDB00299 Xanthosine -HMDB00191 L-Aspartic_acid -HMDB00518 Chenodeoxycholic_acid -HMDB00715 Kynurenic_acid -HMDB01032 Dehydroepiandrosterone_sulfate -HMDB00208 Oxoglutaric_acid -HMDB04824 N2,N2-Dimethylguanosine -HMDB00512 N-Acetyl-L-phenylalanine -HMDB00251 Taurine +variable name mz rt +HMDB03193 foo 100 200 +HMDB01101 bar 150 300 +HMDB01101 baz 200 225 +HMDB00208 loquimini 873 476 +HMDB10348 batur 300 275 +HMDB00299 bantur 700 500 +HMDB00191 loquor 650 600 +HMDB00518 loquoris 870 250 +HMDB59717 bamur 125 400 +HMDB00822 bamini 300 199 +HMDB13189 baris 800 325 +HMDB00715 loquitur 725 900 +HMDB01032 loquimur 550 425 +HMDB04824 loquantur 950 522 +HMDB00512 pantur 900 543 +HMDB00251 pasamur 500 423 diff -r 2cdf7d5982c8 -r 38ccf6722d54 test-data/rangefilter_dataMatrix.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rangefilter_dataMatrix.tsv Mon Jan 15 14:06:39 2018 -0500 @@ -0,0 +1,6 @@ + HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_208 +HMDB00191 560002 771533 575790 392284 888498 785428 645785 591569 960658 910201 639437 1092885 1409045 2292023 1246459 1945577 710519 773384 622898 +HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 2540044 +HMDB01032 2569205 26023086 1604999 430453 8103558 26222916 257139 675754 59906109 263055 31151730 18648127 14989438 1554658 20249262 5588731 871010 15920 44276 +HMDB01101.1 6877586 52217 3158 10789748 229568 4763576 3878773 976436 831937 608298 1605075 72021 442510 1107705 1464339 31250 2724553 72900 30689 +HMDB13189 2644620 727587 1661412 619181 136278 2755434 593863 837865 3526136 2003278 1608814 3446611 1941527 113937 3132404 2893445 2092753 1034666 841661 diff -r 
2cdf7d5982c8 -r 38ccf6722d54 test-data/rangefilter_sampleMetadata.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rangefilter_sampleMetadata.tsv Mon Jan 15 14:06:39 2018 -0500 @@ -0,0 +1,20 @@ +sampleMetadata injectionOrder mode age bmi gender +HU_017 2 pos 41 23.03 M +HU_028 7 pos 41 23.92 F +HU_034 9 pos 52 23.37 M +HU_051 20 pos 24 23.23 F +HU_060 24 pos 55 28.72 F +HU_078 34 pos 46 25.18 M +HU_091 42 pos 61 26.12 M +HU_093 43 pos 53 21.71 M +HU_099 46 pos 23 21.3 M +HU_110 53 pos 50 20.9 F +HU_130 63 pos 33 26.06 M +HU_134 67 pos 48 22.89 M +HU_138 68 pos 42 21.88 M +HU_149 72 pos 35 19.49 F +HU_152 75 pos 26 17.58 F +HU_175 87 pos 35 21.26 F +HU_178 88 pos 60 32.87 F +HU_185 95 pos 42 21.09 F +HU_208 106 pos 27 18.61 F diff -r 2cdf7d5982c8 -r 38ccf6722d54 test-data/rangefilter_variableMetadata.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rangefilter_variableMetadata.tsv Mon Jan 15 14:06:39 2018 -0500 @@ -0,0 +1,6 @@ +variableMetadata name mz rt +HMDB00191 loquor 650 600 +HMDB00208 loquimini 873 476 +HMDB01032 loquimur 550 425 +HMDB01101.1 baz 200 225 +HMDB13189 baris 800 325 diff -r 2cdf7d5982c8 -r 38ccf6722d54 w4mclassfilter.xml --- a/w4mclassfilter.xml Thu Sep 07 17:41:09 2017 -0400 +++ b/w4mclassfilter.xml Mon Jan 15 14:06:39 2018 -0500 @@ -1,10 +1,11 @@ - + Filter W4M data by sample class - r-base + + r-base r-batch - w4mclassfilter + w4mclassfilter @@ -22,6 +23,7 @@ wildcards '$wildcards' classnameColumn '$classnameColumn' samplenameColumn '$samplenameColumn' + variable_range_filter '$variableRangeFilter' dataMatrix_out '$dataMatrix_out' sampleMetadata_out '$sampleMetadata_out' variableMetadata_out '$variableMetadata_out' @@ -56,8 +58,8 @@ + - @@ -66,6 +68,18 @@ + + + + + + + + + + + + @@ -78,6 +92,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -436,6 +504,10 @@ | '``filter-out``' - exclude only the named sample classes | +Variable-range filters 
(default = no filters) + | comma-separated names of variable-range filters (see 'Variable-range filters' below) + | + ------------ @@ -512,28 +584,58 @@ - '``^[A-Z][A-Z]*[0-9][0-9]$``' - NO MATCH - the name does not end with the pattern '``[A-Z][0-9][0-9]$``', i.e., it ends with four digits, not two. - '``^[A-Z][0-9]*$``' - NO MATCH - the pattern specifies that second character and all those that follow, if present, must be digits. +---------------------- +Variable-range filters +---------------------- ---------------- -Working example ---------------- +An array of range-specification strings may be supplied in the `variableRangeFilter` +argument. If supplied, only features having numerical values in the specified column +of `variableMetadata` that fall within the specified ranges will be retained +in the output. Each range is a string of three colon-separated values (e.g., "mz:200:800") in the +following order: + +- the **name of a column of `variableMetadata`** which must have numerical data (e.g., "mz"); +- the **minimum allowed value** in that column for the feature to be retained (e.g., 200); +- the **maximum allowed value** (e.g., 800). + +Note for the range specification strings: + +- **If the "maximum" is less than the "minimum", then the range is exclusive** (e.g., "mz:800:200" means retain only features whose mz is NOT in the range 200-800) +- **If the name supplied in the first field is 'FEATMAX',** then the string is defining the minimum (and possibly, though less useful, maximum) intensity for each feature in the dataMatrix. For example, "FEATMAX:1e6:" would specify that any feature would be excluded if no sample had an intensity for that feature greater than 1000000. + + - Note, however, that when the "maximum" is greater than the "minimum" for the FEATMAX range specification, then the specification is ignored. + +----------------------------------------------------------------------------- + +---------------- +WORKING EXAMPLES +---------------- .. 
class:: infomark -**Input files** +----------- +Input Files +----------- -+-------------------+--------------------------------------------------------------------------------------------------------------+ -| Input File | Download from URL | -+===================+==============================================================================================================+ -| Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/input_dataMatrix.tsv | -+-------------------+--------------------------------------------------------------------------------------------------------------+ -| Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/input_sampleMetadata.tsv | -+-------------------+--------------------------------------------------------------------------------------------------------------+ -| Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/input_variableMetadata.tsv | -+-------------------+--------------------------------------------------------------------------------------------------------------+ ++------------------------------------------------------------------------------------------------------------------------+ +| Input File URL | ++========================================================================================================================+ +| https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/input_dataMatrix.tsv | ++------------------------------------------------------------------------------------------------------------------------+ +| https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/input_sampleMetadata.tsv | ++------------------------------------------------------------------------------------------------------------------------+ +| 
https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/input_variableMetadata.tsv | ++------------------------------------------------------------------------------------------------------------------------+ .. class:: infomark -**Other input parameters** +------------------------------- +Running Without Range-Filtering +------------------------------- + +This example retains only samples whose 'gender' attribute is 'M'. + +**Input parameters** +------------------------------------+-----------------+ | Input Parameter | Value | @@ -547,20 +649,54 @@ | Column that names the sample | sampleMetadata | +------------------------------------+-----------------+ +**Expected outputs** + ++-------------------+---------------------------------------------------------------------------------------------------------------------------+ +| Expected Output | Download from URL | ++===================+===========================================================================================================================+ +| Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/expected_dataMatrix.tsv | ++-------------------+---------------------------------------------------------------------------------------------------------------------------+ +| Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/expected_sampleMetadata.tsv | ++-------------------+---------------------------------------------------------------------------------------------------------------------------+ +| Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/expected_variableMetadata.tsv | ++-------------------+---------------------------------------------------------------------------------------------------------------------------+ + .. 
class:: infomark +---------------------------- +Running With Range-Filtering +---------------------------- + +This example retains only features whose mz is at least 200, whose rt is at most 800, and whose maximum intensity across all samples is at least 2,000,000. +This example retains all samples (except those having zero variance for all features), although it would be possible to filter on samples as well. + +**Input parameters** + ++------------------------------------+-------------------------------+ +| Input Parameter | Value | ++====================================+===============================+ +| Names of sample classes | (Leave this field empty.) | ++------------------------------------+-------------------------------+ +| Include named classes | filter-out | ++------------------------------------+-------------------------------+ +| Column that names the sample-class | class | ++------------------------------------+-------------------------------+ +| Column that names the sample | sampleMetadata | ++------------------------------------+-------------------------------+ +| Variable range-filters | FEATMAX:2e6:,mz:200:,rt::800 | ++------------------------------------+-------------------------------+ + **Expected outputs** -+-------------------+-----------------------------------------------------------------------------------------------------------------+ -| Expected Output | Download from URL | -+===================+=================================================================================================================+ -| Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/expected_dataMatrix.tsv | -+-------------------+-----------------------------------------------------------------------------------------------------------------+ -| Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/expected_sampleMetadata.tsv | 
-+-------------------+-----------------------------------------------------------------------------------------------------------------+ -| Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/expected_variableMetadata.tsv | -+-------------------+-----------------------------------------------------------------------------------------------------------------+ - ++-------------------+------------------------------------------------------------------------------------------------------------------------------+ +| Expected Output | Download from URL | ++===================+==============================================================================================================================+ +| Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/rangefilter_dataMatrix.tsv | ++-------------------+------------------------------------------------------------------------------------------------------------------------------+ +| Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/rangefilter_sampleMetadata.tsv | ++-------------------+------------------------------------------------------------------------------------------------------------------------------+ +| Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/rangefilter_variableMetadata.tsv | ++-------------------+------------------------------------------------------------------------------------------------------------------------------+ ----------------------------------------------------------------------------- @@ -568,42 +704,54 @@ NEWS ---- -CHANGES IN VERSION 0.98.3 +Changes in version 0.98.6 ========================= -INTERNAL MODIFICATIONS +New features + +* Added support for filtering out features whose attributes fall outside specified ranges. 
+ For more detail, see "Variable-range filters" above. + +Internal modifications + +* Now uses w4mclassfilter R package v0.98.6. +* Now sorts sample names and feature names in output files because some statistical tools expect the same order in `dataMatrix` row and column names as in the corresponding metadata files. + +Changes in version 0.98.3 +========================= + +Internal modifications * Improved input handling. * Now uses w4mclassfilter R package v0.98.3, although that version has no functional implications for this tool. * Improved reference-list. -CHANGES IN VERSION 0.98.2 +Changes in version 0.98.2 ========================= -NEW FEATURES +New features * Added support for R-flavored regular expression pattern-matching when selecting names of sample-classes. * Empty classes argument or zero-length class_column result in no samples filtered out. -INTERNAL MODIFICATIONS +Internal modifications * Support and tests for new features. - -CHANGES IN VERSION 0.98.1 +Changes in version 0.98.1 ========================= -NEW FEATURES - First release - Wrap the w4mclassfilter R package that implements filtering of W4M data matrix, variable metadata, and sample metadata by class of sample. 
-*dataMatrix* *is* modified by the tool, so it *does* appear as an output file -*sampleMetadata* *is* modified by the tool, so it *does* appear as an output file -*variableMetadata* *is* modified by the tool, so it *does* appear as an output file +New features -INTERNAL MODIFICATIONS +* *dataMatrix* *is* modified by the tool, so it *does* appear as an output file +* *sampleMetadata* *is* modified by the tool, so it *does* appear as an output file +* *variableMetadata* *is* modified by the tool, so it *does* appear as an output file -none +Internal modifications + +* N/A ]]> diff -r 2cdf7d5982c8 -r 38ccf6722d54 w4mclassfilter_wrapper.R --- a/w4mclassfilter_wrapper.R Thu Sep 07 17:41:09 2017 -0400 +++ b/w4mclassfilter_wrapper.R Mon Jan 15 14:06:39 2018 -0500 @@ -82,8 +82,8 @@ # other parameters +wildcards <- as.logical(argVc["wildcards"]) sampleclassNames <- as.character(argVc["sampleclassNames"]) -wildcards <- as.logical(argVc["wildcards"]) sampleclassNames <- strsplit(x = sampleclassNames, split = ",", fixed = TRUE)[[1]] if (wildcards) { sampleclassNames <- gsub("[.]", "[.]", sampleclassNames) @@ -93,22 +93,26 @@ classnameColumn <- as.character(argVc["classnameColumn"]) samplenameColumn <- as.character(argVc["samplenameColumn"]) +variable_range_filter <- as.character(argVc["variable_range_filter"]) +variable_range_filter <- strsplit(x = variable_range_filter, split = ",", fixed = TRUE)[[1]] + ##------------------------------ ## Computation ##------------------------------ result <- w4m_filter_by_sample_class( - dataMatrix_in = dataMatrix_in -, sampleMetadata_in = sampleMetadata_in -, variableMetadata_in = variableMetadata_in -, dataMatrix_out = dataMatrix_out -, sampleMetadata_out = sampleMetadata_out -, variableMetadata_out = variableMetadata_out -, classes = sampleclassNames -, include = inclusive -, class_column = classnameColumn -, samplename_column = samplenameColumn -, failure_action = my_print + dataMatrix_in = dataMatrix_in +, sampleMetadata_in = 
sampleMetadata_in +, variableMetadata_in = variableMetadata_in +, dataMatrix_out = dataMatrix_out +, sampleMetadata_out = sampleMetadata_out +, variableMetadata_out = variableMetadata_out +, classes = sampleclassNames +, include = inclusive +, class_column = classnameColumn +, samplename_column = samplenameColumn +, variable_range_filter = variable_range_filter +, failure_action = my_print ) my_print("\nResult of '", modNamC, "' Galaxy module call to 'w4mclassfilter::w4m_filter_by_sample_class' R function: ",