# HG changeset patch # User malex # Date 1615241046 0 # Node ID 2e7d47c0b02794db35bf271ac9675be63e91d3be # Parent b54326490b4dd5b26db479bf6a30b49a39b5871c "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools" diff -r b54326490b4d -r 2e7d47c0b027 anova_fixed.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/anova_fixed.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,101 @@ + + - Perform a multi-way ANOVA with covariates and fixed effects. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 bland_altman_plot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bland_altman_plot.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,129 @@ + + - Create pairwise BA plots for outlier detection. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 blank_feature_filtering_flags.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blank_feature_filtering_flags.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,103 @@ + + - Calculate LOD and flag features in non-blank samples below threshold. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 coefficient_variation_flags.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/coefficient_variation_flags.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,85 @@ + + - Calculate the coefficient of variation and flag potential outliers. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 compare_flags.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/compare_flags.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,88 @@ + + within a flag file. 
+ + macros.xml + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 compound_identification.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/compound_identification.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,105 @@ + + based on m/z ratio and retention time. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 data_normalization_and_rescaling.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_normalization_and_rescaling.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,100 @@ + + of data. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 distribution_features.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/distribution_features.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,77 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 distribution_samples.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/distribution_samples.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,85 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 generate_rank_wide.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/generate_rank_wide.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,55 @@ + + Generate a wide format file with ranked columns from an input wide file. + + macros.xml + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 hierarchical_clustering_heatmap.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hierarchical_clustering_heatmap.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,81 @@ + + - Calculate means per group and plot a heatmap. 
+ + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 imputation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/imputation.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,190 @@ + + of missing values using selected algorithm. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 kruskal_wallis.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kruskal_wallis.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,86 @@ + + on features (rows). + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 lasso_enet_var_select.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lasso_enet_var_select.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,110 @@ + + for feature selection. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 linear_discriminant_analysis.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linear_discriminant_analysis.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,128 @@ + + . + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 log_and_glog_transformation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/log_and_glog_transformation.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,115 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,195 @@ + + + 21.3.4.2 + + + secimtools + + + + + @ARTICLE{Kirpich17secimtools, + author = {Alexander S. Kirpich, Miguel Ibarra, Oleksandr Moskalenko, Justin M. Fear, Joseph Gerken, Xinlei Mi, Ali Ashrafi, Alison M. Morse, Lauren M. 
McIntyre}, + title = {SECIMTools: A suite of Metabolomics Data Analysis Tools}, + journal = {BMC Bioinformatics}, + year = {in press} + } + + + + + **TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* + + **WARNINGS:** + + (1) SampleIDs in the wide data that have no matching name in the design file, as well as sampleIDs in the design file that have no matching name in the data, will be excluded from the analysis. + (2) This script automatically removes spaces and special characters from strings. + (3) If a compound/feature name starts with a number it will prepend an '_'. + (4) Input names are case sensitive and must match exactly (e.g use 'Feature' not 'feature'). + + + **Wide Formatted Dataset** + + A wide formatted dataset that contains measurements for each sample: + + +---------+---------+---------+---------+-----+ + | Feature | sample1 | sample2 | sample3 | ... | + +=========+=========+=========+=========+=====+ + | one | 10 | 20 | 10 | ... | + +---------+---------+---------+---------+-----+ + | two | 5 | 22 | 30 | ... | + +---------+---------+---------+---------+-----+ + | three | 30 | 27 | 2 | ... | + +---------+---------+---------+---------+-----+ + | four | 32 | 17 | 8 | ... | + +---------+---------+---------+---------+-----+ + | ... | ... | ... | ... | ... | + +---------+---------+---------+---------+-----+ + + **NOTE:** The 'Feature' column defines the rows within a wide formatted dataset. + + + **Design File** + + A Design file relating samples to various groups/treatment: + + +----------+--------+ + | sampleID | group | + +==========+========+ + | sample1 | g1 | + +----------+--------+ + | sample2 | g1 | + +----------+--------+ + | sample3 | g1 | + +----------+--------+ + | sample4 | g2 | + +----------+--------+ + | sample5 | g2 | + +----------+--------+ + | sample6 | g2 | + +----------+--------+ + | ... | ... 
| + +----------+--------+ + + **NOTE:** You must have a column named **sampleID** and the values in this column must match the column names in the wide dataset. + + + **Design File** + + A Design file relating samples to various groups/treatment: + + +----------+--------+ + | sampleID | group | + +==========+========+ + | sample1 | g1 | + +----------+--------+ + | sample2 | g1 | + +----------+--------+ + | sample3 | g1 | + +----------+--------+ + | sample4 | g2 | + +----------+--------+ + | sample5 | g2 | + +----------+--------+ + | sample6 | g2 | + +----------+--------+ + | ... | ... | + +----------+--------+ + + **NOTE:** You must have a column named **sampleID** and the values in this column must match + the column names in the long dataset. + + + **Long Dataset:** + + A dataset in long/stacked format that contains measurements for each sample: + + +----------+----------+------------+ + | Feature | sampleID | Peak Height| + +==========+==========+============+ + | One | 1 | 10 | + +----------+----------+------------+ + | One | 2 | 5 | + +----------+----------+------------+ + | One | 3 | 30 | + +----------+----------+------------+ + | Two | 1 | 20 | + +----------+----------+------------+ + | Two | 2 | 22 | + +----------+----------+------------+ + | Two | 3 | 27 | + +----------+----------+------------+ + | ... | ... | ... | + +----------+----------+------------+ + + + + **Flag File:** + + A wide formatted dataset that contains flags for each sample or feature: + + +----------+---------+---------+---------+-----+ + | Feature | flag_A | flag_B | flag_C | ... | + +==========+=========+=========+=========+=====+ + | one | 0 | 0 | 0 | ... | + +----------+---------+---------+---------+-----+ + | two | 0 | 1 | 1 | ... | + +----------+---------+---------+---------+-----+ + | three | 0 | 1 | 0 | ... | + +----------+---------+---------+---------+-----+ + | four | 1 | 0 | 0 | ... | + +----------+---------+---------+---------+-----+ + | ... | ... | ... | ... | ... 
| + +----------+---------+---------+---------+-----+ + + + + **M/Z RT File:** + + A wide formatted dataset that contains M/Z and RT measurements for each sample: + + +----------+--------+----------------+ + | sampleID | M/Z | Retention Time | + +==========+========+================+ + | sample1 | 0.1556 | 0.253618769 | + +----------+--------+----------------+ + | sample2 | 0.1675 | 0.327658519 | + +----------+--------+----------------+ + | sample3 | 0.1341 | 0.156587769 | + +----------+--------+----------------+ + | sample4 | 0.2341 | 0.153658165 | + +----------+--------+----------------+ + | sample5 | 0.4557 | 0.315765787 | + +----------+--------+----------------+ + | sample6 | 0.1879 | 0.253655765 | + +----------+--------+----------------+ + | ... | ... | ... | + +----------+--------+----------------+ + + + + **Group/Treatment [Optional]** + + - Name of the column in your Design File that contains group classifications. + + + **Group/Treatment** + + - Name of the column in your Design File that contains group classifications. + + + **Unique Feature ID** + + - Name of the column in your Wide Dataset that has unique Feature IDs. + + + **Run Order ID** + + - The column name in your Design file that contains the order in which samples were run. + + + **Run Order ID [Optional]** + + - The column name in your Design file that contains the order in which samples were run. + + diff -r b54326490b4d -r 2e7d47c0b027 magnitude_difference_flags.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/magnitude_difference_flags.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,98 @@ + + - Count the number of digits before the decimal place. 
+ + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 mahalanobis_distance.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mahalanobis_distance.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,136 @@ + + to compare groups + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 merge_flags.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_flags.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,79 @@ + + with the same unique identifiers into a single file. + + macros.xml + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 modify_design_file.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify_design_file.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,77 @@ + + to remove specified group types or sampleIDs + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 modulated_modularity_clustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modulated_modularity_clustering.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,106 @@ + + with visual summaries. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 multiple_testing_adjustment.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multiple_testing_adjustment.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,86 @@ + + of p-values. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 mzrt_match.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mzrt_match.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,160 @@ + + across 2 files. 
+ + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 partial_least_squares.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/partial_least_squares.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,145 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 principal_component_analysis.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/principal_component_analysis.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,95 @@ + + for visual summaries of the components. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 random_forest.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/random_forest.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,103 @@ + + algorithm to select features. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 remove_selected_features_samples.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/remove_selected_features_samples.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,140 @@ + + from the data using a flag file. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 remove_user_specified_row_col.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/remove_user_specified_row_col.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,87 @@ + + from the data. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 retention_time_flags.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/retention_time_flags.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,110 @@ + + - Flag features with discrepancies in retention time. 
+ + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 run_order_regression.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/run_order_regression.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,93 @@ + + using the order samples were run. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 scatter_plot_2D.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scatter_plot_2D.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,154 @@ + + - A standalone tool. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 scatter_plot_3D.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scatter_plot_3D.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,182 @@ + + - A standalone tool. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 standardized_euclidean_distance.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/standardized_euclidean_distance.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,116 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 subset_data.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/subset_data.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,82 @@ + + - Based on groups. + + os + pandas + interface + + + + + + + + + + + + + + macros.xml + + + + + + + + + + + + + +@TIP_AND_WARNING@ + +**Tool Description** + +The tool creates new wide format dataset and design dataset based on the existing wide and design datasets where only groups specified by the user are present. +The user chooses which group(s) to include in the new datasets. + +-------------------------------------------------------------------------------- + +**Input** + + - Two input datasets are required. 
+ +@WIDE@ + +**NOTE:** The sample IDs must match the sample IDs in the Design File +(below). Extra columns will automatically be ignored. + +@METADATA@ + +@UNIQID@ + +**Group/Treatment [Optional]** + + - Name of the column in your Design File that contains group classifications. If none is provided, the drop will be performed by 'sampleID'. + +**Group(s)/Sample(s) to drop** + + - Name of the Group(s)/Sample(s), comma separated, that will be removed from your wide dataset. + +-------------------------------------------------------------------------------- + +**Output** + +This tool will output two TSV files: a TSV file containing the subset of the original wide format dataset and a TSV file containing the subset of the original design dataset. Both datasets will contain only the samples belonging to groups selected by the user. + + ]]> + + diff -r b54326490b4d -r 2e7d47c0b027 summarize_flags.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/summarize_flags.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,70 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 svm_classifier.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/svm_classifier.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,198 @@ + + - Predict sample groups. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 threshold_based_flags.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/threshold_based_flags.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,78 @@ + + - Flag features based on a user-specified threshold. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 tool_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_conf.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,40 @@ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff -r b54326490b4d -r 2e7d47c0b027 ttest.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ttest.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,125 @@ + + on features. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 ttest_perm.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ttest_perm.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,92 @@ + + on features. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r b54326490b4d -r 2e7d47c0b027 ttest_single_group.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ttest_single_group.xml Mon Mar 08 22:04:06 2021 +0000 @@ -0,0 +1,100 @@ + + for the specified mean. + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +