view macros.xml @ 0:cd2f3a280463 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/bioconductor-scp commit a0a1a3de5dd24b2aabe96ec3d6f89acdcf5e462b
author recetox
date Wed, 22 Jan 2025 07:44:00 +0000
parents
children
line wrap: on
line source

<macros>
    <!-- Version token: tools importing these macros can reference it as @TOOL_VERSION@.
         Presumably tracks the wrapped bioconductor-scp package version (TODO confirm). -->
    <token name="@TOOL_VERSION@">1.16.0</token>
    <!-- Authorship metadata macro: two individual contributors followed by the maintaining organization. -->
    <xml name="creator">
        <creator>
            <person givenName="Kristina"
                    familyName="Gomoryova"
                    identifier="0000-0003-4407-3917"
                    url="https://github.com/KristinaGomoryova"/>
            <person givenName="Helge"
                    familyName="Hecht"
                    identifier="0000-0001-6744-996X"
                    url="https://github.com/hechth"/>
            <organization name="RECETOX MUNI"
                          email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
                          url="https://www.recetox.muni.cz/"/>
        </creator>
    </xml>

    <!-- Shared option list of aggregation functions. It is expanded inside the select
         parameters of the peptide and protein aggregation sections; colMedians is preselected.
         Each value is a package-qualified R function name. -->
    <xml name="aggregation_options">
        <option selected="true" value="matrixStats::colMedians">Aggregate using the median of each sample (colMedians)</option>
        <option value="MsCoreUtils::medianPolish">Fit an additive model (two way decomposition) using Tukey's median polish procedure (medianPolish)</option>
        <option value="BiocGenerics::colMeans">Aggregate using the mean of each sample (colMeans)</option>
        <option value="BiocGenerics::colSums">Aggregate using the sum of each sample (colSums)</option>
        <option value="MsCoreUtils::robustSummary">Calculate a robust aggregation using MASS::rlm() (robustSummary)</option>
    </xml>

    <!-- Shared option list of single-step normalization methods. It is expanded inside the
         "simple" normalization branch of both the peptide and the protein processing sections;
         center.mean is preselected. -->
    <xml name="normalization_options">
        <option selected="true" value="center.mean">Center sample intensities by subtracting the respective column means (center.mean)</option>
        <option value="sum">Divide each feature's intensity by the sum of the feature (sum)</option>
        <option value="max">Divide each feature's intensity by the maximum of the feature (max)</option>
        <option value="center.median">Center sample intensities by subtracting the respective column medians (center.median)</option>
        <option value="div.mean">Divide by the column means (div.mean)</option>
        <option value="div.median">Divide by the column medians (div.median)</option>
        <option value="diff.median">Center all samples so that they all match the grand median by subtracting the respective columns medians differences to the grand median (diff.median)</option>
        <option value="quantiles">Quantile normalization (quantiles)</option>
        <option value="vsn">Variance-stabilizing normalization (vsn)</option>
    </xml>

    <xml name="scp_param">
       <!-- Primary inputs: the two tabular datasets, the run/batch column taken from the
            evidence table header, and two global switches (rendered as TRUE/FALSE). -->
       <param name="input_data" type="data" format="tabular"
              label="Input evidence table"
              help="Input file is the evidence.txt table from MaxQuant"/>
       <param name="input_annotations" type="data" format="tabular"
              label="Sample annotations table"
              help="A data table specifying metadata; sample annotations."/>
       <param name="runcol" type="data_column" data_ref="input_data" use_header_names="true"
              label="Which column specifies the run identifier and batch name?"
              help="Column to specify both the run identifier and batch name, has to be present in both tables."/>
       <param name="remove_empty_columns" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
              label="Remove empty columns?"
              help="Whether the empty columns should be removed."/>
       <param name="generate_QC_plots" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
              label="Generate QC plots?"
              help="Whether to generate quality-control plots (distribution of the average SCR, distribution of median CV and median intensities)."/>

       <!-- PSM-level filtering: reverse hits and contaminants, PIF, assay size,
            sample-to-carrier ratio, q-values and optional division by the reference channel. -->
       <section name="filtering_data" title="Data Filtering" expanded="true">
            <param name="filter_reverse" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
                   label="Filter reverse sequences?"
                   help="Whether to filter the proteins labelled as 'reverse'."/>
            <param name="filter_contaminants" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
                   label="Filter potential contaminants?"
                   help="Whether to filter the proteins labelled as 'potential contaminant'."/>
            <param name="PIF_threshold" type="float" value="0.8" min="0" max="1"
                   label="Parental ion fraction (PIF) threshold"
                   help="The threshold for parental ion fraction (PIF)."/>
            <param name="minimum_features" type="integer" value="150" min="1"
                   label="Minimum number of PSMs per assay required"
                   help="What is the minimum number of peptide-to-spectrum matches per assay to keep the assay?"/>
            <param name="count_cell_carrier" type="integer" value="200" min="0"
                   label="Number of cells in the carrier channel"
                   help="What is the number of cells in the carrier channel?"/>
            <!-- Choices are the distinct values found in column index 3 of the annotations table
                 (presumably 0-based, i.e. the fourth column; confirm against the expected table layout). -->
            <param name="single_cells" type="select" multiple="true" optional="false" min="1"
                   label="Single-cell channels present in the experiment"
                   help="What are the single-cell channels present in the experiment? At least one channel must be selected!">
                <options from_dataset="input_annotations">
                    <column name="name" index="3"/>
                    <column name="value" index="3"/>
                    <filter name="unique" type="unique_value" column="3"/>
                </options>
            </param>
            <param name="SCR_threshold" type="float" value="0.1" min="0" max="1"
                   label="Mean SCR threshold"
                   help="Mean sample-to-carrier (SCR) threshold. 0.1 corresponds to 10%."/>
            <!-- The option values of the peptide and protein levels look like evidence-table column names (assumption; confirm). -->
            <param name="qvalue_level" type="select" display="radio"
                   label="Filter based on PSM, peptide or protein-level q-values?"
                   help="Whether to perform the q-value computation and filtration on the PSM, peptide or protein level.">
                <option value="PSM">PSM</option>
                <option value="Modified.sequence">peptide</option>
                <option value="Leading.razor.protein" selected="true">protein</option>
            </param>
            <param name="qvalue_threshold" type="float" value="0.01" min="0" max="1"
                   label="q-value threshold"
                   help="Q-value threshold."/>
            <param name="divide_reference" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
                   label="Compute relative reporter ion intensities?"
                   help="Whether to compute the relative reporter ion intensities by dividing the single-cell intensities by the reference channel. Specific for SCOPE2 data."/>
       </section>

        <!-- PSM to peptide aggregation: the function comes from the shared aggregation option
             list, the grouping column from the evidence table header. -->
        <section name="peptide_aggregation" title="Aggregation to peptides" expanded="true">
            <param name="aggregation_peptides" type="select"
                   label="Which function to use for the aggregation?"
                   help="How to aggregate PSMs to peptides?">
                <expand macro="aggregation_options"/>
            </param>
            <param name="column_aggregation_peptides" type="data_column" data_ref="input_data" use_header_names="true"
                   label="Which column should be used for the PSM to peptide aggregation?"
                   help="Which column should be used for the PSM to peptide aggregation. Modified.sequence is recommended."/>
        </section>

        <!-- Peptide-level filtering: sample selection, optional median-intensity and
             median-CV cut-offs, and removal of the blank sample. -->
        <section name="peptide_filtering" title="Peptide filtering" expanded="true">
            <!-- Choices are the distinct values of column index 3 of the annotations table,
                 the same source as the single-cell channel selector. -->
            <param name="samples_to_keep" type="select" multiple="true" optional="false" min="1"
                   label="Which samples to keep?"
                   help="Which samples to keep? We recommend filtering out Carrier and Unused channels at this step.">
                <options from_dataset="input_annotations">
                    <column name="name" index="3"/>
                    <column name="value" index="3"/>
                    <filter name="unique" type="unique_value" column="3"/>
                </options>
            </param>
            <!-- Median intensity filter: disabled by default. -->
            <conditional name="filter_median_intensity">
                <param name="cut_median_intensity" type="select"
                       label="Filter based on median relative intensity?"
                       help="Whether to filter based on median intensity.">
                    <option value="no" selected="true">no</option>
                    <option value="yes">yes</option>
                </param>
                <when value="yes">
                    <param name="median_intensity_threshold" type="float" value="0.4" min="0"
                           label="Median intensity threshold"
                           help="Threshold for relative median intensity filtering."/>
                </when>
                <when value="no"/>
            </conditional>
            <!-- Median CV filter: enabled by default. -->
            <conditional name="filter_median_CV">
                <param name="cut_median_CV" type="select"
                       label="Filter based on median CV?"
                       help="Whether to filter based on median CV.">
                    <option value="yes" selected="true">yes</option>
                    <option value="no">no</option>
                </param>
                <when value="yes">
                    <param name="minimum_peptides_CV" type="integer" value="5" min="1"
                           label="How many peptides per protein are required to compute CV?"
                           help="Minimum number of peptides per protein required for the CV computation."/>
                    <param name="median_CV_threshold" type="float" value="0.65" min="0" max="1"
                           label="Median CV threshold"
                           help="Threshold for median CV filtering."/>
                </when>
                <when value="no"/>
            </conditional>
            <param name="remove_blank" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
                   label="Remove blank sample?"
                   help="Whether to remove the blank sample. If it has been already removed, set to 'false'."/>
        </section>

        <!-- Peptide-level processing: normalization (one-step "simple" or two-step "advanced"),
             log transformation base, and optional removal of peptides with many missing values. -->
        <section name="peptide_processing" title="Processing peptide data" expanded="true">
            <conditional name="normalization_method">
                <param name="choose_normalization" type="select" display="radio"
                       label="How to perform normalization?"
                       help="How to normalize peptide intensities?">
                    <option value="simple">Normalize columns or normalize rows (one only).</option>
                    <option value="advanced" selected="true">Firstly normalize columns, then rows.</option>
                </param>
                <!-- simple: one method picked from the shared normalization option list. -->
                <when value="simple">
                    <param name="normalize_simple_method" type="select"
                           label="Normalization method"
                           help="Which normalization method to choose?">
                        <expand macro="normalization_options"/>
                    </param>
                </when>
                <!-- advanced: separate summary functions for the column step and the row step. -->
                <when value="advanced">
                    <param name="normalize_columns" type="select" display="radio"
                           label="Normalization method columns"
                           help="Which normalization method to choose for columns?">
                        <option value="matrixStats::colMeans2">colMeans</option>
                        <option value="matrixStats::colMedians" selected="true">colMedians</option>
                    </param>
                    <param name="normalize_rows" type="select" display="radio"
                           label="Normalization method rows"
                           help="Which normalization method to choose for rows?">
                        <option value="matrixStats::rowMeans2" selected="true">rowMeans</option>
                        <option value="matrixStats::rowMedians">rowMedians</option>
                    </param>
                </when>
            </conditional>
            <param name="base" type="select" display="radio"
                   label="Log transformation base"
                   help="Which base to use for the log transformation?">
                <option value="2" selected="true">2</option>
                <option value="10">10</option>
            </param>
            <!-- Missing-value filter: enabled by default; the threshold is a percentage (0 to 100). -->
            <conditional name="remove_missing_peptides">
                <param name="remove_peptides" type="select"
                       label="Remove peptides with high missing rate?"
                       help="Whether to remove the peptides with the high missing rate.">
                    <option value="yes" selected="true">yes</option>
                    <option value="no">no</option>
                </param>
                <when value="yes">
                    <param name="pNA_peptides" type="float" value="99" min="0" max="100"
                           label="% of NA values filtering threshold"
                           help="If peptide has this % of missing values, it will be removed."/>
                </when>
                <when value="no"/>
            </conditional>
        </section>

        <!-- Peptide to protein aggregation: mirrors the peptide aggregation section, reusing
             the shared aggregation option list and a column of the evidence table. -->
        <section name="protein_aggregation" title="Aggregation to proteins" expanded="true">
            <param name="aggregation_proteins" type="select"
                   label="Which function to use for the aggregation?"
                   help="How to aggregate peptides to proteins?">
                <expand macro="aggregation_options"/>
            </param>
            <param name="column_aggregation_proteins" type="data_column" data_ref="input_data" use_header_names="true"
                   label="Which column should be used for the peptide to protein aggregation?"
                   help="Which column should be used for the peptide to protein aggregation. Leading.razor.protein is recommended."/>
        </section>

        <!-- Protein-level processing: same normalization choices as for peptides (with
             "_prot"-suffixed names and values) plus the k used for kNN imputation. -->
        <section name="protein_processing" title="Processing protein data" expanded="true">
            <conditional name="normalization_method_protein">
                <param name="choose_normalization_protein" type="select" display="radio"
                       label="How to perform normalization?"
                       help="How to normalize protein intensities?">
                    <option value="simple_prot">Normalize columns or normalize rows (one only).</option>
                    <option value="advanced_prot" selected="true">Firstly normalize columns, then rows.</option>
                </param>
                <!-- simple_prot: one method picked from the shared normalization option list. -->
                <when value="simple_prot">
                    <param name="normalize_simple_method_prot" type="select"
                           label="Normalization method"
                           help="Which normalization method to choose?">
                        <expand macro="normalization_options"/>
                    </param>
                </when>
                <!-- advanced_prot: separate summary functions for the column step and the row step. -->
                <when value="advanced_prot">
                    <param name="normalize_columns_prot" type="select" display="radio"
                           label="Normalization method columns"
                           help="Which normalization method to choose for columns?">
                        <option value="matrixStats::colMeans2">colMeans</option>
                        <option value="matrixStats::colMedians" selected="true">colMedians</option>
                    </param>
                    <param name="normalize_rows_prot" type="select" display="radio"
                           label="Normalization method rows"
                           help="Which normalization method to choose for rows?">
                        <option value="matrixStats::rowMeans2" selected="true">rowMeans</option>
                        <option value="matrixStats::rowMedians">rowMedians</option>
                    </param>
                </when>
            </conditional>
            <param name="impute_k" type="integer" value="3" min="1"
                   label="Which k to use for the kNN imputation?"
                   help="Number of k-nearest neighbours to consider for kNN imputation."/>
        </section>

        <!-- Batch correction: choice between ComBat() and removeBatchEffect(). Both branches
             ask for the batch column of the annotations table; removeBatchEffect() additionally
             asks for a column to preserve.
             NOTE(review): the batch_col help text is identical to the one used for runcol;
             confirm that it describes this parameter as intended. -->
        <section name="batch_correction" title="Batch correction" expanded="true">
            <conditional name="select_batch_correction">
                <param name="batch_correction_method" type="select" display="radio"
                       label="Which batch correction method to use?"
                       help="Which method to use for correcting the batch effect?">
                    <option value="combat" selected="true">ComBat()</option>
                    <option value="removebatcheffect">removeBatchEffect()</option>
                </param>
                <when value="combat">
                    <param name="batch_col" type="data_column" data_ref="input_annotations" use_header_names="true"
                           label="Which column is the technical variable to be corrected?"
                           help="Column to specify both the run identifier and batch name, has to be present in both tables."/>
                </when>
                <when value="removebatcheffect">
                    <param name="preserve_col" type="data_column" data_ref="input_annotations" use_header_names="true"
                           label="Which column is the variable to be preserved?"
                           help="Which column is the variable to be preserved."/>
                    <param name="batch_col" type="data_column" data_ref="input_annotations" use_header_names="true"
                           label="Which column is the technical variable to be corrected?"
                           help="Column to specify both the run identifier and batch name, has to be present in both tables."/>
                </when>
            </conditional>
        </section>

        <!-- Dimensionality reduction: optional PCA; the UMAP choice is nested inside the PCA
             "yes" branch, so UMAP can only be requested when PCA is run. -->
        <section name="dimensionality_reduction" title="Dimensionality reduction" expanded="true">
            <conditional name="PCA_computation">
                <param name="run_PCA" type="select" display="radio"
                       label="Run principal component analysis (PCA)?"
                       help="Run the PCA on imputed batch-corrected protein intensities?">
                    <option value="yes" selected="true">yes</option>
                    <option value="no">no</option>
                </param>
                <when value="yes">
                    <param name="ncomponents_PCA" type="integer" value="5" min="2"
                           label="Number of components"
                           help="Number of components in the PCA analysis."/>
                    <param name="pca_coloring" type="data_column" data_ref="input_annotations" use_header_names="true"
                           label="What column to color the PCA according to?"
                           help="Based on which column from the sampleAnnotation file should be the PCA colored."/>
                    <conditional name="UMAP_computation">
                        <param name="run_UMAP" type="select" display="radio"
                               label="Run UMAP on PCA data?"
                               help="Run the UMAP on PCA-reduced data?">
                            <option value="yes" selected="true">yes</option>
                            <option value="no">no</option>
                        </param>
                        <when value="yes">
                            <param name="ncomponents_UMAP" type="integer" value="2" min="2"
                                   label="Number of components"
                                   help="Number of components in the UMAP analysis."/>
                        </when>
                        <when value="no"/>
                    </conditional>
                </when>
                <when value="no"/>
            </conditional>
        </section>

        <!-- Export switches: all three are off by default and are rendered as TRUE/FALSE.
             They control export of intermediate tables, the scp object (.rds) and the executed R script. -->
        <section name="data_export" title="Export data" expanded="true">
            <param name="export_tables" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE"
                   label="Export intermediate results"
                   help="Whether only the final result table (log2 transformed, normalized, imputed, batch-corrected data) should be exported or all intermediate results."/>
            <param name="export_RData" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE"
                   label="Export scp object as .rds"
                   help="Whether to export the scp object as rds file format."/>
            <!-- Help text fix: the original read "Not that in this case"; it now reads "Note that in this case". -->
            <param name="export_R_script" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE"
                   label="Export the R script to reproduce the analysis"
                   help="Check this box to export the script executed in the Galaxy tool as an R file to be able to reproduce the same processing offline. Note that in this case, the file paths need to be altered and all the dependencies have to be managed manually."/>
        </section>
    </xml>

    <!-- Citation macro: five DOI references for tools importing these macros. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1002/cpz1.658</citation>
            <citation type="doi">10.1080/14789450.2021.1988571</citation>
            <citation type="doi">10.1021/acs.jproteome.3c00227</citation>
            <citation type="doi">10.1007/978-1-0716-3934-4_14</citation>
            <citation type="doi">10.1101/2023.12.14.571792</citation>
        </citations>
    </xml>

</macros>