Mercurial > repos > recetox > bioconductor_scp
view bioconductor_scp.xml @ 0:cd2f3a280463 draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/bioconductor-scp commit a0a1a3de5dd24b2aabe96ec3d6f89acdcf5e462b
author | recetox |
---|---|
date | Wed, 22 Jan 2025 07:44:00 +0000 |
parents | |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for the Bioconductor `scp` single cell proteomics workflow.

    The tool renders one R script (configfile `run_script`) from the user
    parameters and runs it with Rscript after sourcing the helper functions in
    utils.r (create_boxplots, plot_heatmap and export_all_assays are used below
    and are expected to come from that file).

    All input parameters are declared by the `scp_param` macro in macros.xml.
-->
<tool id="bioconductor_scp" name="bioconductor-scp" version="@TOOL_VERSION@+galaxy0" profile="23.0">
    <description>single cell proteomics data analysis workflow</description>
    <macros>
        <import>macros.xml</import>
        <import>help.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">scp</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">bioconductor-scp</requirement>
        <requirement type="package" version="3.54.0">bioconductor-sva</requirement>
        <requirement type="package" version="1.80.0">bioconductor-impute</requirement>
        <requirement type="package" version="1.34.0">bioconductor-scater</requirement>
        <requirement type="package" version="1.16.0">bioconductor-qfeatures</requirement>
        <requirement type="package" version="3.62.1">bioconductor-limma</requirement>
        <requirement type="package" version="3.5.1">r-ggplot2</requirement>
    </requirements>
    <required_files>
        <include path="utils.r" />
    </required_files>
    <expand macro="creator" />
    <!--
        Runs the rendered script; when requested, the rendered script itself is
        copied into the `script` output so the analysis can be reproduced.
    -->
    <command detect_errors="exit_code"><![CDATA[
        echo ${run_script} &&
        Rscript -e 'source("${__tool_directory__}/utils.r")' -e 'source("${run_script}")'
        #if $data_export.export_R_script
            && cat '${run_script}' >> '$script'
        #end if
    ]]></command>
    <configfiles>
        <configfile name="run_script"><![CDATA[
            ## Lines starting with a double hash are Cheetah comments: they are
            ## dropped at render time and never reach the exported R script.

            ## Read the evidence table and the sample annotation; the numeric
            ## column selections are turned into column names.
            data_input <- read.delim("$input_data", sep="\t")
            metadata <- read.delim("$input_annotations", sep="\t")

            runCol <- colnames(data_input)[$runcol]
            fcol_aggregation_pep <- colnames(data_input)[${peptide_aggregation.column_aggregation_peptides}]
            fcol_aggregation_prot <- colnames(data_input)[${protein_aggregation.column_aggregation_proteins}]

            ## Every plot is written below this directory and picked up by the
            ## `plots` output collection.
            dir.create("plots")

            scp <- scp::readSCP(
                assayData = data_input,
                colData = metadata,
                runCol = runCol,
                removeEmptyCols = $remove_empty_columns
            )
            number_of_assays <- length(scp)
            scp <- QFeatures::zeroIsNA(scp, i = 1:number_of_assays)

            ## Feature filtering on the Reverse, Potential.contaminant and PIF columns.
            #if $filtering_data.filter_reverse
            scp <- QFeatures::filterFeatures(scp, ~ Reverse != "+")
            #end if
            #if $filtering_data.filter_contaminants
            scp <- QFeatures::filterFeatures(scp, ~ Potential.contaminant != "+")
            #end if
            scp <- QFeatures::filterFeatures(scp, ~ !is.na(PIF) & PIF > ${filtering_data.PIF_threshold})

            ## Keep only assays with more features than the requested minimum;
            ## the assay count is refreshed because later steps index 1:number_of_assays.
            keepAssay <- QFeatures::dims(scp)[1, ] > ${filtering_data.minimum_features}
            scp <- scp[, , keepAssay]
            number_of_assays <- length(scp)

            ## The comma separated sample types become a regular expression alternation.
            single_cell_channels <- gsub(",", "|", "${filtering_data.single_cells}")
            scp <- scp::computeSCR(
                scp,
                i = 1:number_of_assays,
                colvar = "SampleType",
                carrierPattern = "Carrier",
                samplePattern = single_cell_channels,
                sampleFUN = "mean",
                rowDataName = "MeanSCR"
            )

            #if $generate_QC_plots
            QC_plot_SCR <- QFeatures::rbindRowData(scp, i = 1:number_of_assays) |>
                data.frame() |>
                ggplot2::ggplot(ggplot2::aes(x = MeanSCR)) +
                ggplot2::geom_histogram() +
                ggplot2::geom_vline(xintercept = c(1/$count_cell_carrier, 0.1), lty = c(2, 1)) +
                ggplot2::scale_x_log10()
            ggplot2::ggsave(filename = file.path("plots", "QC_plot_SCR.pdf"), QC_plot_SCR)

            QC_plot_SCR_col <- QFeatures::rbindRowData(scp, i = 1:number_of_assays) |>
                data.frame() |>
                ggplot2::ggplot(ggplot2::aes(x = MeanSCR, color = runCol)) +
                ggplot2::geom_density() +
                ggplot2::geom_vline(xintercept = 0.02, lty = 2) +
                ggplot2::geom_vline(xintercept = 1, lty = 1) +
                ggplot2::scale_x_log10()
            ggplot2::ggsave(filename = file.path("plots", "QC_plot_SCR_col.pdf"), QC_plot_SCR_col)
            #end if

            scp <- QFeatures::filterFeatures(scp, ~ !is.na(MeanSCR) & MeanSCR < ${filtering_data.SCR_threshold})

            ## q-values are derived from the dart_PEP column, either per PSM or
            ## grouped by the selected column, then thresholded.
            #if $filtering_data.qvalue_level == "PSM"
            scp <- scp::pep2qvalue(
                scp,
                i = 1:number_of_assays,
                PEP = "dart_PEP",
                rowDataName = "qvalue"
            )
            scp <- QFeatures::filterFeatures(scp, ~ qvalue < ${filtering_data.qvalue_threshold})
            #else
            scp <- scp::pep2qvalue(
                scp,
                i = 1:number_of_assays,
                PEP = "dart_PEP",
                groupBy = "$filtering_data.qvalue_level",
                rowDataName = "qvalue"
            )
            scp <- QFeatures::filterFeatures(scp, ~ qvalue < ${filtering_data.qvalue_threshold})
            #end if

            #if $filtering_data.divide_reference
            scp <- scp::divideByReference(
                scp,
                i = 1:number_of_assays,
                colvar = "SampleType",
                samplePattern = ".",
                refPattern = "Reference"
            )
            #end if

            ## Aggregate each run to peptide level, then join the per run peptide
            ## assays into a single "peptides" assay.
            scp <- scp::aggregateFeaturesOverAssays(
                scp,
                i = 1:number_of_assays,
                fcol = fcol_aggregation_pep,
                name = paste0("peptide_", names(scp)),
                fun = ${peptide_aggregation.aggregation_peptides},
                na.rm = TRUE
            )
            scp <- QFeatures::joinAssays(scp, i = grep("peptide", names(scp)), name = "peptides")

            keep_samples <- unlist(strsplit("${peptide_filtering.samples_to_keep}", split=","))
            scp <- scp[, SummarizedExperiment::colData(scp)[["SampleType"]] %in% keep_samples, ]

            #if $peptide_filtering.filter_median_intensity.cut_median_intensity == "yes"
            ## Namespaced like every other call in this script so it does not
            ## depend on which packages utils.r attaches.
            medians <- MatrixGenerics::colMedians(SummarizedExperiment::assay(scp[["peptides"]]), na.rm = TRUE)
            SummarizedExperiment::colData(scp)[["MedianRI"]] <- medians
            #if $generate_QC_plots
            QC_medianRI <- SummarizedExperiment::colData(scp) |>
                data.frame() |>
                ggplot2::ggplot() +
                ggplot2::aes(x = MedianRI, y = SampleType, fill = SampleType) +
                ggplot2::geom_boxplot() +
                ggplot2::scale_x_log10()
            ggplot2::ggsave(filename = file.path("plots", "QC_medianRI.pdf"), QC_medianRI)
            #end if
            scp <- scp[, !is.na(SummarizedExperiment::colData(scp)[["MedianRI"]]) & SummarizedExperiment::colData(scp)[["MedianRI"]] < ${peptide_filtering.filter_median_intensity.median_intensity_threshold}, ]
            #end if

            #if $peptide_filtering.filter_median_CV.cut_median_CV == "yes"
            number_of_observations <- ${peptide_filtering.filter_median_CV.minimum_peptides_CV}
            CV_threshold <- ${peptide_filtering.filter_median_CV.median_CV_threshold}
            scp <- scp::medianCVperCell(
                scp,
                i = 1:number_of_assays,
                groupBy = "Leading.razor.protein",
                nobs = number_of_observations,
                norm = "div.median",
                na.rm = TRUE,
                colDataName = "MedianCV"
            )
            #if $generate_QC_plots
            QC_medianCV <- MultiAssayExperiment::getWithColData(scp, "peptides") |>
                SummarizedExperiment::colData() |>
                data.frame() |>
                ggplot2::ggplot(ggplot2::aes(x = MedianCV, fill = SampleType)) +
                ggplot2::geom_boxplot() +
                ggplot2::geom_vline(xintercept = CV_threshold)
            ggplot2::ggsave(filename = file.path("plots", "QC_medianCV.pdf"), QC_medianCV)
            #end if
            scp <- scp[, !is.na(SummarizedExperiment::colData(scp)[["MedianCV"]]) & SummarizedExperiment::colData(scp)[["MedianCV"]] < CV_threshold, ]
            #end if

            #if $peptide_filtering.remove_blank
            scp <- scp[, SummarizedExperiment::colData(scp)[["SampleType"]] != "Blank", ]
            #end if

            ## Peptide normalization: either one QFeatures::normalize method, or a
            ## column sweep followed by a row sweep with the selected statistics.
            ## Both paths end in an assay named "peptides_norm".
            #if $peptide_processing.normalization_method.choose_normalization == "simple"
            scp <- QFeatures::normalize(
                scp,
                i = "peptides",
                name = "peptides_norm",
                method = "${peptide_processing.normalization_method.normalize_simple_method}"
            )
            #else
            scp <- QFeatures::sweep(
                scp,
                i = "peptides",
                MARGIN = 2,
                FUN = "/",
                STATS = ${peptide_processing.normalization_method.normalize_columns}(SummarizedExperiment::assay(scp[["peptides"]]), na.rm = TRUE),
                name = "peptides_norm_col"
            )
            scp <- QFeatures::sweep(
                scp,
                i = "peptides_norm_col",
                MARGIN = 1,
                FUN = "/",
                STATS = ${peptide_processing.normalization_method.normalize_rows}(SummarizedExperiment::assay(scp[["peptides_norm_col"]]), na.rm = TRUE),
                name = "peptides_norm"
            )
            #end if

            scp <- QFeatures::logTransform(
                scp,
                base = ${peptide_processing.base},
                i = "peptides_norm",
                name = "peptides_log"
            )

            #if $generate_QC_plots
            QC_boxplot_peptide <- create_boxplots(scp, "peptides", FALSE, "Peptides not normalized")
            QC_boxplot_peptide_norm <- create_boxplots(scp, "peptides_log", TRUE, "Peptides normalized")
            ggplot2::ggsave(filename = file.path("plots", "QC_boxplot_peptide.pdf"), QC_boxplot_peptide)
            ggplot2::ggsave(filename = file.path("plots", "QC_boxplot_peptide_norm.pdf"), QC_boxplot_peptide_norm)
            #end if

            #if $peptide_processing.remove_missing_peptides.remove_peptides == "yes"
            ## The parameter is a percentage; filterNA receives a proportion.
            pNA <- ${peptide_processing.remove_missing_peptides.pNA_peptides} / 100
            scp <- QFeatures::filterNA(scp, i = "peptides_log", pNA = pNA)
            #end if

            scp <- scp::aggregateFeaturesOverAssays(
                scp,
                i = "peptides_log",
                fcol = fcol_aggregation_prot,
                name = "proteins",
                fun = ${protein_aggregation.aggregation_proteins},
                na.rm = TRUE
            )

            ## Protein normalization mirrors the peptide step and ends in "proteins_norm".
            #if $protein_processing.normalization_method_protein.choose_normalization_protein == "simple_prot"
            scp <- QFeatures::normalize(
                scp,
                i = "proteins",
                name = "proteins_norm",
                method = "${protein_processing.normalization_method_protein.normalize_simple_method_prot}"
            )
            #else
            scp <- QFeatures::sweep(
                scp,
                i = "proteins",
                MARGIN = 2,
                FUN = "/",
                STATS = ${protein_processing.normalization_method_protein.normalize_columns_prot}(SummarizedExperiment::assay(scp[["proteins"]]), na.rm = TRUE),
                name = "proteins_norm_col"
            )
            scp <- QFeatures::sweep(
                scp,
                i = "proteins_norm_col",
                MARGIN = 1,
                FUN = "/",
                STATS = ${protein_processing.normalization_method_protein.normalize_rows_prot}(SummarizedExperiment::assay(scp[["proteins_norm_col"]]), na.rm = TRUE),
                name = "proteins_norm"
            )
            #end if

            #if $generate_QC_plots
            QC_boxplot_protein <- create_boxplots(scp, "proteins", TRUE, "Proteins not normalized")
            QC_boxplot_protein_norm <- create_boxplots(scp, "proteins_norm", TRUE, "Proteins normalized")
            ggplot2::ggsave(filename = file.path("plots", "QC_boxplot_protein.pdf"), QC_boxplot_protein)
            ggplot2::ggsave(filename = file.path("plots", "QC_boxplot_protein_norm.pdf"), QC_boxplot_protein_norm)

            pdf(file = file.path("plots", "QC_heatmap_proteins.pdf"))
            plot_heatmap(scp, "proteins_norm")
            dev.off()
            #end if

            scp <- QFeatures::impute(
                scp,
                i = "proteins_norm",
                name = "proteins_imptd",
                method = "knn",
                k = ${protein_processing.impute_k},
                rowmax = 1,
                colmax= 1,
                maxp = Inf,
                rng.seed = 1234
            )

            ## Batch correction on the imputed protein assay. Both branches add the
            ## corrected data as "proteins_batchC" linked one to one to "proteins_imptd".
            batch_colname <- colnames(metadata)[${batch_correction.select_batch_correction.batch_col}]
            #if $batch_correction.select_batch_correction.batch_correction_method == "combat"
            sce <- MultiAssayExperiment::getWithColData(scp, "proteins_imptd")
            ## Batch labels and the model matrix are both taken from `sce`, so they
            ## are aligned with the columns of the matrix handed to ComBat.
            batch <- SummarizedExperiment::colData(sce)[[batch_colname]]
            model <- stats::model.matrix(~ SampleType, data = SummarizedExperiment::colData(sce))
            SummarizedExperiment::assay(sce) <- sva::ComBat(
                dat = SummarizedExperiment::assay(sce),
                batch = batch,
                mod = model
            )
            scp <- QFeatures::addAssay(
                scp,
                y = sce,
                name = "proteins_batchC"
            )
            scp <- QFeatures::addAssayLinkOneToOne(
                scp,
                from = "proteins_imptd",
                to = "proteins_batchC"
            )
            #else
            sce <- MultiAssayExperiment::getWithColData(scp, "proteins_imptd")
            preserve_colname <- colnames(metadata)[${batch_correction.select_batch_correction.preserve_col}]
            SummarizedExperiment::assay(sce) <- limma::removeBatchEffect(
                SummarizedExperiment::assay(sce),
                group = sce[[preserve_colname]],
                batch = sce[[batch_colname]]
            )
            scp <- QFeatures::addAssay(scp, y = sce, name = "proteins_batchC")
            scp <- QFeatures::addAssayLinkOneToOne(scp, from = "proteins_imptd", to = "proteins_batchC")
            #end if

            ## Dimensionality reduction: UMAP is nested in the PCA branch because
            ## it is computed from the stored "PCA" reduced dimension.
            #if $dimensionality_reduction.PCA_computation.run_PCA == "yes"
            PCA_color <- colnames(metadata)[$dimensionality_reduction.PCA_computation.pca_coloring]
            scp[["proteins_batchC"]] <- scater::runPCA(
                scp[["proteins_batchC"]],
                ncomponents = ${dimensionality_reduction.PCA_computation.ncomponents_PCA},
                ntop = Inf,
                scale = TRUE,
                exprs_values = 1,
                name = "PCA"
            )
            pca <- scater::plotReducedDim(
                scp[["proteins_batchC"]],
                dimred = "PCA",
                colour_by = PCA_color,
                point_alpha = 1
            )
            ggplot2::ggsave(filename = file.path("plots", "PCA.pdf"), pca)
            #if $dimensionality_reduction.PCA_computation.UMAP_computation.run_UMAP == "yes"
            scp[["proteins_batchC"]] <- scater::runUMAP(
                scp[["proteins_batchC"]],
                ncomponents = ${dimensionality_reduction.PCA_computation.UMAP_computation.ncomponents_UMAP},
                ntop = Inf,
                scale = TRUE,
                exprs_values = 1,
                n_neighbors = 3,
                dimred = "PCA",
                name = "UMAP"
            )
            umap <- scater::plotReducedDim(
                scp[["proteins_batchC"]],
                dimred = "UMAP",
                colour_by = "SampleType",
                point_alpha = 1
            )
            ggplot2::ggsave(filename = file.path("plots", "UMAP.pdf"), umap)
            #end if
            #end if

            ## Main output: row metadata followed by the batch corrected values.
            assay_df <- as.data.frame(SummarizedExperiment::assay(scp, "proteins_batchC"))
            row_metadata <- as.data.frame(SummarizedExperiment::rowData(scp[["proteins_batchC"]]))
            export_data <- cbind(row_metadata, as.data.frame(assay_df))
            write.table(export_data, file = '$Processed_data', sep = "\t", quote = FALSE)

            #if $data_export.export_tables
            export_all_assays(scp)
            #end if

            #if $data_export.export_RData
            ## The output dataset is declared with format "rds", so the object is
            ## serialized with saveRDS and can be restored with readRDS.
            saveRDS(scp, file = '$scp_object')
            #end if
        ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="scp_param"/>
    </inputs>
    <outputs>
        <data name="Processed_data" format="tabular" label="Batch-corrected protein levels"/>
        <collection name="intermediate_outputs" type="list" label="Intermediate outputs" format="tabular">
            <discover_datasets pattern="__name_and_ext__" directory="outputs" />
            <filter>data_export['export_tables']</filter>
        </collection>
        <data name="scp_object" format="rds" label="scp object as .rds">
            <filter>data_export['export_RData']</filter>
        </data>
        <data name="script" format="txt" label="R script">
            <filter>data_export['export_R_script']</filter>
        </data>
        <!--
            Plots exist when QC plots are requested or when PCA runs; UMAP is only
            computed inside the PCA branch, so it needs no separate clause.
        -->
        <collection name="plots" type="list" label="Plots">
            <discover_datasets pattern="__name_and_ext__" directory="plots" />
            <filter>generate_QC_plots or dimensionality_reduction['PCA_computation']['run_PCA'] == 'yes'</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Test 1: default run, checks the main table only. -->
        <test expect_num_outputs='2'>
            <param name="input_data" value="evidence_subset.txt"/>
            <param name="input_annotations" value="sampleAnnotation.txt"/>
            <param name="runcol" value="19"/>
            <param name="single_cells" value="Macrophage,Monocyte"/>
            <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>
            <param name="batch_col" value="2"/>
            <param name="column_aggregation_peptides" value="6"/>
            <param name="column_aggregation_proteins" value="17"/>
            <param name="pca_coloring" value="4"/>
            <output name="Processed_data">
                <assert_contents>
                    <has_n_lines n="90"/>
                    <has_text text="E9PAV3"/>
                    <has_size size="625000" delta="20"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: default run, additionally checks the plot collection. -->
        <test expect_num_outputs='2'>
            <param name="input_data" value="evidence_subset.txt" />
            <param name="input_annotations" value="sampleAnnotation.txt"/>
            <param name="runcol" value="19"/>
            <param name="single_cells" value="Macrophage,Monocyte"/>
            <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>
            <param name="batch_col" value="2"/>
            <param name="column_aggregation_peptides" value="6"/>
            <param name="column_aggregation_proteins" value="17"/>
            <param name="pca_coloring" value="4"/>
            <output_collection name="plots" type="list">
                <element name="PCA" file="PCA.pdf" ftype="pdf" compare="sim_size" delta="60"/>
                <element name="QC_boxplot_peptide" file="QC_boxplot_peptide.pdf" ftype="pdf" compare="sim_size" delta="40"/>
                <element name="QC_boxplot_peptide_norm" file="QC_boxplot_peptide_norm.pdf" ftype="pdf" compare="sim_size" delta="40"/>
                <!-- NOTE(review): the expected file is "QC_boxplot.pdf" while sibling elements use the element name; confirm it matches test-data. -->
                <element name="QC_boxplot_protein" file="QC_boxplot.pdf" ftype="pdf" compare="sim_size" delta="40"/>
                <element name="QC_boxplot_protein_norm" file="QC_boxplot_protein_norm.pdf" ftype="pdf" compare="sim_size" delta="40"/>
                <element name="QC_heatmap_proteins" file="QC_heatmap_proteins.pdf" ftype="pdf" compare="sim_size" delta="40"/>
                <element name="QC_medianCV" file="QC_medianCV.pdf" ftype="pdf" compare="sim_size" delta="40"/>
                <element name="QC_plot_SCR" file="QC_plot_SCR.pdf" ftype="pdf" compare="sim_size" delta="40"/>
                <element name="QC_plot_SCR_col" file="QC_plot_SCR_col.pdf" ftype="pdf" compare="sim_size" delta="40"/>
                <element name="UMAP" file="UMAP.pdf" ftype="pdf" compare="sim_size" delta="200"/>
            </output_collection>
            <output name="Processed_data">
                <assert_contents>
                    <has_size size="625000" delta="20"/>
                    <has_n_lines n="90"/>
                    <has_text text="E9PAV3"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: intermediate tables are exported as a collection. -->
        <test expect_num_outputs='3'>
            <param name="input_data" value="evidence_subset.txt" />
            <param name="input_annotations" value="sampleAnnotation.txt"/>
            <param name="runcol" value="19"/>
            <param name="single_cells" value="Macrophage,Monocyte"/>
            <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>
            <param name="column_aggregation_peptides" value="6"/>
            <param name="column_aggregation_proteins" value="17"/>
            <param name="batch_col" value="2"/>
            <param name="export_tables" value="true"/>
            <param name="pca_coloring" value="4"/>
            <output_collection name="intermediate_outputs" type="list" count="6"/>
            <output name="Processed_data">
                <assert_contents>
                    <has_size size="625000" delta="20"/>
                    <has_n_lines n="90"/>
                    <has_text text="E9PAV3"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: the serialized object and the rendered R script are exported. -->
        <test expect_num_outputs='4'>
            <param name="input_data" value="evidence_subset.txt" />
            <param name="input_annotations" value="sampleAnnotation.txt"/>
            <param name="runcol" value="19"/>
            <param name="single_cells" value="Macrophage,Monocyte"/>
            <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>
            <param name="batch_col" value="2"/>
            <param name="export_RData" value="TRUE"/>
            <param name="export_R_script" value="TRUE"/>
            <param name="column_aggregation_peptides" value="6"/>
            <param name="column_aggregation_proteins" value="17"/>
            <param name="pca_coloring" value="4"/>
            <output name="Processed_data">
                <assert_contents>
                    <has_size size="625000" delta="20"/>
                    <has_n_lines n="90"/>
                    <has_text text="E9PAV3"/>
                </assert_contents>
            </output>
            <output name="script">
                <assert_contents>
                    <!-- NOTE(review): this count depends on the line layout of the run_script configfile; re-verify after any reformatting of that template. -->
                    <has_n_lines n="271"/>
                    <has_text text='ggplot2::ggsave(filename = file.path("plots", "PCA.pdf"), pca)'/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
        @GENERAL_HELP@
    ]]></help>
    <expand macro="citations" />
</tool>